date:20240514

[COMMITTED] ada: Correct System.Win32.LocalFileTimeToFileTime wrapper typo

2024-05-14 Thread Marc Poulhiès

From: Philippe Gil 

The parameters should be swapped to fit Fileapi.h documentation.
BOOL LocalFileTimeToFileTime(
[in]  const FILETIME *lpLocalFileTime,
[out] LPFILETIME lpFileTime
);

gcc/ada/
* libgnat/s-win32.ads (LocalFileTimeToFileTime): Swap parameters.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/s-win32.ads | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/libgnat/s-win32.ads b/gcc/ada/libgnat/s-win32.ads
index 6ea66153639..6e8e246d903 100644
--- a/gcc/ada/libgnat/s-win32.ads
+++ b/gcc/ada/libgnat/s-win32.ads
@@ -315,8 +315,8 @@ package System.Win32 is
pragma Import (Stdcall, FileTimeToLocalFileTime, "FileTimeToLocalFileTime");
 
function LocalFileTimeToFileTime
- (lpFileTime  : access Long_Long_Integer;
-  lpLocalFileTime : access Long_Long_Integer) return BOOL;
+ (lpLocalFileTime : access Long_Long_Integer;
+  lpFileTime  : access Long_Long_Integer) return BOOL;
pragma Import (Stdcall, LocalFileTimeToFileTime, "LocalFileTimeToFileTime");
 
procedure Sleep (dwMilliseconds : DWORD);
-- 
2.43.2

[COMMITTED] ada: Follow-up adjustment to earlier fix in Build_Allocate_Deallocate_Proc

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

The profile of the procedure built for an allocation on the secondary stack
now includes the alignment parameter, so the parameter can just be forwarded
in the call to Allocate_Any_Controlled.

gcc/ada/

* exp_util.adb (Build_Allocate_Deallocate_Proc): Pass the alignment
parameter in the inner call for a secondary stack allocation too.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_util.adb | 43 ++-
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index 103d59e4deb..4b1c5322f62 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -1081,10 +1081,8 @@ package body Exp_Util is
  --  allocations can be performed without getting the alignment from
  --  the type's Type Specific Record.
 
- if ((Is_Allocate and then No (Alloc_Expr))
-   or else
- not Is_Class_Wide_Type (Desig_Typ))
-   and then not Use_Secondary_Stack_Pool
+ if (Is_Allocate and then No (Alloc_Expr))
+   or else not Is_Class_Wide_Type (Desig_Typ)
  then
 Append_To (Actuals, New_Occurrence_Of (Alig_Id, Loc));
 
@@ -1103,9 +1101,6 @@ package body Exp_Util is
 --  into the code that reads the value of alignment from the TSD
 --  (see Expand_N_Attribute_Reference)
 
---  In the Use_Secondary_Stack_Pool case, Alig_Id is not
---  passed in and therefore must not be referenced.
-
 Append_To (Actuals,
   Unchecked_Convert_To (RTE (RE_Storage_Offset),
 Make_Attribute_Reference (Loc,
@@ -1255,53 +1250,51 @@ package body Exp_Util is
 Proc_To_Call := RTE (RE_Deallocate_Any_Controlled);
  end if;
 
- --  Create a custom Allocate / Deallocate routine which has identical
- --  profile to that of System.Storage_Pools.
+ --  Create a custom Allocate/Deallocate routine which has identical
+ --  profile to that of System.Storage_Pools, except for a secondary
+ --  stack allocation where the profile must be identical to that of
+ --  the System.Secondary_Stack.SS_Allocate procedure (deallocation
+ --  is not supported for the secondary stack).
 
  declare
---  P : Root_Storage_Pool
 function Pool_Param return Node_Id is (
   Make_Parameter_Specification (Loc,
 Defining_Identifier => Make_Temporary (Loc, 'P'),
 Parameter_Type  =>
   New_Occurrence_Of (RTE (RE_Root_Storage_Pool), Loc)));
+--  P : Root_Storage_Pool
 
---  A : [out] Address
 function Address_Param return Node_Id is (
   Make_Parameter_Specification (Loc,
 Defining_Identifier => Addr_Id,
 Out_Present => Is_Allocate,
 Parameter_Type  =>
   New_Occurrence_Of (RTE (RE_Address), Loc)));
+--  A : [out] Address
 
---  S : Storage_Count
 function Size_Param return Node_Id is (
   Make_Parameter_Specification (Loc,
 Defining_Identifier => Size_Id,
 Parameter_Type  =>
   New_Occurrence_Of (RTE (RE_Storage_Count), Loc)));
+--  S : Storage_Count
 
---  L : Storage_Count
 function Alignment_Param return Node_Id is (
   Make_Parameter_Specification (Loc,
 Defining_Identifier => Alig_Id,
 Parameter_Type  =>
   New_Occurrence_Of (RTE (RE_Storage_Count), Loc)));
+--  L : Storage_Count
 
-Formal_Params : List_Id;
+Formal_Params : constant List_Id :=
+  (if Use_Secondary_Stack_Pool
+then New_List (Address_Param, Size_Param, Alignment_Param)
+else
+  New_List
+(Pool_Param, Address_Param, Size_Param, Alignment_Param));
+--  The list of formal parameters of the routine
 
  begin
-if Use_Secondary_Stack_Pool then
-   --  Gigi expects a different profile in the Secondary_Stack_Pool
-   --  case. There must be no uses of the two missing formals
-   --  (i.e., Pool_Param and Alignment_Param) in this case.
-   Formal_Params := New_List
- (Address_Param, Size_Param, Alignment_Param);
-else
-   Formal_Params := New_List (
- Pool_Param, Address_Param, Size_Param, Alignment_Param);
-end if;
-
 Insert_Action (N,
   Make_Subprogram_Body (Loc,
 Specification  =>
-- 
2.43.2

[COMMITTED] ada: Spurious unreferenced warning on selected component

2024-05-14 Thread Marc Poulhiès

From: Justin Squirek 

This patch fixes an error in the compiler whereby a selected component on the
left hand side of an assignment statement may not get marked as referenced -
leading to spurious unreferenced warnings on such objects.

gcc/ada/

* sem_util.adb (Set_Referenced_Modified): Use Original_Node to
avoid recursive calls on expanded / internal objects such that
source nodes get appropriately marked as referenced.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_util.adb | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index b5c33638b35..4e1258e7cec 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -27625,7 +27625,11 @@ package body Sem_Util is
   --  Deal with indexed or selected component where prefix is modified
 
   if Nkind (N) in N_Indexed_Component | N_Selected_Component then
- Pref := Prefix (N);
+
+ --  Grab the original node to avoid looking at internally generated
+ --  objects.
+
+ Pref := Original_Node (Prefix (N));
 
  --  If prefix is access type, then it is the designated object that is
  --  being modified, which means we have no entity to set the flag on.
-- 
2.43.2

[COMMITTED] ada: Fix ghost policy in use for generic instantiation

2024-05-14 Thread Marc Poulhiès

From: Yannick Moy 

The Ghost assertion policy relevant for analyzing a generic instantiation
is the Ghost policy at the point of instantiation, not the one applicable
for the generic itself.

gcc/ada/

* ghost.adb (Mark_And_Set_Ghost_Instantiation): Fix the current
Ghost policy for the instantiation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/ghost.adb | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/ghost.adb b/gcc/ada/ghost.adb
index 14951a031d9..677089039e8 100644
--- a/gcc/ada/ghost.adb
+++ b/gcc/ada/ghost.adb
@@ -1734,13 +1734,17 @@ package body Ghost is
   elsif Ghost_Mode = Ignore then
  Policy := Name_Ignore;
 
-  --  Inherit the "ghostness" of the generic unit
+  --  Inherit the "ghostness" of the generic unit, but the current Ghost
+  --  policy is the relevant one for the instantiation.
 
-  elsif Is_Checked_Ghost_Entity (Gen_Id) then
- Policy := Name_Check;
+  elsif Is_Checked_Ghost_Entity (Gen_Id)
+or else Is_Ignored_Ghost_Entity (Gen_Id)
+  then
+ Policy := Policy_In_Effect (Name_Ghost);
 
-  elsif Is_Ignored_Ghost_Entity (Gen_Id) then
- Policy := Name_Ignore;
+ if Policy = No_Name then
+Policy := Name_Ignore;
+ end if;
   end if;
 
   --  Mark the instantiation as Ghost
-- 
2.43.2

[COMMITTED] ada: Fix small inaccuracy in previous change

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

The call to Build_Allocate_Deallocate_Proc must occur before the special
accessibility check for class-wide allocation is generated, because this
check comes with cleanup code.

gcc/ada/

* exp_ch4.adb (Expand_Allocator_Expression): Move the first call to
Build_Allocate_Deallocate_Proc up to before the accessibility check.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch4.adb | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb
index b1f7593de2a..762e75616a7 100644
--- a/gcc/ada/exp_ch4.adb
+++ b/gcc/ada/exp_ch4.adb
@@ -960,12 +960,18 @@ package body Exp_Ch4 is
 end if;
  end if;
 
+ --  This needs to done before generating the accessibility check below
+ --  because the check comes with cleanup code that invokes Free on the
+ --  temporary and, therefore, expects the object to be attached to its
+ --  finalization collection if it is controlled.
+
+ Build_Allocate_Deallocate_Proc (Declaration_Node (Temp), Mark => N);
+
  --  Note: the accessibility check must be inserted after the call to
  --  [Deep_]Adjust to ensure proper completion of the assignment.
 
  Apply_Accessibility_Check_For_Allocator (N, Exp, Temp);
 
- Build_Allocate_Deallocate_Proc (Declaration_Node (Temp), Mark => N);
  Rewrite (N, New_Occurrence_Of (Temp, Loc));
  Analyze_And_Resolve (N, PtrT);
 
-- 
2.43.2

[COMMITTED] ada: Missing support for consistent assertion policy

2024-05-14 Thread Marc Poulhiès

From: Javier Miranda 

Add missing support for RM 10.2/5: the region for a pragma
Assertion_Policy given as a configuration pragma is the
declarative region for the entire compilation unit (or units)
to which it applies.

gcc/ada/

* sem_ch10.adb (Install_Inherited_Policy_Pragmas): New subprogram.
(Remove_Inherited_Policy_Pragmas): New subprogram.
(Analyze_Compilation_Unit): Call the new subprograms to
install and remove inherited assertion policy pragmas.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch10.adb | 212 ++-
 1 file changed, 208 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/sem_ch10.adb b/gcc/ada/sem_ch10.adb
index 7fc623b6278..73e5388affd 100644
--- a/gcc/ada/sem_ch10.adb
+++ b/gcc/ada/sem_ch10.adb
@@ -292,6 +292,18 @@ package body Sem_Ch10 is
   --  Spec_Context_Items to that of the spec. Parent packages are not
   --  examined for documentation purposes.
 
+  function Install_Inherited_Policy_Pragmas
+(Comp_Unit : Node_Id) return Node_Id;
+  --  Install assertion_policy pragmas placed at the start of the spec of
+  --  the given compilation unit (and the spec of its parent units). Return
+  --  the last pragma found in the check policy list before installing
+  --  these pragmas; used to remove the installed pragmas.
+
+  procedure Remove_Inherited_Policy_Pragmas (Last_Pragma : Node_Id);
+  --  Remove assertion_policy pragmas installed after the given pragma. If
+  --  Last_Pragma is empty then remove all the pragmas installed in the
+  --  check policy list (if any).
+
   ---
   -- Check_Redundant_Withs --
   ---
@@ -631,6 +643,186 @@ package body Sem_Ch10 is
  end loop;
   end Check_Redundant_Withs;
 
+  --
+  -- Install_Inherited_Policy_Pragmas --
+  --
+
+  --  Opt.Check_Policy_List is handled as a stack; assertion policy
+  --  pragmas defined at inner scopes are placed at the beginning of
+  --  the list. Therefore, policy pragmas defined at the start of
+  --  parent units must be appended to the end of this list.
+
+  --  When the compilation unit is a package body (or a subprogram body
+  --  that does not act as its spec) we recursively traverse to its spec
+  --  (and from there to its ultimate parent); when the compilation unit
+  --  is a child package (or subprogram) spec we recursively climb until
+  --  its ultimate parent. In both cases policy pragmas defined at the
+  --  beginning of all these traversed units are appended to the check
+  --  policy list in the way back to the current compilation unit (and
+  --  they are left installed in reverse order). For example:
+  --
+  -- pragma Assertion_Policy (...) -- [policy-1]
+  -- package Pkg is ...
+  --
+  -- pragma Assertion_Policy (...) -- [policy-2]
+  -- package Pkg.Child is ...
+  --
+  -- pragma Assertion_Policy (...) -- [policy-3]
+  -- package body Pkg.Child is ...
+  --
+  --  When the compilation unit Pkg.Child is analyzed, and its context
+  --  clauses are analyzed, these are the contents of Check_Policy_List:
+  --
+  -- Opt.Check_Policy_List -> [policy-3]
+  --  ^
+  --   last_policy_pragma
+  --
+  --  After climbing to the ultimate parent spec, these are the contents
+  --  of Check_Policy_List:
+  --
+  -- Opt.Check_Policy_List -> [policy-3] -> [policy-2] -> [policy-1]
+  --  ^
+  --   last_policy_pragma
+  --
+  --  The reference to the last policy pragma in the initial contents of
+  --  the list is used later to remove installed inherited pragmas.
+
+  function Install_Inherited_Policy_Pragmas
+(Comp_Unit : Node_Id) return Node_Id
+  is
+ Last_Policy_Pragma : Node_Id;
+
+ procedure Install_Parent_Policy_Pragmas (N : Node_Id);
+ --  Recursively climb to the ultimate parent and install their policy
+ --  pragmas after Last_Policy_Pragma.
+
+ ---
+ -- Install_Parent_Policy_Pragmas --
+ ---
+
+ procedure Install_Parent_Policy_Pragmas (N : Node_Id) is
+Lib_Unit : constant Node_Id := Unit (N);
+Item : Node_Id;
+
+ begin
+if Is_Child_Spec (Lib_Unit) then
+   Install_Parent_Policy_Pragmas (Parent_Spec (Lib_Unit));
+
+elsif Nkind (Lib_Unit) = N_Package_Body then
+   Install_Parent_Policy_Pragmas (Library_Unit (N));
+
+elsif Nkind (Lib_Unit) = N_Subprogram_Body
+

[COMMITTED] ada: Error in determining accumulator subtype for a reduction expression

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

There was an earlier bug in determining the accumulator subtype for a
reduction expression in the case where the reducer subprogram is overloaded.
The fix for that bug introduced a recently-discovered
regression. Redo accumulator subtype computation in order to address
this regression while preserving the benefits of the earlier fix.

gcc/ada/

* exp_attr.adb: Move computation of Accum_Typ entirely into the
function Build_Stat.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb | 65 ++--
 1 file changed, 26 insertions(+), 39 deletions(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index 63b311c1b89..809116d89e3 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -24,7 +24,6 @@
 --
 
 with Accessibility;  use Accessibility;
-with Aspects;use Aspects;
 with Atree;  use Atree;
 with Checks; use Checks;
 with Debug;  use Debug;
@@ -6013,6 +6012,7 @@ package body Exp_Attr is
 
 begin
if Nkind (E1) = N_Attribute_Reference then
+  Accum_Typ := Base_Type (Entity (Prefix (E1)));
   Stat := Make_Assignment_Statement (Loc,
 Name => New_Occurrence_Of (Bnn, Loc),
 Expression => Make_Attribute_Reference (Loc,
@@ -6023,12 +6023,15 @@ package body Exp_Attr is
 Comp)));
 
elsif Ekind (Entity (E1)) = E_Procedure then
+  Accum_Typ := Etype (First_Formal (Entity (E1)));
   Stat := Make_Procedure_Call_Statement (Loc,
 Name => New_Occurrence_Of (Entity (E1), Loc),
Parameter_Associations => New_List (
  New_Occurrence_Of (Bnn, Loc),
  Comp));
+
else
+  Accum_Typ := Etype (Entity (E1));
   Stat := Make_Assignment_Statement (Loc,
 Name => New_Occurrence_Of (Bnn, Loc),
 Expression => Make_Function_Call (Loc,
@@ -6038,6 +6041,28 @@ package body Exp_Attr is
 Comp)));
end if;
 
+   --  Try to cope if E1 is wrong because it is an overloaded
+   --  subprogram that happens to be the first candidate
+   --  on a homonym chain, but that resolution candidate turns
+   --  out to be the wrong one.
+   --  This workaround usually gets the right type, but it can
+   --  yield the wrong subtype of that type.
+
+   if Base_Type (Accum_Typ) /= Base_Type (Etype (N)) then
+  Accum_Typ := Etype (N);
+   end if;
+
+   --  Try to cope with wrong E1 when Etype (N) doesn't help
+   if Is_Universal_Numeric_Type (Accum_Typ) then
+  if Is_Array_Type (Etype (Prefix (N))) then
+ Accum_Typ := Component_Type (Etype (Prefix (N)));
+  else
+ --  Further hackery can be added here when there is a
+ --  demonstrated need.
+ null;
+  end if;
+   end if;
+
return Stat;
 end Build_Stat;
 
@@ -6088,10 +6113,6 @@ package body Exp_Attr is
   End_Label => Empty,
   Statements =>
 New_List (Build_Stat (Relocate_Node (Expr;
-
-  --  Look at the context to find the type.
-
-  Accum_Typ := Etype (N);
end;
 
 else
@@ -6121,40 +6142,6 @@ package body Exp_Attr is
   Statements => New_List (
 Build_Stat (New_Occurrence_Of (Elem, Loc;
 
-  --  Look at the prefix to find the type. This is
-  --  modeled on Analyze_Iterator_Specification in Sem_Ch5.
-
-  declare
- Ptyp : constant Entity_Id :=
-  Base_Type (Etype (Prefix (N)));
-
-  begin
- if Is_Array_Type (Ptyp) then
-Accum_Typ := Component_Type (Ptyp);
-
- elsif Has_Aspect (Ptyp, Aspect_Iterable) then
-declare
-   Element : constant Entity_Id :=
-   Get_Iterable_Type_Primitive
- (Ptyp, Name_Element);
-begin
-   if Present (Element) then
-  Accum_Typ := Etype (Element);
-   end if;
-end;
-
- else
-declar

[COMMITTED] ada: Compiler crash or errors on if_expression in container aggregate

2024-05-14 Thread Marc Poulhiès

From: Gary Dismukes 

The compiler may either crash or incorrectly report errors when
a component association in a container aggregate is an if_expression
with an elsif part whose dependent expression is a call to a function
returning a result that requires finalization. The compiler complains
that a private type is expected, but a package or procedure name was
found. This is due to the compiler improperly associating expanded
calls to Finalize_Object with the aggregate, rather than the enclosing
object declaration being initialized by the aggregate, which can result
in the Finalize_Object procedure call being passed as an actual to
the Add_Unnamed operation of the container type and leading to a type
mismatch and the confusing error message. This is fixed by adjusting
the code that locates the proper context for insertion of Finalize_Object
calls to locate the enclosing declaration or statement rather than
stopping at the aggregate.

gcc/ada/

* exp_util.adb (Find_Hook_Context): Exclude N_*Aggregate Nkinds
of Parent (Par) from the early return in the second loop of the
In_Cond_Expr case, to prevent returning an aggregate from this
function rather than the enclosing declaration or statement.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_util.adb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index 4b1c5322f62..d9623e2ea40 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -6412,7 +6412,9 @@ package body Exp_Util is
   and then Nkind (Parent (Par)) not in N_Function_Call
  | N_Procedure_Call_Statement
  | N_Entry_Call_Statement
-
+ | N_Aggregate
+ | N_Delta_Aggregate
+ | N_Extension_Aggregate
 then
return Par;
 
-- 
2.43.2

[COMMITTED] ada: Rtsfind should not trash state used in analyzing instantiations.

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

During analysis of an instantiation, Sem_Ch12 manages formal/actual binding
information in package state (see Sem_Ch12.Generic_Renamings_HTable).
A call to rtsfind can cause another unit to be loaded and compiled.
If this occurs during the analysis of an instantiation, and if the loaded
unit contains a second instantiation, then the Sem_Ch12 state needed for
analyzing the first instantiation can be trashed during the analysis of the
second instantiation. Rtsfind calls that can include the analysis of an
instantiation need to save and restore Sem_Ch12's state.

gcc/ada/

* sem_ch12.ads: Declare new Instance_Context package, which
declares a private type Context with operations Save_And_Reset and
Restore.
* sem_ch12.adb: Provide body for new Instance_Context package.
* rtsfind.adb (Load_RTU): Wrap an Instance_Context Save/Restore
call pair around the call to Semantics.
* table.ads: Add initial value for Last_Val (because
Save_And_Reset expects Last_Val to be initialized).

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/rtsfind.adb  |  9 ++-
 gcc/ada/sem_ch12.adb | 62 
 gcc/ada/sem_ch12.ads | 25 ++
 gcc/ada/table.ads|  2 +-
 4 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/rtsfind.adb b/gcc/ada/rtsfind.adb
index 8933ca6ce16..7c9935e614c 100644
--- a/gcc/ada/rtsfind.adb
+++ b/gcc/ada/rtsfind.adb
@@ -47,6 +47,7 @@ with Restrict;   use Restrict;
 with Sem;use Sem;
 with Sem_Aux;use Sem_Aux;
 with Sem_Ch7;use Sem_Ch7;
+with Sem_Ch12;use Sem_Ch12;
 with Sem_Dist;   use Sem_Dist;
 with Sem_Util;   use Sem_Util;
 with Sinfo;  use Sinfo;
@@ -1185,7 +1186,13 @@ package body Rtsfind is
 
 else
Save_Private_Visibility;
-   Semantics (Cunit (U.Unum));
+   declare
+  Saved_Instance_Context : constant Instance_Context.Context
+:= Instance_Context.Save_And_Reset;
+   begin
+  Semantics (Cunit (U.Unum));
+  Instance_Context.Restore (Saved_Instance_Context);
+   end;
Restore_Private_Visibility;
 
if Fatal_Error (U.Unum) = Error_Detected then
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index cb05a71e96f..4ceddda2052 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -17753,4 +17753,66 @@ package body Sem_Ch12 is
 raise Program_Error;
   end case;
end Validate_Formal_Type_Default;
+
+   package body Instance_Context is
+
+  
+  -- Save_And_Reset --
+  
+
+  function Save_And_Reset return Context is
+  begin
+ return Result : Context (0 .. Integer (Generic_Renamings.Last)) do
+for Index in Result'Range loop
+   declare
+  Indexed_Assoc : Assoc renames Generic_Renamings.Table
+  (Assoc_Ptr (Index));
+  Result_Pair : Binding_Pair renames Result (Index);
+   begin
+  --  If we have called Increment_Last but have not yet
+  --  initialized the new last element of the table, then
+  --  that last element might be invalid. Saving and
+  --  restoring (especially restoring, it turns out) invalid
+  --  values can result in exceptions if predicate checking
+  --  is enabled, so replace invalid values with Empty.
+
+  if Indexed_Assoc.Gen_Id'Valid then
+ Result_Pair.Formal_Id := Indexed_Assoc.Gen_Id;
+  else
+ pragma Assert (Index = Result'Last);
+ Result_Pair.Formal_Id := Empty;
+  end if;
+
+  if Indexed_Assoc.Act_Id'Valid then
+ Result_Pair.Actual_Id := Indexed_Assoc.Act_Id;
+  else
+ pragma Assert (Index = Result'Last);
+ Result_Pair.Actual_Id := Empty;
+  end if;
+   end;
+end loop;
+
+Generic_Renamings.Init;
+Generic_Renamings.Set_Last (0);
+Generic_Renamings_HTable.Reset;
+ end return;
+  end Save_And_Reset;
+
+  -
+  -- Restore --
+  -
+
+  procedure Restore (Saved : Context) is
+  begin
+ Generic_Renamings.Init;
+ Generic_Renamings.Set_Last (0);
+ Generic_Renamings_HTable.Reset;
+ Generic_Renamings.Increment_Last;
+ for Pair of Saved loop
+Set_Instance_Of (Pair.Formal_Id, Pair.Actual_Id);
+ end loop;
+ Generic_Renamings.Decrement_Last;
+  end Restore;
+
+   end Instance_Context;
 end Sem_Ch12;
d

[COMMITTED] ada: Fix crash with -gnatdJ and -gnatf

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch fixes a crash when the compiler emits a warning about
an unchecked conversion and -gnatdJ is enabled.

gcc/ada/

* sem_ch13.adb (Validate_Unchecked_Conversions): Add node
parameters to Error_Msg calls.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch13.adb | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
index 0470ce10ac7..1ad5c4c0128 100644
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -18401,7 +18401,8 @@ package body Sem_Ch13 is
  Error_Msg_Uint_1 := Source_Siz;
  Error_Msg_Name_2 := Chars (Target);
  Error_Msg_Uint_2 := Target_Siz;
- Error_Msg ("\size of % is ^, size of % is ^?z?", Eloc);
+ Error_Msg
+   ("\size of % is ^, size of % is ^?z?", Eloc, Act_Unit);
 
  Error_Msg_Uint_1 := UI_Abs (Source_Siz - Target_Siz);
 
@@ -18412,17 +18413,17 @@ package body Sem_Ch13 is
 if Source_Siz > Target_Siz then
Error_Msg
  ("\?z?^ high order bits of source will "
-  & "be ignored!", Eloc);
+  & "be ignored!", Eloc, Act_Unit);
 
 elsif Is_Unsigned_Type (Source) then
Error_Msg
  ("\?z?source will be extended with ^ high order "
-  & "zero bits!", Eloc);
+  & "zero bits!", Eloc, Act_Unit);
 
 else
Error_Msg
  ("\?z?source will be extended with ^ high order "
-  & "sign bits!", Eloc);
+  & "sign bits!", Eloc, Act_Unit);
 end if;
 
  elsif Source_Siz < Target_Siz then
-- 
2.43.2

[COMMITTED] ada: Reduce generated code duplication for streaming and Put_Image subprograms

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

In the case of an untagged composite type, the compiler does not generate
streaming-related subprograms or a Put_Image procedure when the type is
declared. Instead, these subprograms are declared "on demand" when a
corresponding attribute reference is encountered. In this case, hoist the
declaration of the implicitly declared subprogram out as far as possible
in order to maximize the chances that it can be reused (as opposed to
generating an identical second subprogram) in the case where a second
reference to the same attribute is encountered. Also relax some
privacy-related rules to allow these procedures to do what they need to do
even when constructed in a scope where some of those actions would
normally be illegal.

gcc/ada/

* exp_attr.adb: Change name of package Cached_Streaming_Ops to
reflect the fact that it is now also used for Put_Image
procedures. Similarly change other "Streaming_Op" names therein.
Add Validate_Cached_Candidate procedure to detect case where a
subprogram found in the cache cannot be reused. Add new generic
procedure Build_And_Insert_Type_Attr_Subp; the "Build" part is
handled by just calling a formal procedure; the bulk of this
(generic) procedure's code has to do with deciding where in the tree
to insert the newly-constructed subprogram. Replace each later
"Build" call (and the following Insert_Action or
Compile_Stream_Body_In_Scope call) with a declare block that
instantiates and then calls this generic procedure. Delete the
now-unused procedure Compile_Stream_Body_In_Scope. A constructed
subprogram is entered in the appropriate cache if the
corresponding type is untagged; this replaces more complex tests.
A new function Interunit_Ref_OK is added to determine whether an
attribute reference occurring in one unit can safely refer to a
cached subprogram declared in another unit.
* exp_ch3.adb (Build_Predefined_Primitive_Bodies): A formal
parameter was deleted, so delete the corresponding actual in a
call.
* exp_put_image.adb (Build_Array_Put_Image_Procedure): Because the
procedure being built may be referenced more than once, the
generated procedure takes its source position info from the type
declaration instead of the (first) attribute reference.
(Build_Record_Put_Image_Procedure): Likewise.
* exp_put_image.ads (Build_Array_Put_Image_Procedure): Eliminate
now-unused Nod parameter.
(Build_Record_Put_Image_Procedure): Eliminate now-unused Loc parameter.
* sem_ch3.adb (Constrain_Discriminated_Type): For declaring a
subtype with a discriminant constraint, ignore privacy if
Comes_From_Source is false (as is already done if Is_Instance is
true).
* sem_res.adb (Resolve): When passed two type entities that have
the same underlying base type, Sem_Type.Covers may return False in
some cases because of privacy. [This can happen even if
Is_Private_Type returns False both for Etype (N) and for Typ;
Covers calls Base_Type, which can take a non-private argument and
yield a private result.] If Comes_From_Source (N) is False
(e.g., for a compiler-generated Put_Image or streaming subprogram), then
avoid that scenario by not calling Covers. Covers already has tests for
doing this sort of thing (see the calls therein to Full_View_Covers),
but the Comes_From_Source test is too coarse to apply there. So instead
we handle the problem here at the call site.
(Original_Implementation_Base_Type): A new function. Same as
Implementation_Base_Type except if the Original_Node attribute of
a non-derived type declaration indicates that it once was a derived
type declaration. Needed for looking through privacy.
(Valid_Conversion): Ignore privacy when converting between different
views of the same type if Comes_From_Source is False for the conversion.
(Valid_Tagged_Conversion): An ancestor-to-descendant conversion is not an
illegal downward conversion if there is no type extension involved
(because the derivation was from an untagged view of the parent type).

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb  | 627 --
 gcc/ada/exp_ch3.adb   |   2 +-
 gcc/ada/exp_put_image.adb |  13 +-
 gcc/ada/exp_put_image.ads |   8 +-
 gcc/ada/sem_ch3.adb   |   5 +-
 gcc/ada/sem_res.adb   |  79 -
 6 files changed, 484 insertions(+), 250 deletions(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index 809116d89e3..b7277118a9c 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -80,12 +80,12 @@ with GNAT.HTable;
 
 package body Exp_Attr is
 
-   package Cached_Streaming_Ops is
+   packag

[COMMITTED] ada: Fix overlap warning suppression

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

Before this patch, some warnings about overlapping actuals were
emitted regardless of the value of
Warnsw.Warnings_Package.Warn_On_Overlap. This patch fixes this.

gcc/ada/

* sem_warn.adb (Warn_On_Overlapping_Actuals): Stop ignoring
warning suppression settings.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_warn.adb | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/gcc/ada/sem_warn.adb b/gcc/ada/sem_warn.adb
index 57bdee65356..ccf9067c672 100644
--- a/gcc/ada/sem_warn.adb
+++ b/gcc/ada/sem_warn.adb
@@ -3831,16 +3831,6 @@ package body Sem_Warn is
   then
  null;
 
-  --  We only report warnings on overlapping arrays and record
-  --  types if switch is set.
-
-  elsif not Warn_On_Overlap
-and then not (Is_Elementary_Type (Etype (Form1))
-and then
-  Is_Elementary_Type (Etype (Form2)))
-  then
- null;
-
   --  Here we may need to issue overlap message
 
   else
@@ -3858,22 +3848,25 @@ package body Sem_Warn is
 
or else not
 (Is_Elementary_Type (Etype (Form1))
- and then Is_Elementary_Type (Etype (Form2)))
+ and then Is_Elementary_Type (Etype (Form2)));
 
-   --  debug flag -gnatd.E changes the error to a warning
-   --  even in Ada 2012 mode.
+ if not Error_Msg_Warn or else Warn_On_Overlap then
+--  debug flag -gnatd.E changes the error to a warning
+--  even in Ada 2012 mode.
 
-   or else Error_To_Warning;
+if Error_To_Warning then
+   Error_Msg_Warn := True;
+end if;
 
- --  For greater clarity, give name of formal
+--  For greater clarity, give name of formal
 
- Error_Msg_Node_2 := Form2;
+Error_Msg_Node_2 := Form2;
 
- --  This is one of the messages
+--  This is one of the messages
 
- Error_Msg_FE
-   ("<.i

[COMMITTED] ada: Small cleanup about allocators and aggregates

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

This eliminates a few oddities present in the expander for allocators and
aggregates present in allocators:

  - Convert_Aggr_In_Allocator takes both Decl and Alloc parameters,
and inserts new code before Alloc for records and after Decl for arrays
through Convert_Array_Aggr_In_Allocator.  Now, for the 3 (duplicated)
calls to the procedure, that's the same place.  It also creates a new
list that it does not use in most cases.

  - Expand_Allocator_Expression uses the same code sequence in 3 places
when the expression is an aggregate to build in place.

  - Build_Allocate_Deallocate_Proc takes an Is_Allocate parameter that is
entirely determined by the N parameter: if N is an allocator, it must
be true; if N is a free statement, it must be false.  Barring that,
the procedure either raises an assertion or Program_Error.  It also
contains useless pattern matching code in the second part.

No functional changes.

gcc/ada/

* exp_aggr.ads (Convert_Aggr_In_Allocator): Rename Alloc into N,
replace Decl with Temp and adjust description.
(Convert_Aggr_In_Object_Decl): Alphabetize.
(Is_Delayed_Aggregate): Likewise.
* exp_aggr.adb (Convert_Aggr_In_Allocator): Rename Alloc into N
and replace Decl with Temp.  Allocate a list only when needed.
(Convert_Array_Aggr_In_Allocator): Replace Decl with N and insert
new code before it.
* exp_ch4.adb (Build_Aggregate_In_Place): New procedure nested in
Expand_Allocator_Expression.
(Expand_Allocator_Expression): Call it to build aggregates in place.
Remove second parameter in calls to Build_Allocate_Deallocate_Proc.
(Expand_N_Allocator): Likewise.
* exp_ch13.adb (Expand_N_Free_Statement): Likewise.
* exp_util.ads (Build_Allocate_Deallocate_Proc): Remove Is_Allocate
parameter.
* exp_util.adb (Build_Allocate_Deallocate_Proc): Remove Is_Allocate
parameter and replace it with local variable of same name.  Delete
useless pattern matching.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb |  34 ++--
 gcc/ada/exp_aggr.ads |  33 ++--
 gcc/ada/exp_ch13.adb |   2 +-
 gcc/ada/exp_ch4.adb  | 123 ++-
 gcc/ada/exp_util.adb |  48 ++---
 gcc/ada/exp_util.ads |   7 +--
 6 files changed, 102 insertions(+), 145 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index a4e4d81f0a8..27a7f3d2b49 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -283,7 +283,7 @@ package body Exp_Aggr is
--are writing into.
 
procedure Convert_Array_Aggr_In_Allocator
- (Decl   : Node_Id;
+ (N  : Node_Id;
   Aggr   : Node_Id;
   Target : Node_Id);
--  If the aggregate appears within an allocator and can be expanded in
@@ -3542,13 +3542,12 @@ package body Exp_Aggr is
---
 
procedure Convert_Aggr_In_Allocator
- (Alloc :  Node_Id;
-  Decl  :  Node_Id;
-  Aggr  :  Node_Id)
+ (N: Node_Id;
+  Aggr : Node_Id;
+  Temp : Entity_Id)
is
   Loc  : constant Source_Ptr := Sloc (Aggr);
   Typ  : constant Entity_Id  := Etype (Aggr);
-  Temp : constant Entity_Id  := Defining_Identifier (Decl);
 
   Occ  : constant Node_Id :=
 Unchecked_Convert_To (Typ,
@@ -3556,26 +3555,29 @@ package body Exp_Aggr is
 
begin
   if Is_Array_Type (Typ) then
- Convert_Array_Aggr_In_Allocator (Decl, Aggr, Occ);
+ Convert_Array_Aggr_In_Allocator (N, Aggr, Occ);
 
   elsif Has_Default_Init_Comps (Aggr) then
  declare
-L  : constant List_Id := New_List;
-Init_Stmts : List_Id;
+Init_Stmts : constant List_Id := Late_Expansion (Aggr, Typ, Occ);
 
  begin
-Init_Stmts := Late_Expansion (Aggr, Typ, Occ);
-
 if Has_Task (Typ) then
-   Build_Task_Allocate_Block (L, Aggr, Init_Stmts);
-   Insert_Actions (Alloc, L);
+   declare
+  Actions : constant List_Id := New_List;
+
+   begin
+  Build_Task_Allocate_Block (Actions, Aggr, Init_Stmts);
+  Insert_Actions (N, Actions);
+   end;
+
 else
-   Insert_Actions (Alloc, Init_Stmts);
+   Insert_Actions (N, Init_Stmts);
 end if;
  end;
 
   else
- Insert_Actions (Alloc, Late_Expansion (Aggr, Typ, Occ));
+ Insert_Actions (N, Late_Expansion (Aggr, Typ, Occ));
   end if;
end Convert_Aggr_In_Allocator;
 
@@ -3774,7 +3776,7 @@ package body Exp_Aggr is
-
 
procedure Convert_Array_Aggr_In_Allocator
- (Decl   : Node_Id;
+ (N  : Node_Id;
   Aggr   : Node_Id;
   Target : Node_Id)
is
@@ -3829,7 +3831,7 @@ pa

[COMMITTED] ada: Better error message for bad general case statements

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

If -gnatX0 is specified, we allow case statements with a selector
expression of a record or array type, but not of a private type.
If the selector expression is of a private type then we should generate
an appropriate error message instead of a bugbox.

gcc/ada/

* sem_ch5.adb (Analyze_Case_Statement): Emit a message and return
early in the case where general case statements are allowed but
the selector expression is of a private type. This is done to
avoid a bugbox.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch5.adb | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index 2677a2c5a1c..65370ebfe7e 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -1497,6 +1497,15 @@ package body Sem_Ch5 is
  Resolve (Exp, Etype (Exp));
  Exp_Type := Etype (Exp);
  Is_General_Case_Statement := True;
+ if not (Is_Record_Type (Exp_Type) or Is_Array_Type (Exp_Type)) then
+Error_Msg_N
+  ("selecting expression of general case statement " &
+   "must be a record or an array",
+   Exp);
+
+--  Avoid cascading errors
+return;
+ end if;
   else
  Analyze_And_Resolve (Exp, Any_Discrete);
  Exp_Type := Etype (Exp);
-- 
2.43.2

[COMMITTED] ada: Follow-up adjustment after fix to Default_Initialize_Object

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

Now that Default_Initialize_Object honors the No_Initialization flag in all
cases, objects of an access type declared without initialization expression
can no longer be considered as being automatically initialized to null.

gcc/ada/

* exp_ch3.adb (Expand_N_Object_Declaration): Examine the Expression
field after the call to Default_Initialize_Object in order to set
Is_Known_Null, as well as Is_Known_Non_Null, on an access object.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index e34cb8fb58f..9109d592690 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -7604,6 +7604,16 @@ package body Exp_Ch3 is
 
  if not Special_Ret_Obj then
 Default_Initialize_Object (Init_After);
+
+--  Check whether an access object has been initialized above
+
+if Is_Access_Type (Typ) and then Present (Expression (N)) then
+   if Known_Non_Null (Expression (N)) then
+  Set_Is_Known_Non_Null (Def_Id);
+   elsif Known_Null (Expression (N)) then
+  Set_Is_Known_Null (Def_Id);
+   end if;
+end if;
  end if;
 
  --  Generate attribute for Persistent_BSS if needed
@@ -7625,12 +7635,6 @@ package body Exp_Ch3 is
 end;
  end if;
 
- --  If access type, then we know it is null if not initialized
-
- if Is_Access_Type (Typ) then
-Set_Is_Known_Null (Def_Id);
- end if;
-
   --  Explicit initialization present
 
   else
-- 
2.43.2

[COMMITTED] ada: Restore default size for dynamic allocations of discriminated type

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

The allocation strategy for objects of a discriminated type with defaulted
discriminants is not the same when the allocation is dynamic as when it is
static (i.e. a declaration): in the former case, the compiler allocates the
default size whereas, in the latter case, it allocates the maximum size.

This restores the default size, which was dropped during the refactoring.

gcc/ada/

* exp_aggr.adb (Build_Array_Aggr_Code): Pass N in the call to
Build_Initialization_Call.
(Build_Record_Aggr_Code): Likewise.
(Convert_Aggr_In_Object_Decl): Likewise.
(Initialize_Discriminants): Likewise.
* exp_ch3.ads (Build_Initialization_Call): Replace Loc with N.
* exp_ch3.adb (Build_Array_Init_Proc): Pass N in the call to
Build_Initialization_Call.
(Build_Default_Initialization): Likewise.
(Expand_N_Object_Declaration): Likewise.
(Build_Initialization_Call): Replace Loc with N parameter and add
Loc local variable.  Build a default subtype for an allocator of
a discriminated type with defaulted discriminants.
(Build_Record_Init_Proc): Pass the declaration of components in the
call to Build_Initialization_Call.
* exp_ch6.adb (Make_CPP_Constructor_Call_In_Allocator): Pass the
allocator in the call to Build_Initialization_Call.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 18 --
 gcc/ada/exp_ch3.adb  | 37 -
 gcc/ada/exp_ch3.ads  |  4 ++--
 gcc/ada/exp_ch6.adb  |  2 +-
 4 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 86f304e90bb..a4e4d81f0a8 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -1493,7 +1493,7 @@ package body Exp_Aggr is
   or else Has_Task (Base_Type (Ctype))
 then
Append_List_To (Stmts,
- Build_Initialization_Call (Loc,
+ Build_Initialization_Call (N,
Id_Ref=> Indexed_Comp,
Typ   => Ctype,
With_Default_Init => True));
@@ -2936,7 +2936,7 @@ package body Exp_Aggr is
 
if not Is_Interface (Init_Typ) then
   Append_List_To (L,
-Build_Initialization_Call (Loc,
+Build_Initialization_Call (N,
   Id_Ref=> Ref,
   Typ   => Init_Typ,
   In_Init_Proc  => Within_Init_Proc,
@@ -2971,7 +2971,7 @@ package body Exp_Aggr is
Set_Assignment_OK (Ref);
 
Append_List_To (L,
- Build_Initialization_Call (Loc,
+ Build_Initialization_Call (N,
Id_Ref=> Ref,
Typ   => Init_Typ,
In_Init_Proc  => Within_Init_Proc,
@@ -3148,7 +3148,7 @@ package body Exp_Aggr is
 
  if Is_CPP_Constructor_Call (Expression (Comp)) then
 Append_List_To (L,
-  Build_Initialization_Call (Loc,
+  Build_Initialization_Call (N,
 Id_Ref=>
   Make_Selected_Component (Loc,
 Prefix=> New_Copy_Tree (Target),
@@ -3217,7 +3217,7 @@ package body Exp_Aggr is
 end;
 
 Append_List_To (L,
-  Build_Initialization_Call (Loc,
+  Build_Initialization_Call (N,
 Id_Ref=> Make_Selected_Component (Loc,
Prefix=> New_Copy_Tree (Target),
Selector_Name =>
@@ -3747,8 +3747,8 @@ package body Exp_Aggr is
   Param := First (Parameter_Associations (Stmt));
   Insert_Actions
 (Stmt,
- Build_Initialization_Call
-   (Sloc (N), New_Copy_Tree (Param), Etype (Param)));
+ Build_Initialization_Call (N,
+   New_Copy_Tree (Param), Etype (Param)));
end if;
 
Next (Stmt);
@@ -9279,13 +9279,11 @@ package body Exp_Aggr is
   Present (Variant_Part (Component_List (Type_Definition (Decl
 and then Nkind (N) /= N_Extension_Aggregate
   then
-
  --   Call init proc to set discriminants.
  --   There should eventually be a special procedure for this ???
 
  Ref := New_Occurrence_Of (Defining_Identifier (N), Loc);
- Insert_Actions_After (N,
-   Build_Initialization_Call (Sloc (N), Ref, Typ));
+ Insert_Actions_After (N, Build_Initialization_Call (N, Ref, Typ));
   end if;
end Initialize_Discriminants;
 
diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index 9109d592690..13a0c8e7500 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc

[COMMITTED] ada: Minor typo fix in comment

2024-05-14 Thread Marc Poulhiès

gcc/ada/

* sem_util.adb: Typo fix in comment.
* exp_aggr.adb: Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 2 +-
 gcc/ada/sem_util.adb | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 27a7f3d2b49..bdaca4aab58 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -1988,7 +1988,7 @@ package body Exp_Aggr is
   --  STEP 1: Process component associations
 
   --  For those associations that may generate a loop, initialize
-  --  Loop_Actions to collect inserted actions that may be crated.
+  --  Loop_Actions to collect inserted actions that may be created.
 
   --  Skip this if no component associations
 
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index 4e1258e7cec..1785931530f 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -7785,7 +7785,7 @@ package body Sem_Util is
 Set_Is_Immediately_Visible (E, False);
 
  --  Case of renaming declaration constructed for package instances.
- --  if there is an explicit declaration with the same identifier,
+ --  If there is an explicit declaration with the same identifier,
  --  the renaming is not immediately visible any longer, but remains
  --  visible through selected component notation.
 
@@ -7794,7 +7794,7 @@ package body Sem_Util is
  then
 Set_Is_Immediately_Visible (E, False);
 
- --  The new entity may be the package renaming, which has the same
+ --  The new entity may be the package renaming, which has the
  --  same name as a generic formal which has been seen already.
 
  elsif Nkind (Parent (Def_Id)) = N_Package_Renaming_Declaration
-- 
2.43.2

[COMMITTED] ada: Fix warning indicators in usage string

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

Before this patch, the default statuses of -gnatw.i and -gnatw.d were
reported incorrectly in the usage string used throughout GNAT tools.
This patch fixes this.

gcc/ada/

* usage.adb (Usage): Fix enabled-by-default indicators.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/usage.adb | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/usage.adb b/gcc/ada/usage.adb
index 85b371ac4f1..59cbd6f4a2f 100644
--- a/gcc/ada/usage.adb
+++ b/gcc/ada/usage.adb
@@ -498,8 +498,8 @@ begin
  "Compile_Time_Warning");
Write_Line ("dturn on warnings for implicit dereference");
Write_Line ("D*   turn off warnings for implicit dereference");
-   Write_Line (".d   turn on tagging of warnings with -gnatw switch");
-   Write_Line (".D*  turn off tagging of warnings with -gnatw switch");
+   Write_Line (".d*  turn on tagging of warnings with -gnatw switch");
+   Write_Line (".D   turn off tagging of warnings with -gnatw switch");
Write_Line ("etreat all warnings (but not info) as errors");
Write_Line (".e   turn on every optional info/warning " &
   "(no exceptions)");
@@ -517,8 +517,8 @@ begin
Write_Line (".H*  turn off warnings for holes in records");
Write_Line ("i*+  turn on warnings for implementation unit");
Write_Line ("Iturn off warnings for implementation unit");
-   Write_Line (".i*+ turn on warnings for overlapping actuals");
-   Write_Line (".I   turn off warnings for overlapping actuals");
+   Write_Line (".i+  turn on warnings for overlapping actuals");
+   Write_Line (".I*  turn off warnings for overlapping actuals");
Write_Line ("j+   turn on warnings for obsolescent " &
   "(annex J) feature");
Write_Line ("J*   turn off warnings for obsolescent " &
-- 
2.43.2

[COMMITTED] ada: Replace "not Present" tests with "No".

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

Fix constructs that were flagged by CodePeer.

gcc/ada/

* exp_attr.adb: Replace 6 "not Present" tests with equivalent calls to 
"No".

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index b7277118a9c..6dba600620e 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -4334,7 +4334,7 @@ package body Exp_Attr is
 
  Fname := Find_Stream_Subprogram (P_Type, TSS_Stream_Input, N);
 
- if not Present (Fname) then
+ if No (Fname) then
 
 --  If there is a Stream_Convert pragma, use it, we rewrite
 
@@ -5422,7 +5422,7 @@ package body Exp_Attr is
 
  Pname := Find_Stream_Subprogram (P_Type, TSS_Stream_Output, N);
 
- if not Present (Pname) then
+ if No (Pname) then
 
 --  If there is a Stream_Convert pragma, use it, we rewrite
 
@@ -5998,7 +5998,7 @@ package body Exp_Attr is
Pname := Cached_Attribute_Ops.Put_Image_Map.Get (U_Type);
Cached_Attribute_Ops.Validate_Cached_Candidate
  (Pname, Attr_Ref => N);
-   if not Present (Pname) then
+   if No (Pname) then
   declare
  procedure Build_And_Insert_Array_Put_Image_Proc is
new Build_And_Insert_Type_Attr_Subp
@@ -6052,7 +6052,7 @@ package body Exp_Attr is
   Pname := Cached_Attribute_Ops.Put_Image_Map.Get (Base_Typ);
   Cached_Attribute_Ops.Validate_Cached_Candidate
 (Pname, Attr_Ref => N);
-  if not Present (Pname) then
+  if No (Pname) then
  declare
 procedure Build_And_Insert_Record_Put_Image_Proc is
   new Build_And_Insert_Type_Attr_Subp
@@ -6352,7 +6352,7 @@ package body Exp_Attr is
 
  Pname := Find_Stream_Subprogram (P_Type, TSS_Stream_Read, N);
 
- if not Present (Pname) then
+ if No (Pname) then
 
 --  If there is a Stream_Convert pragma, use it, we rewrite
 
@@ -8067,7 +8067,7 @@ package body Exp_Attr is
 
  Pname := Find_Stream_Subprogram (P_Type, TSS_Stream_Write, N);
 
- if not Present (Pname) then
+ if No (Pname) then
 
 --  If there is a Stream_Convert pragma, use it, we rewrite
 
-- 
2.43.2

[COMMITTED] ada: Fix crash with -gnatdJ and JSON output

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch tweaks the calls made to Errout subprograms to report
violations of dependence restrictions, in order to fix a crash that
occurred with -gnatdJ and -fdiagnostics-format=json.

gcc/ada/

* restrict.adb (Violation_Of_No_Dependence): Tweak error
reporting calls.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/restrict.adb | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/restrict.adb b/gcc/ada/restrict.adb
index 1cc75bec326..bda35d8f441 100644
--- a/gcc/ada/restrict.adb
+++ b/gcc/ada/restrict.adb
@@ -1704,16 +1704,16 @@ package body Restrict is

 
procedure Violation_Of_No_Dependence (Unit : Int; N : Node_Id) is
+  Unit_Node : constant Node_Id := No_Dependences.Table (Unit).Unit;
begin
-  Error_Msg_Node_1 := No_Dependences.Table (Unit).Unit;
-  Error_Msg_Sloc   := Sloc (Error_Msg_Node_1);
+  Error_Msg_Sloc := Sloc (Unit_Node);
 
   if No_Dependences.Table (Unit).Warn then
- Error_Msg
-   ("?*?violation of restriction `No_Dependence '='> &`#", Sloc (N));
+ Error_Msg_NE ("?*?violation of restriction `No_Dependence '='> &`#",
+   N, Unit_Node);
   else
- Error_Msg
-   ("|violation of restriction `No_Dependence '='> &`#", Sloc (N));
+ Error_Msg_NE ("|violation of restriction `No_Dependence '='> &`#", N,
+   Unit_Node);
   end if;
end Violation_Of_No_Dependence;
 
-- 
2.43.2

[COMMITTED] ada: Fix pragma Compile_Time_Error and -gnatdJ crash

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch makes it so the diagnostics coming from occurrences of
pragma Compile_Time_Error and Compile_Time_Warning are emitted with
a node parameter so they don't cause a crash when -gnatdJ is enabled.

gcc/ada/

* errout.ads (Error_Msg): Add node parameter.
* errout.adb (Error_Msg): Add parameter and pass it to
the underlying call.
* sem_prag.adb (Validate_Compile_Time_Warning_Or_Error): Pass
pragma node when emitting errors.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb   | 3 ++-
 gcc/ada/errout.ads   | 7 ---
 gcc/ada/sem_prag.adb | 8 
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 4622290897b..f10539d0949 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -325,12 +325,13 @@ package body Errout is
procedure Error_Msg
   (Msg : String;
Flag_Location : Source_Ptr;
+   N : Node_Id;
Is_Compile_Time_Pragma : Boolean)
is
   Save_Is_Compile_Time_Msg : constant Boolean := Is_Compile_Time_Msg;
begin
   Is_Compile_Time_Msg := Is_Compile_Time_Pragma;
-  Error_Msg (Msg, To_Span (Flag_Location), Current_Node);
+  Error_Msg (Msg, To_Span (Flag_Location), N);
   Is_Compile_Time_Msg := Save_Is_Compile_Time_Msg;
end Error_Msg;
 
diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
index 089da867d45..f0e3f5d0b7c 100644
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -738,10 +738,11 @@ package Errout is
procedure Error_Msg
  (Msg: String;
   Flag_Location  : Source_Ptr;
+  N  : Node_Id;
   Is_Compile_Time_Pragma : Boolean);
-   --  Same as Error_Msg (String, Source_Ptr) except Is_Compile_Time_Pragma
-   --  lets the caller specify whether this is a Compile_Time_Warning or
-   --  Compile_Time_Error pragma.
+   --  Same as Error_Msg (String, Source_Ptr, Node_Id) except
+   --  Is_Compile_Time_Pragma lets the caller specify whether this is a
+   --  Compile_Time_Warning or Compile_Time_Error pragma.
 
procedure Error_Msg_S (Msg : String);
--  Output a message at current scan pointer location. This routine can be
diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
index dfc415da3f3..9646e891c46 100644
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -33145,11 +33145,11 @@ package body Sem_Prag is
   if Force then
  if Cont = False then
 Error_Msg
-   ("<<~!!", Eloc, Is_Compile_Time_Pragma => True);
+   ("<<~!!", Eloc, N, Is_Compile_Time_Pragma => True);
 Cont := True;
  else
 Error_Msg
-   ("\<<~!!", Eloc, Is_Compile_Time_Pragma => True);
+   ("\<<~!!", Eloc, N, Is_Compile_Time_Pragma => True);
  end if;
 
   --  Error, rather than warning, or in a body, so we do not
@@ -33161,11 +33161,11 @@ package body Sem_Prag is
   else
  if Cont = False then
 Error_Msg
-   ("<<~", Eloc, Is_Compile_Time_Pragma => True);
+   ("<<~", Eloc, N, Is_Compile_Time_Pragma => True);
 Cont := True;
  else
 Error_Msg
-   ("\<<~", Eloc, Is_Compile_Time_Pragma => True);
+   ("\<<~", Eloc, N, Is_Compile_Time_Pragma => True);
  end if;
   end if;
 
-- 
2.43.2

[COMMITTED] ada: Fix crash with -gnatdJ and -gnatyz

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch makes it so that -gnatyz style check reports specify a node
ID. That is required since those checks are sometimes made during
semantic analysis of short-circuit operators, where the Current_Node
mechanism that -gnatdJ uses is not operational.

Check_Xtra_Parens_Precedence is moved from Styleg to Style to make
this possible.

gcc/ada/

* styleg.ads (Check_Xtra_Parens_Precedence): Moved ...
* style.ads (Check_Xtra_Parens_Precedence): ... here. Also
replace corresponding renaming.
* styleg.adb (Check_Xtra_Parens_Precedence): Moved ...
* style.adb (Check_Xtra_Parens_Precedence): ... here. Also use
Errout.Error_Msg and pass it a node parameter.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/style.adb  | 22 ++
 gcc/ada/style.ads  |  3 +--
 gcc/ada/styleg.adb | 22 --
 gcc/ada/styleg.ads |  4 
 4 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/gcc/ada/style.adb b/gcc/ada/style.adb
index aaa668aab00..18b110b911d 100644
--- a/gcc/ada/style.adb
+++ b/gcc/ada/style.adb
@@ -337,6 +337,28 @@ package body Style is
   end if;
end Check_Identifier;
 
+   --
+   -- Check_Xtra_Parens_Precedence --
+   --
+
+   procedure Check_Xtra_Parens_Precedence (N : Node_Id) is
+   begin
+  if Style_Check_Xtra_Parens_Precedence
+and then
+  Paren_Count (N) >
+(if Nkind (N) in N_Case_Expression
+   | N_Expression_With_Actions
+   | N_If_Expression
+   | N_Quantified_Expression
+   | N_Raise_Expression
+ then 1
+ else 0)
+  then
+ Error_Msg -- CODEFIX
+   ("(style) redundant parentheses?z?", First_Sloc (N), N);
+  end if;
+   end Check_Xtra_Parens_Precedence;
+

-- Missing_Overriding --

diff --git a/gcc/ada/style.ads b/gcc/ada/style.ads
index c0925e9ce34..9614242269c 100644
--- a/gcc/ada/style.ads
+++ b/gcc/ada/style.ads
@@ -196,8 +196,7 @@ package Style is
--  Called after scanning an entire expression (N) that does not require an
--  extra level of parentheses.
 
-   procedure Check_Xtra_Parens_Precedence (N : Node_Id)
- renames Style_Inst.Check_Xtra_Parens_Precedence;
+   procedure Check_Xtra_Parens_Precedence (N : Node_Id);
--  Called after scanning a subexpression (N) that does not require an
--  extra level of parentheses according to operator precedence rules.
 
diff --git a/gcc/ada/styleg.adb b/gcc/ada/styleg.adb
index 287589f92da..c405dec2b33 100644
--- a/gcc/ada/styleg.adb
+++ b/gcc/ada/styleg.adb
@@ -1054,28 +1054,6 @@ package body Styleg is
   end if;
end Check_Xtra_Parens;
 
-   --
-   -- Check_Xtra_Parens_Precedence --
-   --
-
-   procedure Check_Xtra_Parens_Precedence (N : Node_Id) is
-   begin
-  if Style_Check_Xtra_Parens_Precedence
-and then
-  Paren_Count (N) >
-(if Nkind (N) in N_Case_Expression
-   | N_Expression_With_Actions
-   | N_If_Expression
-   | N_Quantified_Expression
-   | N_Raise_Expression
- then 1
- else 0)
-  then
- Error_Msg -- CODEFIX
-   ("(style) redundant parentheses?z?", Errout.First_Sloc (N));
-  end if;
-   end Check_Xtra_Parens_Precedence;
-

-- Determine_Token_Casing --

diff --git a/gcc/ada/styleg.ads b/gcc/ada/styleg.ads
index 9028e85cc4e..a16ea5c5961 100644
--- a/gcc/ada/styleg.ads
+++ b/gcc/ada/styleg.ads
@@ -160,10 +160,6 @@ package Styleg is
--  Called after scanning an entire expression (N) that does not require an
--  extra level of parentheses.
 
-   procedure Check_Xtra_Parens_Precedence (N : Node_Id);
-   --  Called after scanning a subexpression (N) that does not require an
-   --  extra level of parentheses according to operator precedence rules.
-
function Mode_In_Check return Boolean;
pragma Inline (Mode_In_Check);
--  Determines whether style checking is active and the Mode_In_Check is
-- 
2.43.2

[COMMITTED] ada: Fix typo in diagnostic message

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

A previous change introduced an error in the diagnostic message about
overlapping actuals. This commit fixes this.

gcc/ada/

* sem_warn.adb (Warn_On_Overlapping_Actuals): Fix typo.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_warn.adb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/sem_warn.adb b/gcc/ada/sem_warn.adb
index ccf9067c672..2de3f8668b0 100644
--- a/gcc/ada/sem_warn.adb
+++ b/gcc/ada/sem_warn.adb
@@ -3865,7 +3865,7 @@ package body Sem_Warn is
 --  This is one of the messages
 
 Error_Msg_FE ("<.i

[COMMITTED] MAINTAINERS: Fix an entry using spaces instead of tabs

2024-05-14 Thread Filip Kastl

In the MAINTAINERS file, names and emails are separated by tabs.  One of
the entries recently added used spaces.  This patch corrects this.

The check-MAINTAINERS.py script breaks a bit when this happens.  This
patch also adds a warning about this situation to the script.

ChangeLog:

* MAINTAINERS: Use tabs between name and email.

contrib/ChangeLog:

* check-MAINTAINERS.py: Add warning about not using tabs.

Signed-off-by: Filip Kastl 
---
 MAINTAINERS  | 2 +-
 contrib/check-MAINTAINERS.py | 8 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 361059fd55c..8bb435dd54e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -738,7 +738,7 @@ Kwok Cheung Yeung   

 Greta Yorsh
 David Yuste
 Adhemerval Zanella 
-Xiao Zeng   
+Xiao Zeng  
 Dennis Zhang   
 Yufeng Zhang   
 Qing Zhao  
diff --git a/contrib/check-MAINTAINERS.py b/contrib/check-MAINTAINERS.py
index 9f31a10bcff..2bac67f0821 100755
--- a/contrib/check-MAINTAINERS.py
+++ b/contrib/check-MAINTAINERS.py
@@ -71,6 +71,14 @@ def check_group(name, lines):
 print(f'Line should not start with space: "{line}"')
 exit_code = 2
 
+# Special-case some names
+if line == 'James Norris':
+continue
+
+if '\t' not in line:
+print(f'Name and email should be separated by tabs: "{line}"')
+exit_code = 2
+
 lines = [line + '\n' for line in lines]
 sorted_lines = sorted(lines, key=sort_by_surname)
 if lines != sorted_lines:
-- 
2.45.0

[COMMITTED] ada: Fix pragma Warnings and -gnatD interaction

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

A recent change broke pragma Warnings when -gnatD is enabled in some
cases. This patch fixes this by caching more slocs at times when it's
known that they haven't been modified by -gnatD.

gcc/ada/

* errout.adb (Validate_Specific_Warnings): Adapt to record
definition change.
* erroutc.adb (Set_Specific_Warning_On, Set_Specific_Warning_Off,
Warning_Specifically_Suppressed): Likewise.
* erroutc.ads: Change record definition.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb  |  4 ++--
 gcc/ada/erroutc.adb | 11 +--
 gcc/ada/erroutc.ads | 10 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index f10539d0949..92c4f6a4635 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -2028,7 +2028,7 @@ package body Errout is
if SWE.Open then
   Error_Msg_N
 ("?.w?pragma Warnings Off with no matching Warnings On",
- SWE.Start);
+ SWE.Node);
 
--  Warn for ineffective Warnings (Off, ..)
 
@@ -2043,7 +2043,7 @@ package body Errout is
then
   Error_Msg_N
 ("?.w?no warning suppressed by this pragma",
- SWE.Start);
+ SWE.Node);
end if;
 end if;
  end;
diff --git a/gcc/ada/erroutc.adb b/gcc/ada/erroutc.adb
index 96d8d128d84..be200e0016e 100644
--- a/gcc/ada/erroutc.adb
+++ b/gcc/ada/erroutc.adb
@@ -1660,9 +1660,10 @@ package body Erroutc is
   Loc : constant Source_Ptr := Sinfo.Nodes.Sloc (Node);
begin
   Specific_Warnings.Append
-((Start  => Node,
+((Start  => Loc,
   Msg=> new String'(Msg),
   Stop   => Source_Last (Get_Source_File_Index (Loc)),
+  Node   => Node,
   Reason => Reason,
   Open   => True,
   Used   => Used,
@@ -1682,13 +1683,12 @@ package body Erroutc is
   for J in 1 .. Specific_Warnings.Last loop
  declare
 SWE : Specific_Warning_Entry renames Specific_Warnings.Table (J);
-Start_Loc : constant Source_Ptr := Sinfo.Nodes.Sloc (SWE.Start);
 
  begin
 if Msg = SWE.Msg.all
-  and then Loc > Start_Loc
+  and then Loc > SWE.Start
   and then SWE.Open
-  and then Get_Source_File_Index (Start_Loc) =
+  and then Get_Source_File_Index (SWE.Start) =
Get_Source_File_Index (Loc)
 then
SWE.Stop := Loc;
@@ -1819,13 +1819,12 @@ package body Erroutc is
   for J in Specific_Warnings.First .. Specific_Warnings.Last loop
  declare
 SWE : Specific_Warning_Entry renames Specific_Warnings.Table (J);
-Start_Loc : constant Source_Ptr := Sinfo.Nodes.Sloc (SWE.Start);
  begin
 --  Pragma applies if it is a configuration pragma, or if the
 --  location is in range of a specific non-configuration pragma.
 
 if SWE.Config
-  or else Sloc_In_Range (Loc, Start_Loc, SWE.Stop)
+  or else Sloc_In_Range (Loc, SWE.Start, SWE.Stop)
 then
if Matches (Msg.all, SWE.Msg.all)
  or else Matches (Tag, SWE.Msg.all)
diff --git a/gcc/ada/erroutc.ads b/gcc/ada/erroutc.ads
index 250461f4b5c..1c43bce2b21 100644
--- a/gcc/ada/erroutc.ads
+++ b/gcc/ada/erroutc.ads
@@ -347,11 +347,19 @@ package Erroutc is
--  which is the pattern to match for suppressing a warning.
 
type Specific_Warning_Entry is record
-  Start : Node_Id;
+  Start : Source_Ptr;
   Stop  : Source_Ptr;
   --  Starting and ending source pointers for the range. These are always
   --  from the same source file.
 
+  Node : Node_Id;
+  --  Node for the pragma Warnings occurrence. We store it to compute the
+  --  enclosing subprogram if -gnatdJ is enabled and a message about this
+  --  clause needs to be emitted. Note that we cannot remove the Start
+  --  component above and use Sloc (Node) on message display instead
+  --  because -gnatD output can already have messed with slocs at the point
+  --  when warnings about ineffective clauses are emitted.
+
   Reason : String_Id;
   --  Reason string from pragma Warnings, or null string if none
 
-- 
2.43.2

[COMMITTED] ada: Decouple attachment from dynamic allocation for controlled objects

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

This decouples the attachment to the appropriate finalization collection of
dynamically allocated objects that need finalization from their allocation.

The current implementation immediately attaches them after allocating them,
which means that they will be finalized even if their initialization does
not complete successfully.  The new implementation instead generates the
same sequence as the one generated for (statically) declared objects, that
is to say, allocation, initialization and attachment in this order.

gcc/ada/

* exp_ch3.adb (Build_Default_Initialization): Do not generate the
protection for finalization collections.
(Build_Heap_Or_Pool_Allocator): Set the No_Initialization flag on
the declaration of the temporary.
* exp_ch4.adb (Build_Aggregate_In_Place): Do not build an allocation
procedure here.
(Expand_Allocator_Expression): Build an allocation procedure, if it
is required, only just before rewriting the allocator.
(Expand_N_Allocator): Do not build an allocation procedure if the
No_Initialization flag is set on the allocator, except for those
generated for special return objects.  In other cases, build an
allocation procedure, if it is required, only before rewriting
the allocator.
* exp_ch7.ads (Make_Address_For_Finalize): New function declaration.
* exp_ch7.adb (Finalization Management): Update description for
dynamically allocated objects.
(Make_Address_For_Finalize): Remove declaration.
(Find_Last_Init): Change to function and move to...
(Process_Object_Declaration): Adjust to above change.
* exp_util.ads (Build_Allocate_Deallocate_Proc): Add Mark parameter
with Empty default and document it.
(Find_Last_Init): New function declaration.
* exp_util.adb (Build_Allocate_Deallocate_Proc): Add Mark parameter
with Empty default and pass it in recursive call.  Deal with type
conversions created for interface types.  Adjust call sequence to
Allocate_Any_Controlled by changing Collection to In/Out parameter
and removing Finalize_Address parameter.  For a controlled object,
generate a conditional call to Attach_Object_To_Collection for an
allocation and to Detach_Object_From_Collection for a deallocation.
(Find_Last_Init): ...here.  Compute the initialization type for an
allocator whose designating type is class wide specifically and also
handle concurrent types.
* rtsfind.ads (RE_Id): Add RE_Attach_Object_To_Collection and
RE_Detach_Object_From_Collection.
(RE_Unit_Table): Add entries for RE_Attach_Object_To_Collection and
RE_Detach_Object_From_Collection.
* libgnat/s-finpri.ads (Finalization_Started): Delete.
(Attach_Node_To_Collection): Likewise.
(Detach_Node_From_Collection): Move to...
(Attach_Object_To_Collection): New procedure declaration.
(Detach_Object_From_Collection): Likewise.
(Finalization_Collection): Remove Atomic for Finalization_Started.
Add pragma Inline for Initialize.
* libgnat/s-finpri.adb: Add clause for Ada.Unchecked_Conversion.
(To_Collection_Node_Ptr): New instance of Ada.Unchecked_Conversion.
(Detach_Node_From_Collection): ...here.
(Attach_Object_To_Collection): New procedure.
(Detach_Object_From_Collection): Likewise.
(Finalization_Started): Delete.
(Finalize): Replace allocation with attachment in comments.
* libgnat/s-stposu.ads (Allocate_Any_Controlled): Rename parameter
Context_Subpool into Named_Subpool, parameter Context_Collection
into Collection and change it to In/Out, and remove Fin_Address.
* libgnat/s-stposu.adb: Remove clause for Ada.Unchecked_Conversion
and Finalization_Primitives.
(To_Collection_Node_Ptr): Delete.
(Allocate_Any_Controlled): Rename parameter Context_Subpool into
Named_Subpool, parameter Context_Collection into Collection and
change it to In/Out, and remove Fin_Address.  Do not lock/unlock
and do not attach the object, instead only displace its address.
(Deallocate_Any_Controlled): Do not lock/unlock and do not detach
the object.
(Header_Size_With_Padding): Use qualified name for Header_Size.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb  |   5 +-
 gcc/ada/exp_ch4.adb  |  20 +-
 gcc/ada/exp_ch7.adb  | 370 +++--
 gcc/ada/exp_ch7.ads  |   7 +
 gcc/ada/exp_util.adb | 520 ---
 gcc/ada/exp_util.ads |  25 +-
 gcc/ada/libgnat/s-finpri.adb |  84 --
 gcc/ada/libgnat/s-finpri.ads |  26 +-
 gcc/ada/libgnat/s-stposu.adb | 196 -
 gcc/ada/libgnat/s-stposu.ads |  40 ++-
 gcc/ada/rtsfind.ads

[COMMITTED] ada: Fix crash with -gnatdJ and -gnatw.w

2024-05-14 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch fixes a crash when -gnatdJ is enabled and a warning
must be emitted about an ineffective pragma Warnings clause.

Some modifications are made to the specific warnings machinery so
that warnings carry the ID of the pragma node they're about, so the
-gnatdJ mechanism can find an appropriate enclosing subprogram.

gcc/ada/

* sem_prag.adb (Analyze_Pragma): Adapt call to new signature.
* erroutc.ads (Set_Specific_Warning_Off): Change signature
and update documentation.
(Validate_Specific_Warnings): Move ...
* errout.adb: ... here and change signature. Also move body
of Validate_Specific_Warnings from erroutc.adb.
(Finalize): Adapt call.
* errout.ads (Set_Specific_Warning_Off): Adapt signature of
renaming.
* erroutc.adb (Set_Specific_Warning_Off): Adapt signature and
body.
(Validate_Specific_Warnings): Move to the body of Errout.
(Warning_Specifically_Suppressed): Adapt body.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb   | 50 +-
 gcc/ada/errout.ads   |  2 +-
 gcc/ada/erroutc.adb  | 58 +++-
 gcc/ada/erroutc.ads  | 25 +++
 gcc/ada/sem_prag.adb |  2 +-
 5 files changed, 69 insertions(+), 68 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index c4761bd1bc9..4622290897b 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -213,6 +213,10 @@ package body Errout is
--  should have 'Class appended to its name (see Add_Class procedure), and
--  is otherwise unchanged.
 
+   procedure Validate_Specific_Warnings;
+   --  Checks that specific warnings are consistent (for non-configuration
+   --  case, properly closed, and used).
+
function Warn_Insertion return String;
--  This is called for warning messages only (so Warning_Msg_Char is set)
--  and returns a corresponding string to use at the beginning of generated
@@ -1745,7 +1749,7 @@ package body Errout is
   --  do this on the last call, after all possible warnings are posted.
 
   if Last_Call then
- Validate_Specific_Warnings (Error_Msg'Access);
+ Validate_Specific_Warnings;
   end if;
end Finalize;
 
@@ -2001,6 +2005,50 @@ package body Errout is
   --  True if S starts with Size_For
end Is_Size_Too_Small_Message;
 
+   
+   -- Validate_Specific_Warnings --
+   
+
+   procedure Validate_Specific_Warnings is
+   begin
+  if not Warnsw.Warn_On_Warnings_Off then
+ return;
+  end if;
+
+  for J in Specific_Warnings.First .. Specific_Warnings.Last loop
+ declare
+SWE : Specific_Warning_Entry renames Specific_Warnings.Table (J);
+
+ begin
+if not SWE.Config then
+
+   --  Warn for unmatched Warnings (Off, ...)
+
+   if SWE.Open then
+  Error_Msg_N
+("?.w?pragma Warnings Off with no matching Warnings On",
+ SWE.Start);
+
+   --  Warn for ineffective Warnings (Off, ..)
+
+   elsif not SWE.Used
+
+ --  Do not issue this warning for -Wxxx messages since the
+ --  back-end doesn't report the information. Note that there
+ --  is always an asterisk at the start of every message.
+
+ and then not
+   (SWE.Msg'Length > 3 and then SWE.Msg (2 .. 3) = "-W")
+   then
+  Error_Msg_N
+("?.w?no warning suppressed by this pragma",
+ SWE.Start);
+   end if;
+end if;
+ end;
+  end loop;
+   end Validate_Specific_Warnings;
+
---
-- Last_Node --
---
diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
index 5a7764aa0a3..089da867d45 100644
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -896,7 +896,7 @@ package Errout is
--  location from which warnings are to be turned back on.
 
procedure Set_Specific_Warning_Off
- (Loc: Source_Ptr;
+ (Node   : Node_Id;
   Msg: String;
   Reason : String_Id;
   Config : Boolean;
diff --git a/gcc/ada/erroutc.adb b/gcc/ada/erroutc.adb
index 125cbf822ff..96d8d128d84 100644
--- a/gcc/ada/erroutc.adb
+++ b/gcc/ada/erroutc.adb
@@ -38,6 +38,7 @@ with Fname;use Fname;
 with Namet;use Namet;
 with Opt;  use Opt;
 with Output;   use Output;
+with Sinfo.Nodes;
 with Sinput;   use Sinput;
 with Snames;   use Snames;
 with Stringt;  use Stringt;
@@ -1650,15 +1651,16 @@ package body Erroutc is
--
 
procedure Set_Specific_Warning_Off
- (Loc: Source_Ptr;
+ (Node   : Node_Id;
   Msg: String;
   Reason : String_Id;
   Config : Boolean;
   Used   : Boolean := False)
i

[COMMITTED] ada: Follow up fixes for Put_Image/streaming regressions

2024-05-14 Thread Marc Poulhiès

From: Steve Baird 

A recent change to reduce duplication of compiler-generated Put_Image and
streaming subprograms introduced two regressions. One is yet another of the
many cases where generating these routines "on demand" (as opposed to at the
point of the associated type declaration) requires loosening the compiler's
enforcement of privacy. The other is a use-before-definition issue that
occurs because the declaration of a Put_Image procedure is not hoisted far
enough.

gcc/ada/

* exp_attr.adb (Build_And_Insert_Type_Attr_Subp): If a subprogram
associated with a (library-level) type declared in another unit is
to be inserted somewhere in a list, then insert it at the head of
the list.
* sem_ch5.adb (Analyze_Assignment): Normally a limited-type
assignment is illegal. Relax this rule if Comes_From_Source is
False and the type is not immutably limited.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb | 3 ++-
 gcc/ada/sem_ch5.adb  | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index 6dba600620e..e12e8b4a439 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -1953,7 +1953,8 @@ package body Exp_Attr is
 
 while Present (Ancestor) loop
if Is_List_Member (Ancestor) then
-  Insertion_Point := Ancestor;
+  Insertion_Point := First (List_Containing (Ancestor));
+  pragma Assert (Present (Insertion_Point));
end if;
Ancestor := Parent (Ancestor);
 end loop;
diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index 65370ebfe7e..1e09e57919e 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -597,10 +597,13 @@ package body Sem_Ch5 is
 
   --  Error of assigning to limited type. We do however allow this in
   --  certain cases where the front end generates the assignments.
+  --  Comes_From_Source test is needed to allow compiler-generated
+  --  streaming/put_image subprograms, which may ignore privacy.
 
   elsif Is_Limited_Type (T1)
 and then not Assignment_OK (Lhs)
 and then not Assignment_OK (Original_Node (Lhs))
+and then (Comes_From_Source (N) or Is_Immutably_Limited_Type (T1))
   then
  --  CPP constructors can only be called in declarations
 
-- 
2.43.2

[COMMITTED] ada: Document more details of the implementation of finalization chains

2024-05-14 Thread Marc Poulhiès

From: Eric Botcazou 

gcc/ada/

* exp_ch7.adb (Finalization Management): Add a short description of
the implementation of finalization chains.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb | 29 +
 1 file changed, 29 insertions(+)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index e978a778f1e..25a7c0b2b46 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -100,6 +100,35 @@ package body Exp_Ch7 is
--  have to be detached from the finalization chain, in case (2) they must
--  not and in case (1) this is optional as we are exiting the scope anyway.
 
+   --  There are two kinds of finalization chain to which objects are attached,
+   --  depending on the way they are created. For objects (statically) declared
+   --  in a scope, the finalization chain is that of the master of the scope,
+   --  which is embodied in a Finalization_Master object. As per RM 7.6.1(11/3)
+   --  the finalization of the master (on scope exit) performs the finalization
+   --  of objects attached to its chain in the reverse order of their creation.
+
+   --  For dynamically allocated objects, the finalization chain is that of the
+   --  finalization collection of the access type through which the objects are
+   --  allocated, which is embodied in a Finalization_Collection object. As per
+   --  RM 7.6.1(11.1/3), the finalization of the collection performs the
+   --  finalization of objects attached to its chain in an arbitrary order.
+
+   --  A Finalization_Collection object is implemented as a controlled object
+   --  and its finalization is therefore driven by the finalization master of
+   --  the scope where it is declared. As per RM 7.6.1(11.2/3), for a named
+   --  access type, the Finalization_Collection object is declared in the list
+   --  of actions of its freeze node.
+
+   --  ??? For an anonymous access type, the implementation deviates from the
+   --  RM 7.6.1 clause as follows: all the anonymous access types with the same
+   --  designated type that are (implicitly) declared in a library unit share a
+   --  single Finalization_Collection object declared in the outermost scope of
+   --  the library unit, except if the designated type is declared in a dynamic
+   --  scope nested in the unit; in this case no Finalization_Collection object
+   --  is created. As a result, in the first case, objects allocated through
+   --  the anonymous access types are finalized when the library unit goes out
+   --  of scope, while in the second case, they are not finalized at all.
+
--  Here is a simple example of the expansion of a controlled block:
 
--declare
-- 
2.43.2

Re: [x86 SSE] Improve handling of ternlog instructions in i386/sse.md

2024-05-14 Thread Hongtao Liu

On Mon, May 13, 2024 at 5:57 AM Roger Sayle  wrote:
>
>
> This patch improves the way that the x86 backend recognizes and
> expands AVX512's bitwise ternary logic (vpternlog) instructions.
I like the patch.

1 file changed, 25 insertions(+), 1 deletion(-)
gcc/config/i386/i386-expand.cc | 26 +-

modified   gcc/config/i386/i386-expand.cc
@@ -25601,6 +25601,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x)
 int
 ix86_ternlog_idx (rtx op, rtx *args)
 {
+  /* Nice dynamic programming:)  */
   int idx0, idx1;

   if (!op)
@@ -25651,6 +25652,7 @@ ix86_ternlog_idx (rtx op, rtx *args)
return 0xaa;
  }
   /* Maximum of one volatile memory reference per expression.  */
+  /* According to comments, it should be && ?  */
   if (side_effects_p (op) || side_effects_p (args[2]))
  return -1;
   if (rtx_equal_p (op, args[2]))
@@ -25666,6 +25668,8 @@ ix86_ternlog_idx (rtx op, rtx *args)

 case SUBREG:
   if (!VECTOR_MODE_P (GET_MODE (SUBREG_REG (op)))
+   /* It could be TI/OI/XImode since it's just bit operations,
+  So no need for VECTOR_MODE_P?  */
|| GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))
   != GET_MODE_SIZE (GET_MODE (op)))
  return -1;
@@ -25701,7 +25705,7 @@ ix86_ternlog_idx (rtx op, rtx *args)
 case UNSPEC:
   if (XINT (op, 1) != UNSPEC_VTERNLOG
|| XVECLEN (op, 0) != 4
-   || CONST_INT_P (XVECEXP (op, 0, 3)))
+   || !CONST_INT_P (XVECEXP (op, 0, 3)))
  return -1;

   /* TODO: Handle permuted operands.  */
@@ -25778,10 +25782,13 @@ ix86_ternlog_operand_p (rtx op)
   /* Prefer pxor.  */
   if (ix86_ternlog_leaf_p (XEXP (op, 0), mode)
&& (ix86_ternlog_leaf_p (op1, mode)
+   /* Add some comments, it's because we already have
one_cmpl2.  */
|| vector_all_ones_operand (op1, mode)))
  return false;
   break;

+  /* Wouldn't pternlog match (SUBREG: (REG))???,and it should
also be excluded.
+Similar for SUBREG: (AND/IOR/XOR)?   */
 default:
   break;
 }
@@ -25865,25 +25872,35 @@ ix86_expand_ternlog (machine_mode mode, rtx
op0, rtx op1, rtx op2, int idx,

 case 0x0a: /* ~a&c */
   if ((!op1 || !side_effects_p (op1))
+   /* shouldn't op1 always be register_operand with no side effects
when it exists?
+  _vternlog_mask only supports register_operand for op1.
+  ix86_ternlog_idx only assigns REG to args[1].
+  Ditto for op0, also we should add op2 && register_operand (op2, mode)
+  to avoid segment fault?   */
&& register_operand (op0, mode)
&& register_operand (op2, mode))
  return ix86_expand_ternlog_andnot (mode, op0, op1, target);
+  /* op2 instead of op1??? */
   break;

 case 0x0c: /* ~a&b */
   if ((!op2 || !side_effects_p (op2))
&& register_operand (op0, mode)
&& register_operand (op1, mode))
+ /* If op0 and op1 exist, they must be register_operand? So just op0
&& op1?  */
  return ix86_expand_ternlog_andnot (mode, op0, op1, target);
   break;

 case 0x0f:  /* ~a */
   if ((!op1 || !side_effects_p (op1))
+   /* No need for !side_effects for op1?  */
+   /* Ditto.  */
&& (!op2 || !side_effects_p (op2)))
  {
if (GET_MODE (op0) != mode)
  op0 = gen_lowpart (mode, op0);
if (!TARGET_64BIT && !register_operand (op0, mode))
+ /* It must be register_operand for op0 when it exists, no? */
  op0 = force_reg (mode, op0);
emit_move_insn (target, gen_rtx_XOR (mode, op0, CONSTM1_RTX (mode)));
return target;
@@ -25894,6 +25911,7 @@ ix86_expand_ternlog (machine_mode mode, rtx
op0, rtx op1, rtx op2, int idx,
   if ((!op0 || !side_effects_p (op0))
&& register_operand (op1, mode)
&& register_operand (op2, mode))
+ /* op1 && op2 && register_operand (op2, mode)??  */
  return ix86_expand_ternlog_andnot (mode, op1, op2, target);
   break;

@@ -25901,12 +25919,14 @@ ix86_expand_ternlog (machine_mode mode, rtx
op0, rtx op1, rtx op2, int idx,
   if ((!op2 || !side_effects_p (op2))
&& register_operand (op0, mode)
&& register_operand (op1, mode))
+ /* op0 && op1? */
  return ix86_expand_ternlog_andnot (mode, op1, op0, target);
   break;

 case 0x33:  /* ~b */
   if ((!op0 || !side_effects_p (op0))
&& (!op2 || !side_effects_p (op2)))
+ /* op1 && (!op2 || !side_effects_p (op2)) ?  */
  {
if (GET_MODE (op1) != mode)
  op1 = gen_lowpart (mode, op1);
@@ -26051,6 +26071,10 @@ ix86_expand_ternlog (machine_mode mode, rtx
op0, rtx op1, rtx op2, int idx,
   tmp2 = ix86_gen_bcst_mem (mode, op2);
   if (!tmp2)
  tmp2 = validize_mem (force_const_mem (mode, op2));
+  /* Can we use ix86_expand_vector_move here, it will try move
integer to gpr,
+ and broadcast gpr to the vector register.
+ It should be faster than a constant pool, and PR115021 should be solved by
+ another way instead of this workaround.  */
 }
   else
 tmp2 = op2;




-- 
BR,
Hongtao

[COMMITTED] ada: Fix classification of SPARK Boolean aspects

2024-05-14 Thread Marc Poulhiès

From: Piotr Trojanek 

The implementation of User_Aspect_Definition uses subtype
Boolean_Aspects to decide which existing aspects can be used to define
new aspects. This subtype didn't include many of the SPARK aspects,
notably the Always_Terminates.

gcc/ada/

* aspects.ads (Aspect_Id, Boolean_Aspect): Change categorization
of Boolean-valued SPARK aspects.
* sem_ch13.adb (Analyze_Aspect_Specification): Adapt CASE
statements to new classification of Boolean-valued SPARK
aspects.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/aspects.ads  |  39 -
 gcc/ada/sem_ch13.adb | 203 ++-
 2 files changed, 41 insertions(+), 201 deletions(-)

diff --git a/gcc/ada/aspects.ads b/gcc/ada/aspects.ads
index a348b322d29..eb5ab1a85dd 100644
--- a/gcc/ada/aspects.ads
+++ b/gcc/ada/aspects.ads
@@ -72,14 +72,10 @@ package Aspects is
   Aspect_Address,
   Aspect_Aggregate,
   Aspect_Alignment,
-  Aspect_Always_Terminates, -- GNAT
   Aspect_Annotate,  -- GNAT
-  Aspect_Async_Readers, -- GNAT
-  Aspect_Async_Writers, -- GNAT
   Aspect_Attach_Handler,
   Aspect_Bit_Order,
   Aspect_Component_Size,
-  Aspect_Constant_After_Elaboration,-- GNAT
   Aspect_Constant_Indexing,
   Aspect_Contract_Cases,-- GNAT
   Aspect_Convention,
@@ -95,13 +91,9 @@ package Aspects is
   Aspect_Dimension_System,  -- GNAT
   Aspect_Dispatching_Domain,
   Aspect_Dynamic_Predicate,
-  Aspect_Effective_Reads,   -- GNAT
-  Aspect_Effective_Writes,  -- GNAT
   Aspect_Exceptional_Cases, -- GNAT
-  Aspect_Extensions_Visible,-- GNAT
   Aspect_External_Name,
   Aspect_External_Tag,
-  Aspect_Ghost, -- GNAT
   Aspect_Ghost_Predicate,   -- GNAT
   Aspect_Global,-- GNAT
   Aspect_GNAT_Annotate, -- GNAT
@@ -121,7 +113,6 @@ package Aspects is
   Aspect_Max_Entry_Queue_Depth, -- GNAT
   Aspect_Max_Entry_Queue_Length,
   Aspect_Max_Queue_Length,  -- GNAT
-  Aspect_No_Caching,-- GNAT
   Aspect_No_Controlled_Parts,
   Aspect_No_Task_Parts, -- GNAT
   Aspect_Object_Size,   -- GNAT
@@ -146,7 +137,6 @@ package Aspects is
   Aspect_Relaxed_Initialization,-- GNAT
   Aspect_Scalar_Storage_Order,  -- GNAT
   Aspect_Secondary_Stack_Size,  -- GNAT
-  Aspect_Side_Effects,  -- GNAT
   Aspect_Simple_Storage_Pool,   -- GNAT
   Aspect_Size,
   Aspect_Small,
@@ -168,7 +158,6 @@ package Aspects is
   Aspect_User_Aspect,   -- GNAT
   Aspect_Value_Size,-- GNAT
   Aspect_Variable_Indexing,
-  Aspect_Volatile_Function, -- GNAT
   Aspect_Warnings,  -- GNAT
   Aspect_Write,
 
@@ -190,17 +179,25 @@ package Aspects is
   --  the aspect value is inherited from the parent, in which case, we do
   --  not allow False if we inherit a True value from the parent.
 
+  Aspect_Always_Terminates, -- GNAT
   Aspect_Asynchronous,
+  Aspect_Async_Readers, -- GNAT
+  Aspect_Async_Writers, -- GNAT
   Aspect_Atomic,
   Aspect_Atomic_Components,
+  Aspect_Constant_After_Elaboration,-- GNAT
   Aspect_Disable_Controlled,-- GNAT
   Aspect_Discard_Names,
   Aspect_CUDA_Device,   -- GNAT
   Aspect_CUDA_Global,   -- GNAT
+  Aspect_Effective_Reads,   -- GNAT
+  Aspect_Effective_Writes,  -- GNAT
   Aspect_Exclusive_Functions,
   Aspect_Export,
+  Aspect_Extensions_Visible,-- GNAT
   Aspect_Favor_Top_Level,   -- GNAT
   Aspect_Full_Access_Only,
+  Aspect_Ghost, -- GNAT
   Aspect_Independent,
   Aspect_Independent_Components,
   Aspect_Import,
@@ -208,6 +205,7 @@ package Aspects is
   Aspect_Inline_Always, -- GNAT
   Aspect_Interrupt_Handler,
   Aspect_Lock_Free, -- GNAT
+  Aspect_No_Caching,-- GNAT
   Aspect_No_Inline, -- GNAT
   Aspect_No_Return,
   Aspect_No_Tagged_Streams, -- GNAT
@@ -217,6 +215,7 @@ package Aspects is
   Aspect_Pure_Function, -- GNAT
   Aspect_Remote_Access_Type,-- GNAT
   Aspect_Shared,-- GNAT (equivalent to Atomic)
+  Aspect_Side_Effects,  -- GNAT
   Aspect_Simple_Storage_Pool_Type,  -- GNAT
   Aspect_Static,
   Aspect_Suppress_Debug_Info,

[PATCH wwwdocs] gcc-14/changes: Add Rust section to New languages

2024-05-14 Thread Arthur Cohen

---
 htdocs/gcc-14/changes.html | 13 +
 1 file changed, 13 insertions(+)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 924e045a..dd3fea8d 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -626,6 +626,19 @@ You may also want to check out our
   
 
 
+
+Rust
+
+  
+Experimental support for the Rust programming language has been
+added. The compiler is incomplete, but already supports a subset of the
+Rust programming language. The frontend does not support compiling the
+Rust standard library, so it cannot be used for most real-world Rust code 
yet.
+However, you can experiment with the compiler to run Rust code on 
#[no_core]
+targets.
+  
+
+
 
 libgccjit
 
-- 
2.42.0

Re: [PATCH 2/13] rs6000, Remove __builtin_vsx_xvcvspsxws built-in

2024-05-14 Thread Kewen.Lin

Hi,

on 2024/4/20 05:17, Carl Love wrote:
> rs6000, Remove __builtin_vsx_xvcvspsxws built-in
> 
> The built-in __builtin_vsx_xvcvspsxws is a duplicate of the vec_signed
> built-in that is documented in the PVIPR.  The __builtin_vsx_xvcvspsxws
> built-in is not documented and there are no test cases for it.
> 
> This patch removes the redundant built-in.

By revisiting the comments on the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646723.html

I wonder if it's intentional to keep the others; at least the bifs
__builtin_vsx_xvcvdpuxds_uns, __builtin_vsx_xvcvspuxws and
__builtin_vsx_xvcvuxddp_uns look removable, as users can just use the
equivalent ones in PVIPR.  And for the others, users can still use
the PVIPR ones by considering endianness (controlling with endianness
macros).

BR,
Kewen

> 
> gcc/ChangeLog:
> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xvcvspsxws):
>   Remove built-in definition.
> ---
>  gcc/config/rs6000/rs6000-builtins.def | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 7c36976a089..c6d2ea1bc39 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -1709,9 +1709,6 @@
>const vsll __builtin_vsx_xvcvspsxds (vf);
>  XVCVSPSXDS vsx_xvcvspsxds {}
>  
> -  const vsi __builtin_vsx_xvcvspsxws (vf);
> -XVCVSPSXWS vsx_fix_truncv4sfv4si2 {}
> -
>const vsll __builtin_vsx_xvcvspuxds (vf);
>  XVCVSPUXDS vsx_xvcvspuxds {}
>

Re: [PATCH] MATCH: Maybe expand (T)(A + C1) * C2 and (T)(A + C1) * C2 + C3 [PR109393]

2024-05-14 Thread Manolis Tsamis

New patch with the requested changes can be found below.

I don't know how much this affects SCEV, but I do believe that we
should incorporate this change somehow. I've seen various cases of
suboptimal address calculation codegen that boil down to this.

gcc/match.pd | 31 +++
gcc/testsuite/gcc.dg/pr109393.c | 16 
2 files changed, 47 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr109393.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..1d642c205f0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3650,6 +3650,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (convert @0) (op @2 (convert @1))
#endif
+/* ((T)(A + CST1)) * CST2 + CST3
+ -> ((T)(A) * CST2) + ((T)CST1 * CST2 + CST3)
+ Where (A + CST1) doesn't need to have a single use. */
+#if GIMPLE
+ (for op (plus minus)
+ (simplify
+ (plus (mult:s (convert:s (op @0 INTEGER_CST@1)) INTEGER_CST@2)
+ INTEGER_CST@3)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (type)
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type))
+ (op (mult (convert @0) @2) (plus (mult (convert @1) @2) @3)
+#endif
+
+/* ((T)(A + CST1)) * CST2 -> ((T)(A) * CST2) + ((T)CST1 * CST2) */
+#if GIMPLE
+ (for op (plus minus)
+ (simplify
+ (mult (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (type)
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type))
+ (op (mult (convert @0) @2) (mult (convert @1) @2)
+#endif
+
/* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
to a simple value. */
(for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr109393.c b/gcc/testsuite/gcc.dg/pr109393.c
new file mode 100644
index 000..e9051273672
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr109393.c
@@ -0,0 +1,16 @@
+/* PR tree-optimization/109393 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
+
+int foo(int *a, int j)
+{
+ int k = j - 1;
+ return a[j - 1] == a[k];
+}
+
+int bar(int *a, int j)
+{
+ int k = j - 1;
+ return (&a[j + 1] - 2) == &a[k];
+}
-- 
2.44.0


On Tue, Apr 23, 2024 at 1:33 PM Manolis Tsamis  wrote:
>
> The original motivation for this pattern was that the following function does
> not fold to 'return 1':
>
> int foo(int *a, int j)
> {
>   int k = j - 1;
>   return a[j - 1] == a[k];
> }
>
> The expression ((unsigned long) (X +- C1) * C2) appears frequently as part of
> address calculations (e.g. arrays). These patterns help fold and simplify more
> expressions.
>
> PR tree-optimization/109393
>
> gcc/ChangeLog:
>
> * match.pd: Add new patterns for ((T)(A +- CST1)) * CST2 and
>   ((T)(A +- CST1)) * CST2 + CST3.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr109393.c: New test.
>
> Signed-off-by: Manolis Tsamis 
> ---
>
>  gcc/match.pd| 30 ++
>  gcc/testsuite/gcc.dg/pr109393.c | 16 
>  2 files changed, 46 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr109393.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d401e7503e6..13c828ba70d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3650,6 +3650,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (plus (convert @0) (op @2 (convert @1))
>  #endif
>
> +/* ((T)(A + CST1)) * CST2 + CST3
> + -> ((T)(A) * CST2) + ((T)CST1 * CST2 + CST3)
> +   Where (A + CST1) doesn't need to have a single use.  */
> +#if GIMPLE
> +  (for op (plus minus)
> +   (simplify
> +(plus (mult (convert:s (op @0 INTEGER_CST@1)) INTEGER_CST@2) 
> INTEGER_CST@3)
> + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> + && TREE_CODE (type) == INTEGER_TYPE
> + && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> + && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> + && TYPE_OVERFLOW_WRAPS (type))
> +   (op (mult @2 (convert @0)) (plus (mult @2 (convert @1)) @3)
> +#endif
> +
> +/* ((T)(A + CST1)) * CST2 -> ((T)(A) * CST2) + ((T)CST1 * CST2)  */
> +#if GIMPLE
> +  (for op (plus minus)
> +   (simplify
> +(mult (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
> + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> + && TREE_CODE (type) == INTEGER_TYPE
> + && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> + && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> + && TYPE_OVERFLOW_WRAPS (type))
> +   (op (mult @2 (convert @0)) (mult @2 (convert @1))
> +#endif
> +
>  /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) coul

[PATCH] x86: Add 3-instruction subroutine vector shift for V16QI in ix86_expand_vec_perm_const_1 [PR107563]

2024-05-14 Thread Levy Hsu

Hi All

We've introduced a new subroutine in ix86_expand_vec_perm_const_1
to optimize vector shifting for the V16QI type on x86.
This patch uses a three-instruction sequence psrlw, psllw, and por
to handle specific vector shuffle operations more efficiently.
The change aims to improve assembly code generation for configurations
supporting SSE2.

Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?

Best
Levy

gcc/ChangeLog:

PR target/107563
* config/i386/i386-expand.cc (expand_vec_perm_psrlw_psllw_por): New
subroutine.
(ix86_expand_vec_perm_const_1): Call expand_vec_perm_psrlw_psllw_por.

gcc/testsuite/ChangeLog:

PR target/107563
* g++.target/i386/pr107563-a.C: New test.
* g++.target/i386/pr107563-b.C: New test.
---
 gcc/config/i386/i386-expand.cc | 64 ++
 gcc/testsuite/g++.target/i386/pr107563-a.C | 13 +
 gcc/testsuite/g++.target/i386/pr107563-b.C | 12 
 3 files changed, 89 insertions(+)
 create mode 100755 gcc/testsuite/g++.target/i386/pr107563-a.C
 create mode 100755 gcc/testsuite/g++.target/i386/pr107563-b.C

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 2f27bfb484c..5098d2886bb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -22362,6 +22362,67 @@ expand_vec_perm_2perm_pblendv (struct 
expand_vec_perm_d *d, bool two_insn)
   return true;
 }
 
+/* A subroutine of ix86_expand_vec_perm_const_1.
+   Implement a permutation with psrlw, psllw and por.
+   It handles case:
+   __builtin_shufflevector (v,v,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14);
+   __builtin_shufflevector (v,v,1,0,3,2,5,4,7,6); */
+
+static bool
+expand_vec_perm_psrlw_psllw_por (struct expand_vec_perm_d *d)
+{
+  unsigned i;
+  rtx (*gen_shr) (rtx, rtx, rtx);
+  rtx (*gen_shl) (rtx, rtx, rtx);
+  rtx (*gen_or) (rtx, rtx, rtx);
+  machine_mode mode = VOIDmode;
+
+  if (!TARGET_SSE2 || !d->one_operand_p)
+return false;
+
+  switch (d->vmode)
+{
+case E_V8QImode:
+  if (!TARGET_MMX_WITH_SSE)
+   return false;
+  mode = V4HImode;
+  gen_shr = gen_ashrv4hi3;
+  gen_shl = gen_ashlv4hi3;
+  gen_or = gen_iorv4hi3;
+  break;
+case E_V16QImode:
+  mode = V8HImode;
+  gen_shr = gen_vlshrv8hi3;
+  gen_shl = gen_vashlv8hi3;
+  gen_or = gen_iorv8hi3;
+  break;
+default: return false;
+}
+
+  if (!rtx_equal_p (d->op0, d->op1))
+return false;
+
+  for (i = 0; i < d->nelt; i += 2)
+if (d->perm[i] != i + 1 || d->perm[i + 1] != i)
+  return false;
+
+  if (d->testing_p)
+return true;
+
+  rtx tmp1 = gen_reg_rtx (mode);
+  rtx tmp2 = gen_reg_rtx (mode);
+  rtx op0 = force_reg (d->vmode, d->op0);
+
+  emit_move_insn (tmp1, lowpart_subreg (mode, op0, d->vmode));
+  emit_move_insn (tmp2, lowpart_subreg (mode, op0, d->vmode));
+  emit_insn (gen_shr (tmp1, tmp1, GEN_INT (8)));
+  emit_insn (gen_shl (tmp2, tmp2, GEN_INT (8)));
+  emit_insn (gen_or (tmp1, tmp1, tmp2));
+  emit_move_insn (d->target, lowpart_subreg (d->vmode, tmp1, mode));
+
+  return true;
+}
+
 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement a V4DF
permutation using two vperm2f128, followed by a vshufpd insn blending
the two vectors together.  */
@@ -23782,6 +23843,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d 
*d)
   if (expand_vec_perm_2perm_pblendv (d, false))
 return true;
 
+  if (expand_vec_perm_psrlw_psllw_por (d))
+return true;
+
   /* Try sequences of four instructions.  */
 
   if (expand_vec_perm_even_odd_trunc (d))
diff --git a/gcc/testsuite/g++.target/i386/pr107563-a.C 
b/gcc/testsuite/g++.target/i386/pr107563-a.C
new file mode 100755
index 000..605c1bdf814
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr107563-a.C
@@ -0,0 +1,13 @@
+/* PR target/107563.C */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-std=c++2b -O3 -msse2" } */
+/* { dg-final { scan-assembler-times "psllw" 1 } } */
+/* { dg-final { scan-assembler-times "psraw" 1 } } */
+/* { dg-final { scan-assembler-times "por" 1 } } */
+
+using temp_vec_type2 [[__gnu__::__vector_size__(8)]] = char;
+
+void foo2(temp_vec_type2& v) noexcept
+{
+  v = __builtin_shufflevector(v, v, 1, 0, 3, 2, 5, 4, 7, 6);
+}
diff --git a/gcc/testsuite/g++.target/i386/pr107563-b.C 
b/gcc/testsuite/g++.target/i386/pr107563-b.C
new file mode 100755
index 000..0ce3e8263bb
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr107563-b.C
@@ -0,0 +1,12 @@
+/* PR target/107563.C */
+/* { dg-options "-std=c++2b -O3 -msse2" } */
+/* { dg-final { scan-assembler-times "psllw" 1 } } */
+/* { dg-final { scan-assembler-times "psrlw" 1 } } */
+/* { dg-final { scan-assembler-times "por" 1 } } */
+
+using temp_vec_type [[__gnu__::__vector_size__(16)]] = char;
+
+void foo(temp_vec_type& v) noexcept
+{
+  v = __builtin_shufflevector(v, v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 
12, 15, 14);
+}
-- 
2.31.1

Re: [PATCH] report message for operator %a on unaddressible exp

2024-05-14 Thread Segher Boessenkool

On Tue, May 14, 2024 at 11:00:38AM +0800, Jiufu Guo wrote:
> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >> index 117999613d8..50943d76f79 100644
> >> --- a/gcc/config/rs6000/rs6000.cc
> >> +++ b/gcc/config/rs6000/rs6000.cc
> >> @@ -14659,6 +14659,12 @@ print_operand_address (FILE *file, rtx x)
> >>else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
> >>   || GET_CODE (x) == LABEL_REF)
> >>  {
> >> +  if (this_is_asm_operands && !address_operand (x, VOIDmode))
> >
> > Do we really need this_is_asm_operands here?
> I understand your point: 
> since in function 'print_operand_address' which supports not only user
> asm code.  So, it maybe incorrect if 'x' is not an 'address_operand',
> no matter this_is_asm_operands.
> 
> Here, 'this_is_asm_operands' is needed because it would be treated as an
> user fault in asm-code (otherwise, internal_error in the compiler).

You almost never want to test for asm, and just give the same error you
would give in non-asm.  It is the same problem after all, and giving the
user the same error message is the most helpful thing to do!

It can be useful to not say "ICE", but it already is prevented from
doing that here.


Segher

Re: [COMMITED] MAINTAINERS: Fix an entry using spaces instead of tabs

2024-05-14 Thread Xiao Zeng

2024-05-14 16:37  Filip Kastl  wrote:
>
>In the MAINTAINERS file, names and emails are separated by tabs.  One of
>the entries recently added used spaces.  This patch corrects this. 
Thanks.

>
>The check-MAINTAINERS.py script breaks a bit when this happens.  This
>patch also adds warning about this situation into the script.
>
>ChangeLog:
>
>   * MAINTAINERS: Use tabs between name and email.
>
>contrib/ChangeLog:
>
>   * check-MAINTAINERS.py: Add warning about not using tabs.
>
>Signed-off-by: Filip Kastl 
>---
> MAINTAINERS  | 2 +-
> contrib/check-MAINTAINERS.py | 8 
> 2 files changed, 9 insertions(+), 1 deletion(-)
>
>diff --git a/MAINTAINERS b/MAINTAINERS
>index 361059fd55c..8bb435dd54e 100644
>--- a/MAINTAINERS
>+++ b/MAINTAINERS
>@@ -738,7 +738,7 @@ Kwok Cheung Yeung  
> Greta Yorsh   
> David Yuste   
> Adhemerval Zanella
>-Xiao Zeng   
>+Xiao Zeng 
> Dennis Zhang  
> Yufeng Zhang  
> Qing Zhao 
>diff --git a/contrib/check-MAINTAINERS.py b/contrib/check-MAINTAINERS.py
>index 9f31a10bcff..2bac67f0821 100755
>--- a/contrib/check-MAINTAINERS.py
>+++ b/contrib/check-MAINTAINERS.py
>@@ -71,6 +71,14 @@ def check_group(name, lines):
> print(f'Line should not start with space: "{line}"')
> exit_code = 2
>
>+    # Special-case some names
>+    if line == 'James Norris':
>+    continue
>+
>+    if '\t' not in line:
>+    print(f'Name and email should be separated by tabs: "{line}"')
>+    exit_code = 2
>+
> lines = [line + '\n' for line in lines]
> sorted_lines = sorted(lines, key=sort_by_surname)
> if lines != sorted_lines:
>--
>2.45.0
Thanks
Xiao Zeng

Re: [PATCH] report message for operator %a on unaddressible exp

2024-05-14 Thread Segher Boessenkool

Oh, btw:

On Tue, May 14, 2024 at 11:00:38AM +0800, Jiufu Guo wrote:
> >> --- a/gcc/config/rs6000/rs6000.cc
> >> +++ b/gcc/config/rs6000/rs6000.cc
> >> @@ -14659,6 +14659,12 @@ print_operand_address (FILE *file, rtx x)
> >>else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
> >>   || GET_CODE (x) == LABEL_REF)
> >>  {
> >> +  if (this_is_asm_operands && !address_operand (x, VOIDmode))
> >> +  {
> >> +output_operand_lossage ("invalid expression as operand");
> >> +return;
> >> +  }

That error message is not so good.  Firstly, it typically *is* a valid
expression here, just not a correct expression to have for an address.
But, more generally and usefully, the error message should say *what* is
wrong about the expression (namely, it is not an address).

Most of the time you can use the same error message for asm and other
expressions, and you get a great message in all contexts.
operand_lossage already takes care of telling the user "you did
something foolish" for inline asm, or "ICE" if it is a compiler problem
instead.

In error messages you do not often know what caused the problem, so
just report on the facts you *do* know (and moreso with warnings, there
you typically only know something looks unusual).

Segher

[PATCH] tree-optimization/99954 - redo loop distribution memcpy recognition fix

2024-05-14 Thread Richard Biener


The following revisits the fix for PR99954 which was observed as
causing missed memcpy recognition and instead using memmove for
non-aliasing copies.  While the original fix mitigated bogus
recognition of memcpy the root cause was not properly identified.
The root cause is dr_analyze_indices "failing" to handle union
references and leaving the DRs indices in a state that's not correctly
handled by dr_may_alias.  The following mitigates this there
appropriately, restoring memcpy recognition for non-aliasing copies.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

PR tree-optimization/99954
* tree-data-ref.cc (dr_may_alias_p): For bases that are
not completely analyzed fall back to TBAA and points-to.
* tree-loop-distribution.cc
(loop_distribution::classify_builtin_ldst): When there
is no dependence again classify as memcpy.

* gcc.dg/tree-ssa/ldist-40.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c | 10 ++
 gcc/tree-data-ref.cc | 21 +
 gcc/tree-loop-distribution.cc|  4 ++--
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c
new file mode 100644
index 000..238a0098352
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ldist-details" } */
+
+void copy_a_to_b (char * __restrict b, char * a, int n)
+{
+  for (int i = 0; i < n; ++i)
+b[i] = a[i];
+}
+
+/* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index f37734b5340..9d3f5d7507f 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -3066,6 +3066,27 @@ dr_may_alias_p (const struct data_reference *a, const 
struct data_reference *b,
return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
   TREE_OPERAND (addr_b, 0));
 }
+  /* If dr_analyze_innermost failed to handle a component we are
+ possibly left with a non-base in which case we didn't analyze
+ a possible evolution of the base.  */
+  else if (handled_component_p (addr_a) || handled_component_p (addr_b))
+{
+  /* For true dependences we can apply TBAA.  */
+  if (flag_strict_aliasing
+ && DR_IS_WRITE (a) && DR_IS_READ (b)
+ && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
+get_alias_set (DR_REF (b
+   return false;
+  if (TREE_CODE (addr_a) == MEM_REF)
+   return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
+  build_fold_addr_expr (addr_b));
+  else if (TREE_CODE (addr_b) == MEM_REF)
+   return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
+  TREE_OPERAND (addr_b, 0));
+  else
+   return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
+  build_fold_addr_expr (addr_b));
+}

   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
  that is being subsetted in the loop nest.  */
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 45932bae5e7..668dc420449 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -1840,11 +1840,11 @@ loop_distribution::classify_builtin_ldst (loop_p loop, 
struct graph *rdg,
   /* Now check that if there is a dependence.  */
   ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);

-  /* Classify as memmove if no dependence between load and store.  */
+  /* Classify as memcpy if no dependence between load and store.  */
   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 {
   partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, 
size);
-  partition->kind = PKIND_MEMMOVE;
+  partition->kind = PKIND_MEMCPY;
   return;
 }

--
2.25.1

[committed] libstdc++: Fix typo in std::stacktrace::max_size [PR115063]

2024-05-14 Thread Jonathan Wakely

Tested x86_64-linux. Pushed to trunk, gcc14 and gcc-13.

-- >8 --

libstdc++-v3/ChangeLog:

PR libstdc++/115063
* include/std/stacktrace (basic_stacktrace::max_size): Fix typo
in reference to _M_alloc member.
* testsuite/19_diagnostics/stacktrace/stacktrace.cc: Check
max_size() compiles.
---
 libstdc++-v3/include/std/stacktrace|  2 +-
 .../testsuite/19_diagnostics/stacktrace/stacktrace.cc  | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 92a69a53d98..d217d63af3b 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -430,7 +430,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   [[nodiscard]]
   size_type
   max_size() const noexcept
-  { return _Impl::_S_max_size(_M_impl._M_alloc); }
+  { return _Impl::_S_max_size(_M_alloc); }
 
   [[nodiscard]]
   const_reference
diff --git a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/stacktrace.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/stacktrace.cc
index 070c4157471..a49cddfef26 100644
--- a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/stacktrace.cc
+++ b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/stacktrace.cc
@@ -206,10 +206,20 @@ test_pr105031()
   s = auto(s);
 }
 
+void
+test_pr115063()
+{
+  // PR libstdc++/115063
+  // compilation error: std::basic_stracktrace::max_size()
+  std::stacktrace s;
+  VERIFY( s.max_size() != 0 );
+}
+
 int main()
 {
   test_cons();
   test_assign();
   test_swap();
   test_pr105031();
+  test_pr115063();
 }
-- 
2.44.0

Re: [PATCH] report message for operator %a on unaddressible exp

2024-05-14 Thread Jiufu Guo

Hi,

Segher Boessenkool  writes:

> On Tue, May 14, 2024 at 11:00:38AM +0800, Jiufu Guo wrote:
>> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> >> index 117999613d8..50943d76f79 100644
>> >> --- a/gcc/config/rs6000/rs6000.cc
>> >> +++ b/gcc/config/rs6000/rs6000.cc
>> >> @@ -14659,6 +14659,12 @@ print_operand_address (FILE *file, rtx x)
>> >>else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
>> >>  || GET_CODE (x) == LABEL_REF)
>> >>  {
>> >> +  if (this_is_asm_operands && !address_operand (x, VOIDmode))
>> >
>> > Do we really need this_is_asm_operands here?
>> I understand your point: 
>> since in function 'print_operand_address' which supports not only user
>> asm code.  So, it maybe incorrect if 'x' is not an 'address_operand',
>> no matter this_is_asm_operands.
>> 
>> Here, 'this_is_asm_operands' is needed because it would be treated as an
>> user fault in asm-code (otherwise, internal_error in the compiler).
>
> You almost never want to test for asm, and just give the same error you
> would give in non-asm.  It is the same problem after all, and giving the
> user the same error message is the most helpful thing to do!
Yes, just as in Kewen's comments. The testing on 'this_is_asm_operands' and
'address_operand' is not in a good place.
The message emitting and its checking could be more straightforward,
something like:
/* emit error for user asm code, or fault in compiler. */
else if (TARGET_TOC)
  output_operand_lossage ("xxx");

I would update the patch for this.

BR,
Jeff(Jiufu) Guo

>
> It can be useful to not say "ICE", but it already is prevented from
> doing that here.
>
>
> Segher

On Tue, May 14, 2024 at 05:53:56PM +0800, Jiufu Guo wrote:
> Thanks so much for your great review!
> Reference other messages, I'm wondering "invalid %%a value" may be
> acceptable, or "invalid %%a address expression in TOC" maybe better.

"%%a requires a memory operand"?  Maybe even print out the actual
operand given, too.


Segher

[Patch, aarch64] v4: Preparatory patch to place target independent and,dependent changed code in one file

2024-05-14 Thread Ajit Agarwal

Hello Alex/Richard:

All comments are addressed.

There were some issues in sending the patch, so I am sending it again.

Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface between target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

Bootstrapped on aarch64-linux-gnu.

Thanks & Regards
Ajit



aarch64: Preparatory patch to place target independent and
dependent changed code in one file

Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface between target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

2024-05-14  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc: Place target
independent and dependent changed code.
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 526 +++
 1 file changed, 345 insertions(+), 181 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 1d9caeab05d..e6af4b0570a 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -138,6 +138,210 @@ struct alt_base
   poly_int64 offset;
 };
 
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+  virtual bool conflict_p (int &budget) const = 0;
+  virtual insn_info *insn () const = 0;
+  virtual bool valid () const = 0;
+  virtual void advance () = 0;
+};
+
+// This is used in handle_writeback_opportunities describing
+// ALL if aarch64_ldp_writeback > 1 otherwise check
+// EXISTING if aarch64_ldp_writeback.
+enum class writeback {
+  ALL,
+  EXISTING
+};
+
+struct pair_fusion {
+  pair_fusion ()
+  {
+calculate_dominance_info (CDI_DOMINATORS);
+df_analyze ();
+crtl->ssa = new rtl_ssa::function_info (cfun);
+  };
+
+  // Given:
+  // - an rtx REG_OP, the non-memory operand in a load/store insn,
+  // - a machine_mode MEM_MODE, the mode of the MEM in that insn, and
+  // - a boolean LOAD_P (true iff the insn is a load), then:
+  // return true if the access should be considered an FP/SIMD access.
+  // Such accesses are segregated from GPR accesses, since we only want
+  // to form pairs for accesses that use the same register file.
+  virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+  {
+return false;
+  }
+
+  // Return true if we should consider forming ldp/stp insns from memory
+  // accesses with operand mode MODE at this stage in compilation.
+  virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+
+  // Return true iff REG_OP is a suitable register operand for a paired
+  // memory access, where LOAD_P is true if we're asking about loads and
+  // false for stores.  MODE gives the mode of the operand.
+  virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mode) = 0;
+
+  // Return alias check limit.
+  // This is needed to avoid unbounded quadratic behaviour when
+  // performing alias analysis.
+  virtual int pair_mem_alias_check_limit () = 0;
+
+  // Returns true if we should try to handle writeback opportunities
+  // WHICH parameter decides ALL or EXISTING writeback pairs.
+  virtual bool handle_writeback_opportunities (enum writeback which) = 0 ;
+
+  // Given BASE_MEM, the mem from the lower candidate access for a pair,
+  // and LOAD_P (true if the access is a load), check if we should proceed
+  // to form the pair given the target's code generation policy on
+  // paired accesses.
+  virtual bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) = 0;
+
+  // Generate the pattern for a paired access. PATS gives the patterns
+  // for the individual memory accesses (which by this point must share a
+  // common base register).  If WRITEBACK is non-NULL, then this rtx
+  // describes the update to the base register that should be performed by
+  // the resulting insn.  LOAD_P is true iff the accesses are loads.
+  virtual rtx gen_pair (rtx *pats, rtx writeback, bool load_p) = 0;
+
+  // Return true if memory is paired access, given INSN and LOAD_P
+  // is true for load insn and false for store insn.
+  virtual bool pair_mem_insn_p (rtx_insn *, bool &) = 0;
+
+  // Return true if we should track loads.
+  virtual bool track_loads_p ()
+  {
+return true;
+  }
+
+  // Return true if we should track stores.
+  virtual bool track_stores_p ()
+  {
+return true;
+  }
+
+  // Return true if OFF  is in range.
+  virtual bool pair_mem_in_range_p (HOST_WIDE_INT off) = 0;
+
+  // Given a load/store pair insn in PATTERN, u

Avoid TYPE_MAIN_VARIANT compares in TBAA

2024-05-14 Thread Jan Hubicka

Hi,
while building more testcases for ipa-icf I noticed that there are two places
in aliasing code where we still compare TYPE_MAIN_VARIANT for pointer equality.
This is not a good idea for LTO since type merging may not happen, for example
when in one unit the pointed-to type is forward declared while in another it is
fully defined.  We have same_type_for_tbaa for that.

Bootstrapped/regtested x86_64-linux, OK?

gcc/ChangeLog:

* alias.cc (reference_alias_ptr_type_1): Use view_converted_memref_p.
* alias.h (view_converted_memref_p): Declare.
* tree-ssa-alias.cc (view_converted_memref_p): Export.
(ao_compare::compare_ao_refs): Use same_type_for_tbaa.

diff --git a/gcc/alias.cc b/gcc/alias.cc
index 808e2095d9b..853e84d7439 100644
--- a/gcc/alias.cc
+++ b/gcc/alias.cc
@@ -770,10 +770,7 @@ reference_alias_ptr_type_1 (tree *t)
   /* If the innermost reference is a MEM_REF that has a
  conversion embedded treat it like a VIEW_CONVERT_EXPR above,
  using the memory access type for determining the alias-set.  */
-  if (TREE_CODE (inner) == MEM_REF
-  && (TYPE_MAIN_VARIANT (TREE_TYPE (inner))
- != TYPE_MAIN_VARIANT
-  (TREE_TYPE (TREE_TYPE (TREE_OPERAND (inner, 1))
+  if (view_converted_memref_p (inner))
 {
   tree alias_ptrtype = TREE_TYPE (TREE_OPERAND (inner, 1));
   /* Unless we have the (aggregate) effective type of the access
diff --git a/gcc/alias.h b/gcc/alias.h
index f8d93e8b5f4..36095f0bf73 100644
--- a/gcc/alias.h
+++ b/gcc/alias.h
@@ -41,6 +41,7 @@ bool alias_ptr_types_compatible_p (tree, tree);
 int compare_base_decls (tree, tree);
 bool refs_same_for_tbaa_p (tree, tree);
 bool mems_same_for_tbaa_p (rtx, rtx);
+bool view_converted_memref_p (tree);
 
 /* This alias set can be used to force a memory to conflict with all
other memories, creating a barrier across which no memory reference
diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index e7c1c1aa624..632cf78028b 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2044,7 +2044,7 @@ decl_refs_may_alias_p (tree ref1, tree base1,
which is done by ao_ref_base and thus one extra walk
of handled components is needed.  */
 
-static bool
+bool
 view_converted_memref_p (tree base)
 {
   if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF)
@@ -4325,8 +4325,8 @@ ao_compare::compare_ao_refs (ao_ref *ref1, ao_ref *ref2,
   else if ((end_struct_ref1 != NULL) != (end_struct_ref2 != NULL))
 return flags | ACCESS_PATH;
   if (end_struct_ref1
-  && TYPE_MAIN_VARIANT (TREE_TYPE (end_struct_ref1))
-!= TYPE_MAIN_VARIANT (TREE_TYPE (end_struct_ref2)))
+  && same_type_for_tbaa (TREE_TYPE (end_struct_ref1),
+TREE_TYPE (end_struct_ref2)) != 1)
 return flags | ACCESS_PATH;
 
   /* Now compare all handled components of the access path.

[PING] [PATCH v2] testsuite: Verify r0-r3 are extended with CMSE

2024-05-14 Thread Torbjorn SVENSSON


Hi,

I'm not sure if the previous "ok" from Richard on the v1 is enough for 
this or if another approval is needed.


Adding extra maintainers since Richard Earnshaw appears to be busy the 
past weeks.


Kind regards,
Torbjörn

On 2024-05-06 13:50, Torbjorn SVENSSON wrote:

Hi,

Forgot to mention when I sent the patch that I would like to commit it 
to the following branches:


- releases/gcc-11
- releases/gcc-12
- releases/gcc-13
- releases/gcc-14
- trunk

Kind regards,
Torbjörn

On 2024-05-02 12:50, Torbjörn SVENSSON wrote:

Add regression test to the existing zero/sign extend tests for CMSE to
verify that r0, r1, r2 and r3 are properly extended, not just r0.

boolCharShortEnumSecureFunc test is done using -O0 to ensure the
instructions are in a predictable order.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/extend-param.c: Add regression test. Add
  -fshort-enums.
* gcc.target/arm/cmse/extend-return.c: Add -fshort-enums option.

Signed-off-by: Torbjörn SVENSSON 
---
  .../gcc.target/arm/cmse/extend-param.c    | 21 +++
  .../gcc.target/arm/cmse/extend-return.c   |  4 ++--
  2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/extend-param.c 
b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c

index 01fac786238..d01ef87e0be 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-mcmse" } */
+/* { dg-options "-mcmse -fshort-enums" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  #include 
@@ -78,7 +78,6 @@ __attribute__((cmse_nonsecure_entry)) char 
enumSecureFunc (enum offset index) {

    if (index >= ARRAY_SIZE)
  return 0;
    return array[index];
-
  }
  /*
@@ -88,9 +87,23 @@ __attribute__((cmse_nonsecure_entry)) char 
enumSecureFunc (enum offset index) {

  **    ...
  */
  __attribute__((cmse_nonsecure_entry)) char boolSecureFunc (bool 
index) {

-
    if (index >= ARRAY_SIZE)
  return 0;
    return array[index];
+}
-}
\ No newline at end of file
+/*
+**__acle_se_boolCharShortEnumSecureFunc:
+**    ...
+**    uxtb    r0, r0
+**    uxtb    r1, r1
+**    uxth    r2, r2
+**    uxtb    r3, r3
+**    ...
+*/
+__attribute__((cmse_nonsecure_entry,optimize(0))) char 
boolCharShortEnumSecureFunc (bool a, unsigned char b, unsigned short 
c, enum offset d) {

+  size_t index = a + b + c + d;
+  if (index >= ARRAY_SIZE)
+    return 0;
+  return array[index];
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/extend-return.c 
b/gcc/testsuite/gcc.target/arm/cmse/extend-return.c

index cf731ed33df..081de0d699f 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/extend-return.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/extend-return.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-mcmse" } */
+/* { dg-options "-mcmse -fshort-enums" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  #include 
@@ -89,4 +89,4 @@ unsigned char __attribute__((noipa)) enumNonsecure0 
(ns_enum_foo_t * ns_foo_p)

  unsigned char boolNonsecure0 (ns_bool_foo_t * ns_foo_p)
  {
    return ns_foo_p ();
-}
\ No newline at end of file
+}

[Patch, aarch64] v4: Preparatory patch to place target independent and,dependent changed code in one file

2024-05-14 Thread Ajit Agarwal

Hello Alex/Richard:

All comments are addressed.

Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface between target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

Bootstrapped on aarch64-linux-gnu.

Thanks & Regards
Ajit



aarch64: Preparatory patch to place target independent and
dependent changed code in one file

Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface between target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

2024-05-14  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc: Place target
independent and dependent changed code.
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 526 +++
 1 file changed, 345 insertions(+), 181 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 1d9caeab05d..e6af4b0570a 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -138,6 +138,210 @@ struct alt_base
   poly_int64 offset;
 };
 
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+  virtual bool conflict_p (int &budget) const = 0;
+  virtual insn_info *insn () const = 0;
+  virtual bool valid () const = 0;
+  virtual void advance () = 0;
+};
+
+// This is used in handle_writeback_opportunities describing
+// ALL if aarch64_ldp_writeback > 1 otherwise check
+// EXISTING if aarch64_ldp_writeback.
+enum class writeback {
+  ALL,
+  EXISTING
+};
+
+struct pair_fusion {
+  pair_fusion ()
+  {
+calculate_dominance_info (CDI_DOMINATORS);
+df_analyze ();
+crtl->ssa = new rtl_ssa::function_info (cfun);
+  };
+
+  // Given:
+  // - an rtx REG_OP, the non-memory operand in a load/store insn,
+  // - a machine_mode MEM_MODE, the mode of the MEM in that insn, and
+  // - a boolean LOAD_P (true iff the insn is a load), then:
+  // return true if the access should be considered an FP/SIMD access.
+  // Such accesses are segregated from GPR accesses, since we only want
+  // to form pairs for accesses that use the same register file.
+  virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+  {
+return false;
+  }
+
+  // Return true if we should consider forming ldp/stp insns from memory
+  // accesses with operand mode MODE at this stage in compilation.
+  virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+
+  // Return true iff REG_OP is a suitable register operand for a paired
+  // memory access, where LOAD_P is true if we're asking about loads and
+  // false for stores.  MODE gives the mode of the operand.
+  virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mode) = 0;
+
+  // Return alias check limit.
+  // This is needed to avoid unbounded quadratic behaviour when
+  // performing alias analysis.
+  virtual int pair_mem_alias_check_limit () = 0;
+
+  // Returns true if we should try to handle writeback opportunities
+  // WHICH parameter decides ALL or EXISTING writeback pairs.
+  virtual bool handle_writeback_opportunities (enum writeback which) = 0 ;
+
+  // Given BASE_MEM, the mem from the lower candidate access for a pair,
+  // and LOAD_P (true if the access is a load), check if we should proceed
+  // to form the pair given the target's code generation policy on
+  // paired accesses.
+  virtual bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) = 0;
+
+  // Generate the pattern for a paired access. PATS gives the patterns
+  // for the individual memory accesses (which by this point must share a
+  // common base register).  If WRITEBACK is non-NULL, then this rtx
+  // describes the update to the base register that should be performed by
+  // the resulting insn.  LOAD_P is true iff the accesses are loads.
+  virtual rtx gen_pair (rtx *pats, rtx writeback, bool load_p) = 0;
+
+  // Return true if memory is paired access, given INSN and LOAD_P
+  // is true for load insn and false for store insn.
+  virtual bool pair_mem_insn_p (rtx_insn *, bool &) = 0;
+
+  // Return true if we should track loads.
+  virtual bool track_loads_p ()
+  {
+return true;
+  }
+
+  // Return true if we should track stores.
+  virtual bool track_stores_p ()
+  {
+return true;
+  }
+
+  // Return true if OFF  is in range.
+  virtual bool pair_mem_in_range_p (HOST_WIDE_INT off) = 0;
+
+  // Given a load/store pair insn in PATTERN, unpack the insn, storing
+  // the register operands in REGS, and

Re: Fwd: [PATCH 2/7 v2] lto: Remove random_seed from section name.

2024-05-14 Thread Jan Hubicka

> This patch removes suffixes from section names during LTO linking.
> 
> These suffixes were originally added for ld -r to work (PR lto/44992).
> They were added to all LTO object files, but are only useful before WPA.
> After that they waste space, and if kept random, make LTO caching
> impossible.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu
> 
> gcc/ChangeLog:
> 
>   * lto-streamer.cc (lto_get_section_name): Remove suffixes after WPA.
> 
> gcc/lto/ChangeLog:
> 
>   * lto-common.cc (lto_section_with_id): Dont load suffix during LTRANS.
OK,
thanks
Honza

[PATCH] [debug] Fix dwarf v4 .debug_macro.dwo

2024-05-14 Thread Tom de Vries

Consider a hello world, compiled with -gsplit-dwarf and dwarf version 4, and 
-g3:
...
$ gcc -gdwarf-4 -gsplit-dwarf /data/vries/hello.c -g3 -save-temps -dA
...

In section .debug_macro.dwo, we have:
...
.Ldebug_macro0:
.value  0x4 # DWARF macro version number
.byte   0x2 # Flags: 32-bit, lineptr present
.long   .Lskeleton_debug_line0
.byte   0x3 # Start new file
.uleb128 0  # Included from line number 0
.uleb128 0x1# file /data/vries/hello.c
.byte   0x5 # Define macro strp
.uleb128 0  # At line number 0
.uleb128 0x1d0  # The macro: "__STDC__ 1"
...

Given that we use a DW_MACRO_define_strp, we'd expect 0x1d0 to be an
offset into a .debug_str.dwo section.

But in fact, 0x1d0 is an index into the string offset table in
.debug_str_offsets.dwo:
...
.long   0x34f0  # indexed string 0x1d0: __STDC__ 1
...

Add asserts that catch this inconsistency, and fix this by using
DW_MACRO_define_strx instead.

Tested on x86_64.

PR debug/115066
---
 gcc/dwarf2out.cc| 20 ++--
 gcc/testsuite/gcc.dg/pr115066.c |  8 
 2 files changed, 22 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr115066.c

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index eedb13bb069..70b7f5f42cd 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -29045,7 +29045,7 @@ output_macinfo_op (macinfo_entry *ref)
  && !DWARF2_INDIRECT_STRING_SUPPORT_MISSING_ON_TARGET
  && (debug_str_section->common.flags & SECTION_MERGE) != 0)
{
- if (dwarf_split_debug_info && dwarf_version >= 5)
+ if (dwarf_split_debug_info)
ref->code = ref->code == DW_MACINFO_define
? DW_MACRO_define_strx : DW_MACRO_undef_strx;
  else
@@ -29097,12 +29097,20 @@ output_macinfo_op (macinfo_entry *ref)
   HOST_WIDE_INT_PRINT_UNSIGNED,
   ref->lineno);
   if (node->form == DW_FORM_strp)
-dw2_asm_output_offset (dwarf_offset_size, node->label,
-   debug_str_section, "The macro: \"%s\"",
-   ref->info);
+   {
+ gcc_assert (ref->code == DW_MACRO_define_strp
+ || ref->code == DW_MACRO_undef_strp);
+ dw2_asm_output_offset (dwarf_offset_size, node->label,
+debug_str_section, "The macro: \"%s\"",
+ref->info);
+   }
   else
-dw2_asm_output_data_uleb128 (node->index, "The macro: \"%s\"",
- ref->info);
+   {
+ gcc_assert (ref->code == DW_MACRO_define_strx
+ || ref->code == DW_MACRO_undef_strx);
+ dw2_asm_output_data_uleb128 (node->index, "The macro: \"%s\"",
+  ref->info);
+   }
   break;
 case DW_MACRO_import:
   dw2_asm_output_data (1, ref->code, "Import");
diff --git a/gcc/testsuite/gcc.dg/pr115066.c b/gcc/testsuite/gcc.dg/pr115066.c
new file mode 100644
index 000..645757df209
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115066.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-skip-if "split DWARF unsupported" { hppa*-*-hpux* powerpc*-ibm-aix* 
*-*-darwin* } } */
+/* { dg-options "-gsplit-dwarf -g3 -dA -gdwarf-4" } */
+/* { dg-final { scan-assembler-times {\.section\t"?\.debug_macro} 1 } } */
+/* { dg-final { scan-assembler-not {\.byte\t0x5\t# Define macro strp} } } */
+/* { dg-final { scan-assembler {\.byte\t0xb\t# Define macro strx} } } */
+
+#define foo 1

base-commit: 2d0eeb529d400e61197a09c56011be976dd81ef0
-- 
2.35.3

Re: [PATCH] [debug] Fix dwarf v4 .debug_macro.dwo

2024-05-14 Thread Jakub Jelinek

On Tue, May 14, 2024 at 01:35:30PM +0200, Tom de Vries wrote:
> Consider a hello world, compiled with -gsplit-dwarf and dwarf version 4, and 
> -g3:
> ...
> $ gcc -gdwarf-4 -gsplit-dwarf /data/vries/hello.c -g3 -save-temps -dA
> ...
> 
> In section .debug_macro.dwo, we have:
> ...
> .Ldebug_macro0:
> .value  0x4 # DWARF macro version number
> .byte   0x2 # Flags: 32-bit, lineptr present
> .long   .Lskeleton_debug_line0
> .byte   0x3 # Start new file
> .uleb128 0  # Included from line number 0
> .uleb128 0x1# file /data/vries/hello.c
> .byte   0x5 # Define macro strp
> .uleb128 0  # At line number 0
> .uleb128 0x1d0  # The macro: "__STDC__ 1"
> ...
> 
> Given that we use a DW_MACRO_define_strp, we'd expect 0x1d0 to be an
> offset into a .debug_str.dwo section.
> 
> But in fact, 0x1d0 is an index into the string offset table in
> .debug_str_offsets.dwo:
> ...
> .long   0x34f0  # indexed string 0x1d0: __STDC__ 1
> ...
> 
> Add asserts that catch this inconsistency, and fix this by using
> DW_MACRO_define_strx instead.
> 
> Tested on x86_64.
> 
> PR debug/115066

ChangeLog entry is missing.

Otherwise LGTM.

Jakub

Re: [PATCH 4/7] lto: Implement ltrans cache

2024-05-14 Thread Jan Hubicka

> This patch implements Incremental LTO as ltrans cache.
> 
> The cache is active when directory $GCC_LTRANS_CACHE is specified and exists.
> Stored are pairs of ltrans input/output files and input file hash.
> File locking is used to allow multiple GCC instances to use to same cache.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu
> 
> gcc/ChangeLog:
> 
>   * Makefile.in: Add lto-ltrans-cache.o.
>   * lto-wrapper.cc: Use ltrans cache.
>   * lto-ltrans-cache.cc: New file.
>   * lto-ltrans-cache.h: New file.
> diff --git a/gcc/lto-ltrans-cache.cc b/gcc/lto-ltrans-cache.cc
> new file mode 100644
> index 000..0d43e548fb3
> --- /dev/null
> +++ b/gcc/lto-ltrans-cache.cc
> @@ -0,0 +1,407 @@
> +/* File caching.
> +   Copyright (C) 2009-2023 Free Software Foundation, Inc.

Probably copyright should be 2023-2024
> +const md5_checksum_t INVALID_CHECKSUM = {
Maybe static here? Officially there should be a comment before the
function.
> +  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +};
> +
> +/* Computes checksum for given file, returns INVALID_CHECKSUM if not 
> possible.
> + */
comment would look more regular if linebreak is made before possible :)
> +
> +/* Checks identity of two files byte by byte.  */
> +static bool
> +files_identical (char const *first_filename, char const *second_filename)
> +{
> +  FILE *f_first = fopen (first_filename, "rb");
> +  if (!f_first)
> +return false;
> +
> +  FILE *f_second = fopen (second_filename, "rb");
> +  if (!f_second)
> +{
> +  fclose (f_first);
> +  return false;
> +}
> +
> +  bool ret = true;
> +
> +  for (;;)
> +{
> +  int c1, c2;
> +  c1 = fgetc (f_first);
> +  c2 = fgetc (f_second);

I guess reading by fgetc may get quite inefficient here.  Comparing
bigger blocks is probably going to be faster.  We could also
(incrementally) use mmap where supported.
> +
> +/* Contructor of cache item.  */
> +ltrans_file_cache::item::item (std::string input, std::string output,
> +  md5_checksum_t input_checksum, uint32_t last_used):
Here should be enough whitespace so md5_checksum appears just after ( in
line above
  md5_checksum_t input_checksum, uint32_t 
last_used):
> +  input (std::move (input)), output (std::move (output)),
> +  input_checksum (input_checksum), last_used (last_used)
> +{
> +  lock = lockfile (this->input + ".lock");
> +}
> +/* Destructor of cache item.  */
> +ltrans_file_cache::item::~item ()
> +{
> +  lock.unlock ();
> +}
> +
> +/* Reads next cache item from cachedata file.
> +   Adds `dir/` prefix to filenames.  */
> +static ltrans_file_cache::item*
> +read_cache_item (FILE* f, const char* dir)
> +{
> +  md5_checksum_t checksum;
> +  uint32_t last_used;
> +
> +  if (fread (&checksum, 1, checksum.size (), f) != checksum.size ())
> +return NULL;
> +  if (fread (&last_used, sizeof (last_used), 1, f) != 1)
> +return NULL;
> +
> +  std::vector input (strlen (dir));
> +  memcpy (&input[0], dir, input.size ());
> +  input.push_back ('/');
Why this is not std::string?
> +  /* Loads data about previously cached items from cachedata file.
> +
> + Must be called with creation_lock or deletion_lock held to
> + prevent data race.  */
> +  void
> +  load_cache ();
There should be no newline between type and name.  It is there only when
defining function (so it is easy to use old-school grep to find where
function is defined.)

Looks good to me otherwise.
Honza

[Patch, aarch64] v5: Preparatory patch to place target independent and,dependent changed code in one file

2024-05-14 Thread Ajit Agarwal

Hello Alex/Richard:

All review comments are incorporated.

Changes since v4:

 - changed prototype of destructure_pair from rti parameter to pattern 
parameter.


Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface between target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

Bootstrapped on aarch64-linux-gnu.

Thanks & Regards
Ajit


aarch64: Preparatory patch to place target independent and
dependent changed code in one file

Common infrastructure of load store pair fusion is divided into target
independent and target dependent changed code.

Target independent code is the Generic code with pure virtual function
to interface betwwen target independent and dependent code.

Target dependent code is the implementation of pure virtual function for
aarch64 target and the call to target independent code.

2024-05-14  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc: Place target
independent and dependent changed code.
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 526 +++
 1 file changed, 345 insertions(+), 181 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 1d9caeab05d..3551767e29e 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -138,6 +138,210 @@ struct alt_base
   poly_int64 offset;
 };
 
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+  virtual bool conflict_p (int &budget) const = 0;
+  virtual insn_info *insn () const = 0;
+  virtual bool valid () const = 0;
+  virtual void advance () = 0;
+};
+
+// This is used in handle_writeback_opportunities describing
+// ALL if aarch64_ldp_writeback > 1 otherwise check
+// EXISTING if aarch64_ldp_writeback.
+enum class writeback {
+  ALL,
+  EXISTING
+};
+
+struct pair_fusion {
+  pair_fusion ()
+  {
+calculate_dominance_info (CDI_DOMINATORS);
+df_analyze ();
+crtl->ssa = new rtl_ssa::function_info (cfun);
+  };
+
+  // Given:
+  // - an rtx REG_OP, the non-memory operand in a load/store insn,
+  // - a machine_mode MEM_MODE, the mode of the MEM in that insn, and
+  // - a boolean LOAD_P (true iff the insn is a load), then:
+  // return true if the access should be considered an FP/SIMD access.
+  // Such accesses are segregated from GPR accesses, since we only want
+  // to form pairs for accesses that use the same register file.
+  virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+  {
+return false;
+  }
+
+  // Return true if we should consider forming ldp/stp insns from memory
+  // accesses with operand mode MODE at this stage in compilation.
+  virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+
+  // Return true iff REG_OP is a suitable register operand for a paired
+  // memory access, where LOAD_P is true if we're asking about loads and
+  // false for stores.  MODE gives the mode of the operand.
+  virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mode) = 0;
+
+  // Return alias check limit.
+  // This is needed to avoid unbounded quadratic behaviour when
+  // performing alias analysis.
+  virtual int pair_mem_alias_check_limit () = 0;
+
+  // Returns true if we should try to handle writeback opportunities
+  // WHICH parameter decides ALL or EXISTING writeback pairs.
+  virtual bool handle_writeback_opportunities (enum writeback which) = 0 ;
+
+  // Given BASE_MEM, the mem from the lower candidate access for a pair,
+  // and LOAD_P (true if the access is a load), check if we should proceed
+  // to form the pair given the target's code generation policy on
+  // paired accesses.
+  virtual bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) = 0;
+
+  // Generate the pattern for a paired access. PATS gives the patterns
+  // for the individual memory accesses (which by this point must share a
+  // common base register).  If WRITEBACK is non-NULL, then this rtx
+  // describes the update to the base register that should be performed by
+  // the resulting insn.  LOAD_P is true iff the accesses are loads.
+  virtual rtx gen_pair (rtx *pats, rtx writeback, bool load_p) = 0;
+
+  // Return true if memory is paired access, given INSN and LOAD_P
+  // is true for load insn and false for store insn.
+  virtual bool pair_mem_insn_p (rtx_insn *, bool &) = 0;
+
+  // Return true if we should track loads.
+  virtual bool track_loads_p ()
+  {
+return true;
+  }
+
+  // Return true if we should track stores.
+  virtual bool track_stores_p ()
+  {
+return true;
+  }
+
+  // Return true if OFF  is in range.
+  virtual bool pair_mem_in_range_p (HOST_WIDE_INT off) = 0;

Re: [PATCH v4 0/3] ifcvt: Allow if conversion of arithmetic in basic blocks with multiple sets

2024-05-14 Thread Manolis Tsamis

Pinging this for GCC15.

Thanks

On Tue, Apr 23, 2024 at 1:47 PM Manolis Tsamis  wrote:
>
>
> noce_convert_multiple_sets has been introduced and extended over time to 
> handle
> if conversion for blocks with multiple sets. Currently this is focused on
> register moves and rejects any sort of arithmetic operations.
>
> This series is an extension to allow more sequences to take part in if
> conversion. The first patch is a required change to emit correct code and the
> second patch whitelists a larger number of operations through
> bb_ok_for_noce_convert_multiple_sets. The third patch adds support to rewire
> multiple registers in noce_convert_multiple_sets_1 and refactors the code with
> a new helper info struct. The fourth patch removes some old code that should
> not be needed anymore.
>
> For targets that have a rich selection of conditional instructions,
> like aarch64, I have seen an ~5x increase of profitable if conversions for
> multiple set blocks in SPEC benchmarks. Also tested with a wide variety of
> benchmarks and I have not seen performance regressions on either x64 / 
> aarch64.
>
> Some samples that previously resulted in a branch but now better use these
> instructions can be seen in the provided test cases.
>
> Bootstrapped and tested on AArch64 and x86-64.
>
>
> Changes in v4:
> - Remove unnecessary hardcoded list of allowed ops in
> bb_ok_for_noce_convert_multiple_sets.
> - Set need_cmov based on BB live_out instead of REG_DEAD notes.
> - Fix preexisting issues and improve the code that sets 
> read_comparison.
>
> Manolis Tsamis (3):
>   [RFC] ifcvt: handle sequences that clobber flags in
> noce_convert_multiple_sets
>   [RFC] ifcvt: Allow more operations in multiple set if conversion
>   [RFC] ifcvt: Handle multiple rewired regs and refactor
> noce_convert_multiple_sets
>
>  gcc/ifcvt.cc  | 383 --
>  gcc/ifcvt.h   |  16 +
>  .../aarch64/ifcvt_multiple_sets_arithm.c  |  79 
>  .../aarch64/ifcvt_multiple_sets_rewire.c  |  20 +
>  4 files changed, 292 insertions(+), 206 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_rewire.c
>
> --
> 2.34.1
>

Re: [PATCH 5/7] lto: Implement cache partitioning

2024-05-14 Thread Jan Hubicka

> gcc/ChangeLog:
> 
>   * common.opt: Add cache partitioning.
>   * flag-types.h (enum lto_partition_model): Likewise.
> 
> gcc/lto/ChangeLog:
> 
>   * lto-partition.cc (new_partition): Use new_partition_no_push.
>   (new_partition_no_push): New.
>   (free_ltrans_partition): New.
>   (free_ltrans_partitions): Use free_ltrans_partition.
>   (join_partitions): New.
>   (split_partition_into_nodes): New.
>   (is_partition_reorder): New.
>   (class partition_set): New.
>   (distribute_n_partitions): New.
>   (partition_over_target_split): New.
>   (partition_binary_split): New.
>   (partition_fixed_split): New.
>   (class partitioner_base): New.
>   (class partitioner_default): New.
>   (lto_cache_map): New.
>   * lto-partition.h (lto_cache_map): New.
>   * lto.cc (do_whole_program_analysis): Use lto_cache_map.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/completion-2.c: Add -flto-partition=cache.
> +/* Free memory used by ltrans partition.
> +   Encoder can be kept to be freed after streaming.  */
> +static void
> +free_ltrans_partition (ltrans_partition part, bool delete_encoder)
> +  {
No two spaces here (indent everything to left by 2).
> +if (part->initializers_visited)
> +  delete part->initializers_visited;
> +if (delete_encoder)
> +  lto_symtab_encoder_delete (part->encoder);
> +free (part);
It would make sense to turn this into C++ and use destructors
(incrementally).

OK,
Honza

Re: [PATCH 7/7] lto: partition specific lto_clone_numbers

2024-05-14 Thread Jan Hubicka

> Replaces "lto_priv.$clone_number" by
> "lto_priv.$partition_hash.$partition_specific_clone_number".
> To reduce divergence for incremental LTO.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu
OK,
thanks!
Honza
> 
> gcc/lto/ChangeLog:
> 
>   * lto-partition.cc (set_clone_partition_name_checksum): New.
>   (CHECKSUM_STRING): New.
>   (privatize_symbol_name_1): Use partition hash for lto_priv.
>   (lto_promote_cross_file_statics): Use set_clone_partition_name_checksum.
>   (lto_promote_statics_nonwpa): Changed clone_map type.
> ---
>  gcc/lto/lto-partition.cc | 49 +++-
>  1 file changed, 43 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/lto/lto-partition.cc b/gcc/lto/lto-partition.cc
> index eb31ecba0d3..a2ce24eea23 100644
> --- a/gcc/lto/lto-partition.cc
> +++ b/gcc/lto/lto-partition.cc
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "ipa-fnsummary.h"
>  #include "lto-partition.h"
>  #include "sreal.h"
> +#include "md5.h"
>  
>  #include 
>  #include 
> @@ -1516,8 +1517,36 @@ validize_symbol_for_target (symtab_node *node)
>  }
>  }
>  
> -/* Maps symbol names to unique lto clone counters.  */
> -static hash_map *lto_clone_numbers;
> +/* Maps symbol names with partition checksum to unique lto clone counters.  
> */
> +using clone_map = hash_map +  int_hash_base>, unsigned>;
> +static clone_map *lto_clone_numbers;
> +uint64_t current_partition_checksum = 0;
> +
> +/* Computes a quick checksum to distinguish partitions of clone numbers.  */
> +void
> +set_clone_partition_name_checksum (ltrans_partition part)
> +{
> +#define CHECKSUM_STRING(FOO) md5_process_bytes ((FOO), strlen (FOO), &ctx)
> +  struct md5_ctx ctx;
> +  md5_init_ctx (&ctx);
> +
> +  CHECKSUM_STRING (part->name);
> +
> +  lto_symtab_encoder_iterator lsei;
> +  lto_symtab_encoder_t encoder = part->encoder;
> +
> +  for (lsei = lsei_start (encoder); !lsei_end_p (lsei); lsei_next (&lsei))
> +{
> +  symtab_node *node = lsei_node (lsei);
> +  CHECKSUM_STRING (node->name ());
> +}
> +
> +  uint64_t checksum[2];
> +  md5_finish_ctx (&ctx, checksum);
> +  current_partition_checksum = checksum[0];
> +#undef CHECKSUM_STRING
> +}
>  
>  /* Helper for privatize_symbol_name.  Mangle NODE symbol name
> represented by DECL.  */
> @@ -1531,10 +1560,16 @@ privatize_symbol_name_1 (symtab_node *node, tree decl)
>  return false;
>  
>const char *name = maybe_rewrite_identifier (name0);
> -  unsigned &clone_number = lto_clone_numbers->get_or_insert (name);
> +
> +  unsigned &clone_number = lto_clone_numbers->get_or_insert (
> +std::pair {name, current_partition_checksum});
> +
> +  char lto_priv[32];
> +  sprintf (lto_priv, "lto_priv.%lu", current_partition_checksum);
> +
>symtab->change_decl_assembler_name (decl,
> clone_function_name (
> -   name, "lto_priv", clone_number));
> +   name, lto_priv, clone_number));
>clone_number++;
>  
>if (node->lto_file_data)
> @@ -1735,11 +1770,13 @@ lto_promote_cross_file_statics (void)
>part->encoder = compute_ltrans_boundary (part->encoder);
>  }
>  
> -  lto_clone_numbers = new hash_map;
> +  lto_clone_numbers = new clone_map;
>  
>/* Look at boundaries and promote symbols as needed.  */
>for (i = 0; i < n_sets; i++)
>  {
> +  set_clone_partition_name_checksum (ltrans_partitions[i]);
> +
>lto_symtab_encoder_iterator lsei;
>lto_symtab_encoder_t encoder = ltrans_partitions[i]->encoder;
>  
> @@ -1778,7 +1815,7 @@ lto_promote_statics_nonwpa (void)
>  {
>symtab_node *node;
>  
> -  lto_clone_numbers = new hash_map;
> +  lto_clone_numbers = new clone_map;
>FOR_EACH_SYMBOL (node)
>  {
>rename_statics (NULL, node);
> -- 
> 2.42.1
>

Re: [PATCH 6/7] lto: squash order of symbols in partitions

2024-05-14 Thread Jan Hubicka

> This patch squashes order of symbols in individual partitions, so that
> their relative order is conserved, but is not influenced by symbols in
> other partitions.
> Order of cloned symbols is set to 0. This should be fine because order
> specifies order of symbols in input files, which cloned symbols are not
> part of.

The current use of order is somewhat broken (and has been since cgraph was
converted to C++, that is, for a while).
The original code was setting order at the time a function was finalized,
which made them be output in the same order as the bodies appear in
the source code (with a -fno-toplevel-reorder build at least).

With this logic the clones should have the same order as the originals, so
they appear next to them.

Later, the initialization of order was moved to register_symbol, which
is kind of wrong since frontends are allowed to produce symbols early.
So it would be nice to fix this problem and make sure that the order of
clones is sane.

I guess this is a bit independent of the rest of the caching work, so maybe
we can first get the other patches in and then worry about order?
> 
> This is important for incremental LTO because if there is a new symbol,
> it otherwise shifts order of all symbols with higher order, which would
> diverge them all.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu
> 
> gcc/ChangeLog:
> 
>   * lto-cgraph.cc (lto_output_node): Add and use order_remap.
>   (lto_output_varpool_node): Likewise.
>   (output_symtab): Likewise.
>   * lto-streamer-out.cc (produce_asm): Likewise.
>   (output_function): Likewise.
>   (output_constructor): Likewise.
>   (copy_function_or_variable): Likewise.
>   (cmp_int): New.
>   (lto_output): Generate order_remap.
>   * lto-streamer.h (produce_asm): Add order_remap.
>   (output_symtab): Likewise.
> ---
>  gcc/lto-cgraph.cc   | 20 
>  gcc/lto-streamer-out.cc | 71 +
>  gcc/lto-streamer.h  |  5 +--
>  3 files changed, 73 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
> index 32c0f5ac6db..a7530290fba 100644
> --- a/gcc/lto-cgraph.cc
> +++ b/gcc/lto-cgraph.cc
> @@ -381,7 +381,8 @@ reachable_from_this_partition_p (struct cgraph_node 
> *node, lto_symtab_encoder_t
>  
>  static void
>  lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node 
> *node,
> -  lto_symtab_encoder_t encoder)
> +  lto_symtab_encoder_t encoder,
> +  hash_map, int>* order_remap)
>  {
>unsigned int tag;
>struct bitpack_d bp;
> @@ -405,7 +406,9 @@ lto_output_node (struct lto_simple_output_block *ob, 
> struct cgraph_node *node,
>  
>streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
>  tag);
> -  streamer_write_hwi_stream (ob->main_stream, node->order);
> +
> +  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
> +  streamer_write_hwi_stream (ob->main_stream, order);
>  
>/* In WPA mode, we only output part of the call-graph.  Also, we
>   fake cgraph node attributes.  There are two cases that we care.
> @@ -585,7 +588,8 @@ lto_output_node (struct lto_simple_output_block *ob, 
> struct cgraph_node *node,
>  
>  static void
>  lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node 
> *node,
> -  lto_symtab_encoder_t encoder)
> +  lto_symtab_encoder_t encoder,
> +  hash_map, int>* order_remap)
>  {
>bool boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node);
>bool encode_initializer_p
> @@ -602,7 +606,8 @@ lto_output_varpool_node (struct lto_simple_output_block 
> *ob, varpool_node *node,
>  
>streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
>  LTO_symtab_variable);
> -  streamer_write_hwi_stream (ob->main_stream, node->order);
> +  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
> +  streamer_write_hwi_stream (ob->main_stream, order);
>lto_output_var_decl_ref (ob->decl_state, ob->main_stream, node->decl);
>bp = bitpack_create (ob->main_stream);
>bp_pack_value (&bp, node->externally_visible, 1);
> @@ -967,7 +972,7 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
>  /* Output the part of the symtab in SET and VSET.  */
>  
>  void
> -output_symtab (void)
> +output_symtab (hash_map, int>* order_remap)
>  {
>struct cgraph_node *node;
>struct lto_simple_output_block *ob;
> @@ -994,9 +999,10 @@ output_symtab (void)
>  {
>symtab_node *node = lto_symtab_encoder_deref (encoder, i);
>if (cgraph_node *cnode = dyn_cast  (node))
> -lto_output_node (ob, cnode, encoder);
> + lto_output_node (ob, cnode, encoder, order_remap);
>else
> - lto_output_varpool_node (ob, dyn_cast (node), encoder);
> + lto_output_varpool_node (ob, dyn_cast (node), encoder,
> +  order_remap);
>  }
>  
>

RE: [PATCH] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-05-14 Thread Richard Biener

On Tue, 14 May 2024, Hu, Lin1 wrote:

> Do you have any advice?
> 
> BRs,
> Lin
> 
> -Original Message-
> From: Hu, Lin1  
> Sent: Wednesday, May 8, 2024 9:38 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao ; ubiz...@gmail.com
> Subject: [PATCH] vect: generate suitable convert insn for int -> int, float 
> -> float and int <-> float.
> 
> Hi, all
> 
> This patch aims to optimize __builtin_convertvector. We want the function can 
> generate more efficient insn for some situations. Like v2si -> v2di.
> 
> The patch has been bootstrapped and regtested on x86_64-pc-linux-gnu, OK for 
> trunk?

I don't like the new code to be in a separate function, not integrated
with the existing handling.  Note the existing handling should get, say,
V8DF -> V8SI correct for SSE by splitting the operation into smaller
vectors but your code seems to just handle the cases where the vectors are
already properly sized.

Without checking it seems you are basing the code on what the
vectorizer does?  Maybe we should have some common code that
computes intermediate conversion steps supported by the HW
unifying what for example supportable_widening_operation or
supportable_narrowing_operation can do to also cover int <-> float
conversions.

That said, if you don't want to do that please still think about
the core part of tree-vect-generic.cc which is breaking down large
emulated vectors into small supported vectors.

Richard.

> BRs,
> Lin
> 
> gcc/ChangeLog:
> 
>   PR target/107432
>   * tree-vect-generic.cc (expand_vector_conversion): Support
>   convert for int -> int, float -> float and int <-> float.
>   (expand_vector_conversion_no_vec_pack): Check if can convert
>   int <-> int, float <-> float and int <-> float, directly.
>   Support indirect convert, when direct optab is not supported.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/107432
>   * gcc.target/i386/pr107432-1.c: New test.
>   * gcc.target/i386/pr107432-2.c: Ditto.
>   * gcc.target/i386/pr107432-3.c: Ditto.
>   * gcc.target/i386/pr107432-4.c: Ditto.
>   * gcc.target/i386/pr107432-5.c: Ditto.
>   * gcc.target/i386/pr107432-6.c: Ditto.
>   * gcc.target/i386/pr107432-7.c: Ditto.
> ---
>  gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 +  
> gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +  
> gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +  
> gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +  
> gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 +++  
> gcc/testsuite/gcc.target/i386/pr107432-6.c | 139   
> gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 ++
>  gcc/tree-vect-generic.cc   | 107 +-
>  8 files changed, 918 insertions(+), 6 deletions(-)  create mode 100644 
> gcc/testsuite/gcc.target/i386/pr107432-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
> b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> new file mode 100644
> index 000..a4f37447eb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -0,0 +1,234 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } 
> +} */
> +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } 
> +} } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } 
> +} */
> +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } 
> +} } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include 
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef 
> +char __v2qi __attribute__ ((__vector_size__ (2))); typedef char __v4qi 
> +__attribute__ ((__vector_size__ (4))); typedef char __v8qi 
> +__attribute__ ((__vector_size__ (8)));
> +
> +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); 
> +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); 
> +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); 
> +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); 
> +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); 
> +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) {
> +  return __builtin_convertvector((__v2di)a, __v2si); }
>

Re: Avoid TYPE_MAIN_VARIANT compares in TBAA

2024-05-14 Thread Richard Biener

On Tue, 14 May 2024, Jan Hubicka wrote:

> Hi,
> while building more testcases for ipa-icf I noticed that there are two places
> in aliasing code where we still compare TYPE_MAIN_VARIANT for pointer 
> equality.
> This is not good idea for LTO since type merging may not happen for example
> when in one unit pointed to type is forward declared while in other it is 
> fully
> defined.  We have same_type_for_tbaa for that.
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.

Richard.

> gcc/ChangeLog:
> 
>   * alias.cc (reference_alias_ptr_type_1): Use view_converted_memref_p.
>   * alias.h (view_converted_memref_p): Declare.
>   * tree-ssa-alias.cc (view_converted_memref_p): Export.
>   (ao_compare::compare_ao_refs): Use same_type_for_tbaa.
> 
> diff --git a/gcc/alias.cc b/gcc/alias.cc
> index 808e2095d9b..853e84d7439 100644
> --- a/gcc/alias.cc
> +++ b/gcc/alias.cc
> @@ -770,10 +770,7 @@ reference_alias_ptr_type_1 (tree *t)
>/* If the innermost reference is a MEM_REF that has a
>   conversion embedded treat it like a VIEW_CONVERT_EXPR above,
>   using the memory access type for determining the alias-set.  */
> -  if (TREE_CODE (inner) == MEM_REF
> -  && (TYPE_MAIN_VARIANT (TREE_TYPE (inner))
> -   != TYPE_MAIN_VARIANT
> -(TREE_TYPE (TREE_TYPE (TREE_OPERAND (inner, 1))
> +  if (view_converted_memref_p (inner))
>  {
>tree alias_ptrtype = TREE_TYPE (TREE_OPERAND (inner, 1));
>/* Unless we have the (aggregate) effective type of the access
> diff --git a/gcc/alias.h b/gcc/alias.h
> index f8d93e8b5f4..36095f0bf73 100644
> --- a/gcc/alias.h
> +++ b/gcc/alias.h
> @@ -41,6 +41,7 @@ bool alias_ptr_types_compatible_p (tree, tree);
>  int compare_base_decls (tree, tree);
>  bool refs_same_for_tbaa_p (tree, tree);
>  bool mems_same_for_tbaa_p (rtx, rtx);
> +bool view_converted_memref_p (tree);
>  
>  /* This alias set can be used to force a memory to conflict with all
> other memories, creating a barrier across which no memory reference
> diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
> index e7c1c1aa624..632cf78028b 100644
> --- a/gcc/tree-ssa-alias.cc
> +++ b/gcc/tree-ssa-alias.cc
> @@ -2044,7 +2044,7 @@ decl_refs_may_alias_p (tree ref1, tree base1,
> which is done by ao_ref_base and thus one extra walk
> of handled components is needed.  */
>  
> -static bool
> +bool
>  view_converted_memref_p (tree base)
>  {
>if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF)
> @@ -4325,8 +4325,8 @@ ao_compare::compare_ao_refs (ao_ref *ref1, ao_ref *ref2,
>else if ((end_struct_ref1 != NULL) != (end_struct_ref2 != NULL))
>  return flags | ACCESS_PATH;
>if (end_struct_ref1
> -  && TYPE_MAIN_VARIANT (TREE_TYPE (end_struct_ref1))
> -  != TYPE_MAIN_VARIANT (TREE_TYPE (end_struct_ref2)))
> +  && same_type_for_tbaa (TREE_TYPE (end_struct_ref1),
> +  TREE_TYPE (end_struct_ref2)) != 1)
>  return flags | ACCESS_PATH;
>  
>/* Now compare all handled components of the access path.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

[PATCH] testsuite: analyzer: Fix fd-glibc-byte-stream-connection-server.c on Solaris [PR107750]

2024-05-14 Thread Rainer Orth

gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c currently FAILs
on Solaris:

FAIL: gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c (test for excess 
errors)

Excess errors:
/vol/gcc/src/hg/master/local/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c:91:3:
 error: implicit declaration of function 'memset' 
[-Wimplicit-function-declaration]

Solaris <sys/select.h> has

#define FD_ZERO(__p)(void) memset((__p), 0, sizeof (*(__p))) 

but no declaration of memset.  While one can argue that this should be
fixed, it's easy enough to just include <string.h> instead, which is
what this patch does.

Tested on i386-pc-solaris2.11 and i686-pc-linux-gnu.

Ok for trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-05-14  Rainer Orth  

gcc/testsuite:
PR analyzer/107750
* gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c:
Include <string.h>.

# HG changeset patch
# Parent  4d8ceef2997b0a0ff7a4b4df140f9f864e923eb4
testsuite: analyzer: Fix fd-glibc-byte-stream-connection-server.c on Solaris [PR107750]

diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c b/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
--- a/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
+++ b/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #if __has_include()

Re: [COMMITTED 2/5] Fix ranger when called from SCEV.

2024-05-14 Thread Richard Biener

On Mon, May 13, 2024 at 8:28 PM Jan-Benedict Glaw  wrote:
>
> On Mon, 2024-05-13 20:19:42 +0200, Jan-Benedict Glaw  
> wrote:
> > On Tue, 2024-04-30 17:24:15 -0400, Andrew MacLeod  
> > wrote:
> > > Bootstrapped on x86_64-pc-linux-gnu with no regressions.  pushed.
> >
> > Starting with this patch (upstream as
> > e8ae56a7dc46e39a48017bb5159e4dc672ec7fad, can still be reproduced with
> > 0c585c8d0dd85601a8d116ada99126a48c8ce9fd as of May 13th), my CI builds fail 
> > for
> > csky-elf in all-target-libgcc by falling into an infinite loop:

Does the CI build GCC for the host and then use that compiler to build
the csky cross?  That said,
I can't see how the ref (or wasn't this a bisect?) can cause an issue
in LRA when building a cross-compiler.

Richard.

> > ../gcc/configure '--with-pkgversion=basepoints/gcc-15-432-g0c585c8d0dd, 
> > built at 1715608899'  \
> >   --prefix=/tmp/gcc-csky-elf --enable-werror-always 
> > --enable-languages=all\
> >   --disable-gcov --disable-shared --disable-threads --target=csky-elf 
> > --without-headers
> > make V=1 all-gcc
> > make V=1 install-strip-gcc
> > make V=1 all-target-libgcc
>
> Just to add:
>
> /var/lib/laminar/run/gcc-csky-elf/65/toolchain-build/./gcc/cc1 -quiet 
>   \
> -I . -I . -I ../../.././gcc -I ../../../../gcc/libgcc 
>   \
> -I ../../../../gcc/libgcc/. -I ../../../../gcc/libgcc/../gcc  
>   \
> -I ../../../../gcc/libgcc/../include -imultilib ck801 
>   \
> -iprefix 
> /var/lib/laminar/run/gcc-csky-elf/65/toolchain-build/gcc/../lib/gcc/csky-elf/15.0.0/
>\
> -isystem 
> /var/lib/laminar/run/gcc-csky-elf/65/toolchain-build/./gcc/include
>  \
> -isystem 
> /var/lib/laminar/run/gcc-csky-elf/65/toolchain-build/./gcc/include-fixed  
>  \
> -MD unwind-dw2-fde.d -MF unwind-dw2-fde.dep -MP -MT unwind-dw2-fde.o  
>   \
> -D IN_GCC -D CROSS_DIRECTORY_STRUCTURE -D IN_LIBGCC2 -D inhibit_libc  
>   \
> -D HAVE_CC_TLS -D USE_EMUTLS -D HIDE_EXPORTS  
>   \
> -isystem /tmp/gcc-csky-elf/csky-elf/include   
>   \
> -isystem /tmp/gcc-csky-elf/csky-elf/sys-include   
>   \
> -isystem ./include ../../../../gcc/libgcc/unwind-dw2-fde.c -quiet 
>   \
> -dumpbase unwind-dw2-fde.c -dumpbase-ext .c -mcpu=ck801 -g -g -g -O2 
> -O2 -O2\
> -Wextra -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual 
> -Wstrict-prototypes\
> -Wmissing-prototypes -Wold-style-definition -fbuilding-libgcc 
> -fno-stack-protector  \
> -fexceptions -fvisibility=hidden -o /tmp/cc3SHedS.s
>
> > (gdb) bt
> > #0  0x0098f1df in bitmap_list_find_element (head=0x38f2e18, 
> > indx=5001) at ../../gcc/gcc/bitmap.cc:375
> > #1  bitmap_set_bit (head=0x38f2e18, bit=640244) at 
> > ../../gcc/gcc/bitmap.cc:962
> > #2  0x00d39cd1 in process_bb_lives (bb=, 
> > curr_point=@0x7ffe062c1b2c: 3039473, dead_insn_p=) at 
> > ../../gcc/gcc/lra-lives.cc:889
> > #3  lra_create_live_ranges_1 (all_p=all_p@entry=true, 
> > dead_insn_p=) at ../../gcc/gcc/lra-lives.cc:1416
> > #4  0x00d3b810 in lra_create_live_ranges (all_p=all_p@entry=true, 
> > dead_insn_p=) at ../../gcc/gcc/lra-lives.cc:1486
> > #5  0x00d1a8bd in lra (f=, verbose=) 
> > at ../../gcc/gcc/lra.cc:2482
> > #6  0x00cd0e18 in do_reload () at ../../gcc/gcc/ira.cc:5973
> > #7  (anonymous namespace)::pass_reload::execute (this=) at 
> > ../../gcc/gcc/ira.cc:6161
> > #8  0x00de6368 in execute_one_pass (pass=pass@entry=0x367c490) at 
> > ../../gcc/gcc/passes.cc:2647
> > #9  0x00de6c00 in execute_pass_list_1 (pass=0x367c490) at 
> > ../../gcc/gcc/passes.cc:2756
> > #10 0x00de6c12 in execute_pass_list_1 (pass=0x367b2f0) at 
> > ../../gcc/gcc/passes.cc:2757
> > #11 0x00de6c39 in execute_pass_list (fn=0x7f24a1c06240, 
> > pass=) at ../../gcc/gcc/passes.cc:2767
> > #12 0x00a188c6 in cgraph_node::expand (this=0x7f24a1bfaaa0) at 
> > ../../gcc/gcc/context.h:48
> > #13 cgraph_node::expand (this=0x7f24a1bfaaa0) at 
> > ../../gcc/gcc/cgraphunit.cc:1798
> > #14 0x00a1a69b in expand_all_functions () at 
> > ../../gcc/gcc/cgraphunit.cc:2028
> > #15 symbol_table::compile (this=0x7f24a205b000) at 
> > ../../gcc/gcc/cgraphunit.cc:2404
> > #16 0x00a1ccb8 in symbol_table::compile (this=0x7f24a205b000) at 
> > ../../gcc/gcc/cgraphunit.cc:2315
> > #17 symbol_table::finalize_compilation_unit (this=0x7f24a205b000) at 
> > ../../gcc/gcc/cgraphunit.cc:2589
> > #18 0x00f0932d in compile_file () at ../../gcc/gcc/toplev.cc:476
> > #19 0x00839648 in do_compile () at ../../gcc/gcc/toplev.cc:2158
> > #20 toplev::main (this=this@

Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-14 Thread Richard Biener

On Mon, 13 May 2024, Qing Zhao wrote:

> -Warray-bounds is an important option to enable linux kernel to keep
> the array out-of-bound errors out of the source tree.
> 
> However, due to the false positive warnings reported in PR109071
> (-Warray-bounds false positive warnings due to code duplication from
> jump threading), -Warray-bounds=1 cannot be added on by default.
> 
> Although it's impossible to eliminate all the false positive warnings
> from -Warray-bounds=1 (See PR104355 Misleading -Warray-bounds
> documentation says "always out of bounds"), we should minimize the
> false positive warnings in -Warray-bounds=1.
> 
> The root reason for the false positive warnings reported in PR109071 is:
> 
> When the thread jump optimization tries to reduce the # of branches
> inside the routine, sometimes it needs to duplicate the code and
> split into two conditional pathes. for example:
> 
> The original code:
> 
> void sparx5_set (int * ptr, struct nums * sg, int index)
> {
>   if (index >= 4)
> warn ();
>   *ptr = 0;
>   *val = sg->vals[index];
>   if (index >= 4)
> warn ();
>   *ptr = *val;
> 
>   return;
> }
> 
> With the thread jump, the above becomes:
> 
> void sparx5_set (int * ptr, struct nums * sg, int index)
> {
>   if (index >= 4)
> {
>   warn ();
>   *ptr = 0;   // Code duplications since "warn" does return;
>   *val = sg->vals[index]; // same this line.
>   // In this path, since it's under the condition
>   // "index >= 4", the compiler knows the value
>   // of "index" is larger then 4, therefore the
>   // out-of-bound warning.
>   warn ();
> }
>   else
> {
>   *ptr = 0;
>   *val = sg->vals[index];
> }
>   *ptr = *val;
>   return;
> }
> 
> We can see, after the thread jump optimization, the # of branches inside
> the routine "sparx5_set" is reduced from 2 to 1, however,  due to the
> code duplication (which is needed for the correctness of the code), we
> got a false positive out-of-bound warning.
> 
> In order to eliminate such false positive out-of-bound warning,
> 
> A. Add one more flag for GIMPLE: is_splitted.
> B. During the thread jump optimization, when the basic blocks are
>duplicated, mark all the STMTs inside the original and duplicated
>basic blocks as "is_splitted";
> C. Inside the array bound checker, add the following new heuristic:
> 
> If
>1. the stmt is duplicated and splitted into two conditional paths;
> +  2. the warning level < 2;
> +  3. the current block is not dominating the exit block
> Then not report the warning.
> 
> The false positive warnings are moved from -Warray-bounds=1 to
>  -Warray-bounds=2 now.
> 
> Bootstrapped and regression tested on both x86 and aarch64. adjusted
>  -Warray-bounds-61.c due to the false positive warnings.
> 
> Let me know if you have any comments and suggestions.

At the last Cauldron I talked with David Malcolm about these kinds of
issues and thought that, instead of suppressing diagnostics, we could record
how a block was duplicated.  For jump threading my idea was to record
the condition that was proved true when entering the path and do this
by recording the corresponding locations so that in the end we can
use the diagnostic-path infrastructure to say

warning: array index always above array bounds
events 1:

| 3 |  if (index >= 4)
 |
(1) when index >= 4

it would be possible to record the info as part of the ad-hoc
location data on each duplicated stmt or, possibly simpler,
as part of a debug stmt of new kind.

I'm not sure pruning the warnings is a good thing to do.  One
would argue we should instead isolate such path as unreachable
since it invokes undefined behavior.  In particular your
example is clearly a bug and should be diagnosed.

Note very similar issues happen when unrolling a loop.

Note all late diagnostics are prone to these kinds of issues.

Richard.

> Thanks.
> 
> Qing
> 
> 
>   PR tree optimization/109071
> 
> gcc/ChangeLog:
> 
>   * gimple-array-bounds.cc (check_out_of_bounds_and_warn): Add two new
>   arguments for the new heuristic to not issue warnings.
>   (array_bounds_checker::check_array_ref): Call the new prototype of the
>   routine check_out_of_bounds_and_warn.
>   (array_bounds_checker::check_mem_ref): Add one new argument for the
>   new heuristic to not issue warnings.
>   (array_bounds_checker::check_addr_expr): Call the new prototype of the
>   routine check_mem_ref, add new heuristic for not issue warnings.
>   (array_bounds_checker::check_array_bounds): Call the new prototype of
>   the routine check_mem_ref.
>   * gimple-array-bounds.h: New prototype of check_mem_ref.
>   * gimple.h (struct GTY): Add one new flag is_splitted for gimple.
>   (gimple_is_splitted_p): New function.
>   (gimple_set_is_splitted): New function.
>   * tree-ssa-threadupdat

Re: [PATCH v4 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-14 Thread Richard Biener

On Mon, May 6, 2024 at 4:48 PM  wrote:
>
> From: Pan Li 
>
> This patch would like to add the middle-end presentation for the
> saturation add.  Aka set the result of add to the max when overflow.
> It will take the pattern similar as below.
>
> SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
>
> Take uint8_t as example, we will have:
>
> * SAT_ADD (1, 254)   => 255.
> * SAT_ADD (1, 255)   => 255.
> * SAT_ADD (2, 255)   => 255.
> * SAT_ADD (255, 255) => 255.
>
> Given below example for the unsigned scalar integer uint64_t:
>
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
>
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 = REALPART_EXPR <_11>;
>   _10 = IMAGPART_EXPR <_11>;
>   _2 = _10 != 0;
>   _3 = (long unsigned int) _2;
>   _4 = -_3;
>   _7 = _1 | _4;
>   return _7;
> ;;succ:   EXIT
>
> }
>
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;succ:   EXIT
> }
>
> We perform the transform during widen_mult because the sub-expr of
> SAT_ADD will be optimized to .ADD_OVERFLOW.  We need to try the .SAT_ADD
> pattern first and then .ADD_OVERFLOW,  or we may never catch the pattern
> .SAT_ADD.  Meanwhile, the isel pass is after widen_mult and then we
> cannot perform the .SAT_ADD pattern match as the sub-expr will be
> optimized to .ADD_OVERFLOW first.
>
> The below tests are passed for this patch:
> 1. The riscv fully regression tests.
> 2. The aarch64 fully regression tests.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
>
> PR target/51492
> PR target/112600
>
> gcc/ChangeLog:
>
> * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> to the return true switch case(s).
> * internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> * match.pd: Add unsigned SAT_ADD match.
> * optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssadd.
> * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New extern
> func decl generated in match.pd match.
> (match_saturation_arith): New func impl to match the saturation arith.
> (math_opts_dom_walker::after_dom_children): Try match saturation
> arith.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc|  1 +
>  gcc/internal-fn.def   |  2 ++
>  gcc/match.pd  | 28 
>  gcc/optabs.def|  4 ++--
>  gcc/tree-ssa-math-opts.cc | 46 +++
>  5 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>  case IFN_UBSAN_CHECK_MUL:
>  case IFN_ADD_OVERFLOW:
>  case IFN_MUL_OVERFLOW:
> +case IFN_SAT_ADD:
>  case IFN_VEC_WIDEN_PLUS:
>  case IFN_VEC_WIDEN_PLUS_LO:
>  case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
> ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>   smulhrs, umulhrs, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, 
> binary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d401e7503e6..7058e4cbe29 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,6 +3043,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> || POINTER_TYPE_P (itype))
>&& wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
>
> +/* Unsigned Saturation Add */
> +(match (usadd_left_part @0 @1)
> + (plus:c @0 @1)
> + (if (INTEGRAL_TYPE_P (type)
> +  && TYPE_UNSIGNED (TREE_TYPE (@0))
> +  && types_match (type, TREE_TYPE (@0))
> +  && types_match (type, TREE_TYPE (@1)
> +
> +(match (usadd_right_part @0 @1)
> + (negate (convert (lt (plus:c @0 @1) @0)))
> + (if (INTEGRAL_TYPE_P (type)
> +  && TYPE_UNSIGNED (TREE_TYPE (@0))
> +  && types_match (type, TREE_TYPE (@0))
> +  && types_match (type, TREE_TYPE (@1)
> +
> +(match (usadd_right_part @0 @1)
>

Re: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int

2024-05-14 Thread Richard Biener

On Mon, May 6, 2024 at 4:49 PM  wrote:
>
> From: Pan Li 
>
> This patch depends on below scalar enabling patch:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html
>
> For vectorize, we leverage the existing vect pattern recog to find
> the pattern similar to scalar and let the vectorizer to perform
> the rest part for standard name usadd3 in vector mode.
> The riscv vector backend have insn "Vector Single-Width Saturating
> Add and Subtract" which can be leveraged when expand the usadd3
> in vector mode.  For example:
>
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   unsigned i;
>
>   for (i = 0; i < n; i++)
> out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
> }
>
> Before this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
>   ivtmp_58 = _80 * 8;
>   vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
>   vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
>   vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
>   mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
>   vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
> vect__7.11_66);
>   .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
>   vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
>   vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
>   vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
>   ivtmp_79 = ivtmp_78 - _80;
>   ...
> }
>
> After this patch:
> void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
> {
>   ...
>   _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
>   ivtmp_46 = _62 * 8;
>   vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
>   vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
>   vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
>   .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
>   ...
> }
>
> The below test suites are passed for this patch.
> * The riscv fully regression tests.
> * The aarch64 fully regression tests.
> * The x86 bootstrap tests.
> * The x86 fully regression tests.
>
> PR target/51492
> PR target/112600
>
> gcc/ChangeLog:
>
> * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func
> decl generated by match.pd match.
> (vect_recog_sat_add_pattern): New func impl to recog the pattern
> for unsigned SAT_ADD.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/tree-vect-patterns.cc | 51 +++
>  1 file changed, 51 insertions(+)
>
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 87c2acff386..8ffcaf71d5c 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo,
>return pattern_stmt;
>  }
>
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
> + *   _7 = _4 + _6;
> + *   _8 = _4 > _7;
> + *   _9 = (long unsigned int) _8;
> + *   _10 = -_9;
> + *   _12 = _7 | _10;
> + *
> + * And then simplified to
> + *   _12 = .SAT_ADD (_4, _6);
> + */
> +
> +static gimple *
> +vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +   tree *type_out)
> +{
> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> +
> +  if (!is_gimple_assign (last_stmt))
> +return NULL;
> +
> +  tree res_ops[2];
> +  tree lhs = gimple_assign_lhs (last_stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
> +{
> +  tree itype = TREE_TYPE (res_ops[0]);
> +  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
> +
> +  if (vtype != NULL_TREE && direct_internal_fn_supported_p (
> +   IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED))

Please break the line before the && instead, like

  if (vtype != NULL_TREE
  && direct_internal_fn_supported_p (...

Otherwise this is OK once 1/3 is approved.

Thanks,
Richard.

> +   {
> + *type_out = vtype;
> + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, 
> res_ops[0],
> +   res_ops[1]);
> +
> + gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
> + gimple_call_set_nothrow (call, /* nothrow_p */ false);
> + gimple_set_location (call, gimple_location (last_stmt));
> +
> + vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
> + return call;
> +   }
> +}
> +
> +  return NULL;
> +}
> +
>  /* Detect a signed division by a constant that wouldn't be
> otherwise vectorized:
>
> @@ -6987,6 +7037,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>{ vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
>{ vect_reco

Re: [r15-429 Regression] FAIL: experimental/simd/pr109261_constexpr_simd.cc -msse2 -O2 -Wno-psabi (test for excess errors) on Linux/x86_64

2024-05-14 Thread Matthias Kretz

Thanks for the report. But I'm unable to reproduce the issue. I'm testing on a 
Skylake-AVX512 system. I even did a clean rebuild of all of GCC using your 
configuration (minus your prefix) and still no failure.

Could you please send me your libstdc++.log after failing the test?

Best,
  Matthias

On Montag, 13. Mai 2024 18:55:13 MESZ haochen. jiang wrote:
> On Linux/x86_64,
> 
> fb1649f8b4ad5043dd0e65e4e3a643a0ced018a9 is the first bad commit
> commit fb1649f8b4ad5043dd0e65e4e3a643a0ced018a9
> Author: Matthias Kretz 
> Date:   Mon May 6 12:13:55 2024 +0200
> 
> libstdc++: Use __builtin_shufflevector for simd split and concat
> 
> caused
> 
> FAIL: experimental/simd/pr109261_constexpr_simd.cc -msse2 -O2 -Wno-psabi
> (test for excess errors)
> 
> with GCC configured with
> 
> ../../gcc/configure
> --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r15-429/usr
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet
> --without-isl --enable-libmpx x86_64-linux --disable-bootstrap
> 
> To reproduce:
> 
> $ cd {build_dir}/x86_64-linux/libstdc++-v3/testsuite && make check
> RUNTESTFLAGS="conformance.exp=experimental/simd/pr109261_constexpr_simd.cc
> --target_board='unix{-m32}'"
> 
> (Please do not reply to this email, for question about this report, contact
> me at haochen dot jiang at intel.com.) (If you met problems with
> cascadelake related, disabling AVX512F in command line might save that.)
> (However, please make sure that there is no potential problems with
> AVX512.)


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Center for Heavy Ion Research   https://gsi.de
 std::simd
──

Re: Ping [PATCH/RFC] target, hooks: Allow a target to trap on unreachable [PR109267].

2024-05-14 Thread Richard Biener

On Wed, May 8, 2024 at 9:37 PM Iain Sandoe  wrote:
>
> Hi Folks,
>
> I’d like to land a viable solution to this issue if possible, (it is a show-
> stopper for the aarch64-darwin development branch).

I was looking as to how we handle __builtin_trap (whether we have an
optab for it) - we seem to use two target hooks, have_trap () and
gen_trap () to expand it (and fall back to a call to abort()).  So I guess
your target hook is reasonable though I'd name it
expand_unreachable_as_trap maybe (well, that's now bikeshedding).

Is this all still required or is there a workaround you can apply at
mdreorg or bb-reorder time to avoid expanding _all_ unreachable()s
as traps?

> > On 9 Apr 2024, at 14:55, Iain Sandoe  wrote:
> >
> > So far, tested lightly on aarch64-darwin; if this is acceptable then
> > it will be possible to back out of the ad hoc fixes used on x86 and
> > powerpc darwin.
> > Comments welcome, thanks,
>
> @Andrew - you were also (at one stage) talking about some ideas about
> how to handle this in the middle end.
> Is that something you are likely to have time to do?
> Would it still be reasonable to have a target hook to control the behaviour?
> (the implementation below allows one to make the effect per TU)
>
>
> > Iain
> >
> > --- 8< ---
> >
> >
> > In the PR cited case a target linker cannot handle empty FDEs,
> > arguably this is a linker bug - but in some cases we might still
> > wish to work around it.
> >
> > In the case of Darwin, the ABI does not allow two global symbols
> > to have the same address, so that emitting empty functions has
> > potential (almost guarantee) to break ABI.
> >
> > This patch allows a target to ask that __builtin_unreachable is
> > expanded in the same way as __builtin_trap (either to a trap
> > instruction or to abort() if there is no such insn).
> >
> > This means that the middle end's use of unreachability for
> > optimisation should not be altered.
> >
> > __builtin_unreachable is currently expanded to a barrier and
> > __builtin_trap is expanded to a trap insn + a barrier so that it
> > seems we should not be unduly affecting RTL optimisations.
> >
> > For Darwin, we enable this by default, but allow it to be disabled
> > per TU using -mno-unreachable-traps.
> >
> >   PR middle-end/109267
> >
> > gcc/ChangeLog:
> >
> >   * builtins.cc (expand_builtin_unreachable): Allow for
> >   a target to expand this as a trap.
> >   * config/darwin-protos.h (darwin_unreachable_traps_p): New.
> >   * config/darwin.cc (darwin_unreachable_traps_p): New.
> >   * config/darwin.h (TARGET_UNREACHABLE_SHOULD_TRAP): New.
> >   * config/darwin.opt (munreachable-traps): New.
> >   * doc/invoke.texi: Document -munreachable-traps.
> >   * doc/tm.texi: Regenerate.
> >   * doc/tm.texi.in: Document TARGET_UNREACHABLE_SHOULD_TRAP.
> >   * target.def (TARGET_UNREACHABLE_SHOULD_TRAP): New hook.
> >
> > Signed-off-by: Iain Sandoe 
> > ---
> > gcc/builtins.cc|  7 +++
> > gcc/config/darwin-protos.h |  1 +
> > gcc/config/darwin.cc   |  7 +++
> > gcc/config/darwin.h|  4 
> > gcc/config/darwin.opt  |  4 
> > gcc/doc/invoke.texi|  7 ++-
> > gcc/doc/tm.texi|  5 +
> > gcc/doc/tm.texi.in |  2 ++
> > gcc/target.def | 10 ++
> > 9 files changed, 46 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> > index f8d94c4b435..13f321b6be6 100644
> > --- a/gcc/builtins.cc
> > +++ b/gcc/builtins.cc
> > @@ -5929,6 +5929,13 @@ expand_builtin_trap (void)
> > static void
> > expand_builtin_unreachable (void)
> > {
> > +  /* If the target wants a trap in place of the fall-through, use that.  */
> > +  if (targetm.unreachable_should_trap ())
> > +{
> > +  expand_builtin_trap ();
> > +  return;
> > +}
> > +
> >   /* Use gimple_build_builtin_unreachable or builtin_decl_unreachable
> >  to avoid this.  */
> >   gcc_checking_assert (!sanitize_flags_p (SANITIZE_UNREACHABLE));
> > diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
> > index b67e05264e1..48a32b2ccc2 100644
> > --- a/gcc/config/darwin-protos.h
> > +++ b/gcc/config/darwin-protos.h
> > @@ -124,6 +124,7 @@ extern void darwin_enter_string_into_cfstring_table 
> > (tree);
> > extern void darwin_asm_output_anchor (rtx symbol);
> > extern bool darwin_use_anchors_for_symbol_p (const_rtx symbol);
> > extern bool darwin_kextabi_p (void);
> > +extern bool darwin_unreachable_traps_p (void);
> > extern void darwin_override_options (void);
> > extern void darwin_patch_builtins (void);
> > extern void darwin_rename_builtins (void);
> > diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
> > index dcfccb4952a..018547d09c6 100644
> > --- a/gcc/config/darwin.cc
> > +++ b/gcc/config/darwin.cc
> > @@ -3339,6 +3339,13 @@ darwin_kextabi_p (void) {
> >   return flag_apple_kext;
> > }
> >
> > +/* True, iff we want to map __builtin_unreachable to a trap.  */
> > +

[committed] libstdc++: Document when std::string::shrink_to_fit was added

2024-05-14 Thread Jonathan Wakely

Pushed to trunk.

-- >8 --

This section can be misread to say that shrink_to_fit is available from
GCC 3.4, but it was added later.

libstdc++-v3/ChangeLog:

* doc/xml/manual/strings.xml: Clarify that GCC 4.5 added
std::string::shrink_to_fit.
* doc/html/manual/strings.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/strings.html | 4 ++--
 libstdc++-v3/doc/xml/manual/strings.xml   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/strings.html 
b/libstdc++-v3/doc/html/manual/strings.html
index ceb09f97eac..34a34dfa980 100644
--- a/libstdc++-v3/doc/html/manual/strings.html
+++ b/libstdc++-v3/doc/html/manual/strings.html
@@ -269,8 +269,8 @@ stringtok(Container &container, string const &in,
   (see this FAQ
   entry) but the regular copy constructor cannot be used
   because libstdc++'s string is Copy-On-Write in 
GCC 3.
-   In C++11 mode you can call
-  s.shrink_to_fit() to achieve the same effect as
+   From GCC 4.5 in C++11 mode you
+  can call s.shrink_to_fit() to achieve the same 
effect as
   s.reserve(s.size()).
CString 
(MFC)
 A common lament seen in various newsgroups deals with the Standard
diff --git a/libstdc++-v3/doc/xml/manual/strings.xml 
b/libstdc++-v3/doc/xml/manual/strings.xml
index b0dab645a2d..4a63dd96477 100644
--- a/libstdc++-v3/doc/xml/manual/strings.xml
+++ b/libstdc++-v3/doc/xml/manual/strings.xml
@@ -356,8 +356,8 @@ stringtok(Container &container, string const &in,
   entry) but the regular copy constructor cannot be used
   because libstdc++'s string is Copy-On-Write in GCC 3.

-   In C++11 mode you can call
-  s.shrink_to_fit() to achieve the same effect as
+   From GCC 4.5 in C++11 mode you
+  can call s.shrink_to_fit() to achieve the same effect as
   s.reserve(s.size()).

 
-- 
2.44.0

[PATCH] [testsuite] Fix gcc.dg/pr115066.c fail on aarch64

2024-05-14 Thread Tom de Vries

On aarch64, I get this failure:
...
FAIL: gcc.dg/pr115066.c scan-assembler \\.byte\\t0xb\\t# Define macro strx
...

This happens because we expect to match:
...
.byte   0xb # Define macro strx
...
but instead we get:
...
.byte   0xb // Define macro strx
...

Fix this by not explicitly matching the comment marker.

Tested on aarch64 and x86_64.

gcc/testsuite/ChangeLog:

2024-05-14  Tom de Vries  

* gcc.dg/pr115066.c: Don't match comment marker.
---
 gcc/testsuite/gcc.dg/pr115066.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr115066.c b/gcc/testsuite/gcc.dg/pr115066.c
index 645757df209..a7e98500160 100644
--- a/gcc/testsuite/gcc.dg/pr115066.c
+++ b/gcc/testsuite/gcc.dg/pr115066.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "split DWARF unsupported" { hppa*-*-hpux* powerpc*-ibm-aix* 
*-*-darwin* } } */
 /* { dg-options "-gsplit-dwarf -g3 -dA -gdwarf-4" } */
 /* { dg-final { scan-assembler-times {\.section\t"?\.debug_macro} 1 } } */
-/* { dg-final { scan-assembler-not {\.byte\t0x5\t# Define macro strp} } } */
-/* { dg-final { scan-assembler {\.byte\t0xb\t# Define macro strx} } } */
+/* { dg-final { scan-assembler-not {\.byte\t0x5\t.* Define macro strp} } } */
+/* { dg-final { scan-assembler {\.byte\t0xb\t.* Define macro strx} } } */
 
 #define foo 1

base-commit: b7003b4cc5e263343f047fe64ed1ae12f561b2d1
-- 
2.35.3

[PATCH v2] c++: ICE with reference NSDMI [PR114854]

2024-05-14 Thread Marek Polacek

On Thu, May 09, 2024 at 03:47:54PM -0400, Jason Merrill wrote:
> On 5/9/24 12:04, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Here we crash on a cp_gimplify_expr/TARGET_EXPR assert:
> > 
> >/* A TARGET_EXPR that expresses direct-initialization should have 
> > been
> >   elided by cp_gimplify_init_expr.  */
> >gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P (*expr_p));
> > 
> > the TARGET_EXPR in question is created for the NSDMI in:
> > 
> >class Vector { int m_size; };
> >struct S {
> >  const Vector &vec{};
> >};
> > 
> > where we first need to create a Vector{} temporary, and then bind the
> > vec reference to it.  The temporary is represented by a TARGET_EXPR
> > and it cannot be elided.  When we create an object of type S, we get
> > 
> >D.2848 = {.vec=(const struct Vector &) &TARGET_EXPR  > {.m_size=0}>}
> > 
> > where the TARGET_EXPR is no longer direct-initializing anything.
> 
> Seems like the problem is in convert_like_internal:
> 
> > bool direct = CONSTRUCTOR_IS_DIRECT_INIT (expr);
> > if (abstract_virtuals_error (NULL_TREE, totype, complain))
> >   return error_mark_node;
> > expr = build_value_init (totype, complain);
> > expr = get_target_expr (expr, complain);
> > if (expr != error_mark_node)
> >   {
> > TARGET_EXPR_LIST_INIT_P (expr) = true;
> > =>  TARGET_EXPR_DIRECT_INIT_P (expr) = direct;
> >   }
> 
> My patch for 50930 assumed that if a CONSTRUCTOR represents syntactic
> direct-initialization, a resulting TARGET_EXPR is itself the direct
> initializer, but that isn't the case here; the temporary is
> copy-initialized.
> 
> We could calculate direct-initializanity from cand->flags, but perhaps we
> can just stop trying to set TARGET_EXPR_DIRECT_INIT_P here at all? We don't
> do that for other list-initialization in ck_user, I don't know why I thought
> it was needed for {} specifically.  It doesn't seem to be needed for the
> 50930 testcase.

...and it doesn't seem to be needed for any other test.  Then not setting
the flag in the first place is better than resetting it later, sure.

I thought about leaving a gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P)
in cp_build_addr_expr_1, but since we already have that assert when
gimplifying, I dropped it.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/branches?

-- >8 --
Here we crash on a cp_gimplify_expr/TARGET_EXPR assert:

  /* A TARGET_EXPR that expresses direct-initialization should have been
 elided by cp_gimplify_init_expr.  */
  gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P (*expr_p));

the TARGET_EXPR in question is created for the NSDMI in:

  class Vector { int m_size; };
  struct S {
const Vector &vec{};
  };

where we first need to create a Vector{} temporary, and then bind the
vec reference to it.  The temporary is represented by a TARGET_EXPR
and it cannot be elided.  When we create an object of type S, we get

  D.2848 = {.vec=(const struct Vector &) &TARGET_EXPR }

where the TARGET_EXPR is no longer direct-initializing anything.

Fixed by not setting TARGET_EXPR_DIRECT_INIT_P in convert_like_internal/ck_user.

PR c++/114854

gcc/cp/ChangeLog:

* call.cc (convert_like_internal) : Don't set
TARGET_EXPR_DIRECT_INIT_P.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr22.C: New test.
---
 gcc/cp/call.cc|  6 +-
 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C | 12 
 2 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index e058da7735f..ed68eb3c568 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -8597,16 +8597,12 @@ convert_like_internal (conversion *convs, tree expr, 
tree fn, int argnum,
&& TYPE_HAS_DEFAULT_CONSTRUCTOR (totype)
&& !processing_template_decl)
  {
-   bool direct = CONSTRUCTOR_IS_DIRECT_INIT (expr);
if (abstract_virtuals_error (NULL_TREE, totype, complain))
  return error_mark_node;
expr = build_value_init (totype, complain);
expr = get_target_expr (expr, complain);
if (expr != error_mark_node)
- {
-   TARGET_EXPR_LIST_INIT_P (expr) = true;
-   TARGET_EXPR_DIRECT_INIT_P (expr) = direct;
- }
+ TARGET_EXPR_LIST_INIT_P (expr) = true;
return expr;
  }
 
diff --git a/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C 
b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C
new file mode 100644
index 000..a4f9ae19ca9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C
@@ -0,0 +1,12 @@
+// PR c++/114854
+// { dg-do compile { target c++14 } }
+
+struct Vector {
+  int m_size;
+};
+struct S {
+  const Vecto

[committed] libstdc++: Guard dynamic_cast use in src/c++23/print.cc [PR115015]

2024-05-14 Thread Jonathan Wakely

Tested x86_64-linux, x86_64-w64-mingw32. Pushed to trunk. Backport to
gcc-14 to follow.

-- >8 --

Do not use dynamic_cast unconditionally, in case libstdc++ is built with
-fno-rtti.

libstdc++-v3/ChangeLog:

PR libstdc++/115015
* src/c++23/print.cc (__open_terminal(streambuf*)) [!__cpp_rtti]:
Do not use dynamic_cast.
---
 libstdc++-v3/src/c++23/print.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc
index aceca6f9139..99a19cd4500 100644
--- a/libstdc++-v3/src/c++23/print.cc
+++ b/libstdc++-v3/src/c++23/print.cc
@@ -87,7 +87,7 @@ namespace
   void*
   __open_terminal(std::streambuf* sb)
   {
-#ifndef _GLIBCXX_USE_STDIO_PURE
+#if ! defined _GLIBCXX_USE_STDIO_PURE && defined __cpp_rtti
 using namespace __gnu_cxx;
 
 if (auto fb = dynamic_cast*>(sb))
-- 
2.44.0

Re: [PATCH] testsuite: analyzer: Fix fd-glibc-byte-stream-connection-server.c on Solaris [PR107750]

2024-05-14 Thread David Malcolm

On Tue, 2024-05-14 at 14:32 +0200, Rainer Orth wrote:
> gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c currently
> FAILs
> on Solaris:
> 
> FAIL: gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c (test
> for excess errors)
> 
> Excess errors:
> /vol/gcc/src/hg/master/local/gcc/testsuite/gcc.dg/analyzer/fd-glibc-
> byte-stream-connection-server.c:91:3: error: implicit declaration of
> function 'memset' [-Wimplicit-function-declaration]
> 
> Solaris  has
> 
> #define FD_ZERO(__p)    (void) memset((__p), 0, sizeof (*(__p))) 
> 
> but no declaration of memset.  While one can argue that this should
> be
> fixed, it's easy enough to just include  instead, which is
> what this patch does.
> 
> Tested on i386-pc-solaris2.11 and i686-pc-linux-gnu.
> 
> Ok for trunk?

Yes, thanks.

Dave

[PING] [contrib] validate_failures.py: fix python 3.12 escape sequence warnings

2024-05-14 Thread Gabi Falk

Hi,

This one still needs review:

https://inbox.sourceware.org/gcc-patches/20240415233833.104460-1-gabif...@gmx.com/

--
gabi

Re: [PATCH v5 1/5] Improve must tail in RTL backend

2024-05-14 Thread Richard Biener

On Sun, May 5, 2024 at 8:16 PM Andi Kleen  wrote:
>
> - Give error messages for all causes of non sibling call generation
> - Don't override choices of other non sibling call checks with
> must tail. This causes ICEs. The must tail attribute now only
> overrides flag_optimize_sibling_calls locally.
> - Error out when tree-tailcall failed to mark a must-tail call
> sibcall. In this case it doesn't know the true reason and only gives
> a vague message (this could be improved, but it's already useful without
> that) tree-tailcall usually fails without optimization, so must
> adjust the existing must-tail plugin test to specify -O2.
>
> PR83324
>
> gcc/ChangeLog:
>
> * calls.cc (expand_call): Fix mustcall implementation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/plugin/must-tail-call-1.c: Adjust.
> ---
>  gcc/calls.cc  | 30 ---
>  .../gcc.dg/plugin/must-tail-call-1.c  |  1 +
>  2 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/calls.cc b/gcc/calls.cc
> index 21d78f9779fe..a6b8ee44cc29 100644
> --- a/gcc/calls.cc
> +++ b/gcc/calls.cc
> @@ -2650,7 +2650,9 @@ expand_call (tree exp, rtx target, int ignore)
>/* The type of the function being called.  */
>tree fntype;
>bool try_tail_call = CALL_EXPR_TAILCALL (exp);
> -  bool must_tail_call = CALL_EXPR_MUST_TAIL_CALL (exp);
> +  /* tree-tailcall decided not to do tail calls. Error for the musttail 
> case.  */
> +  if (!try_tail_call)
> +  maybe_complain_about_tail_call (exp, "other reasons");
>int pass;
>
>/* Register in which non-BLKmode value will be returned,
> @@ -3022,10 +3024,22 @@ expand_call (tree exp, rtx target, int ignore)
>   pushed these optimizations into -O2.  Don't try if we're already
>   expanding a call, as that means we're an argument.  Don't try if
>   there's cleanups, as we know there's code to follow the call.  */
> -  if (currently_expanding_call++ != 0
> -  || (!flag_optimize_sibling_calls && !CALL_FROM_THUNK_P (exp))
> -  || args_size.var
> -  || dbg_cnt (tail_call) == false)
> +  if (currently_expanding_call++ != 0)
> +{
> +  maybe_complain_about_tail_call (exp, "inside another call");
> +  try_tail_call = 0;
> +}
> +  if (!flag_optimize_sibling_calls
> +   && !CALL_FROM_THUNK_P (exp)
> +   && !CALL_EXPR_MUST_TAIL_CALL (exp))
> +try_tail_call = 0;
> +  if (args_size.var)

If we are both inside another call and run into this we give two errors,
but I guess that's OK ...

> +{
> +  /* ??? correct message?  */
> +  maybe_complain_about_tail_call (exp, "stack space needed");

args_size.var != NULL_TREE means the argument size is not constant.
I'm quite sure this is an overly conservative check.

> +  try_tail_call = 0;
> +}
> +  if (dbg_cnt (tail_call) == false)
>  try_tail_call = 0;
>
>/* Workaround buggy C/C++ wrappers around Fortran routines with
> @@ -3046,15 +3060,11 @@ expand_call (tree exp, rtx target, int ignore)
> if (MEM_P (*iter))
>   {
> try_tail_call = 0;
> +   maybe_complain_about_tail_call (exp, "hidden string length 
> argument");

"hidden string length argument passed on stack"

from what I read in the code.

> break;
>   }
> }
>
> -  /* If the user has marked the function as requiring tail-call
> - optimization, attempt it.  */
> -  if (must_tail_call)
> -try_tail_call = 1;
> -
>/*  Rest of purposes for tail call optimizations to fail.  */
>if (try_tail_call)
>  try_tail_call = can_implement_as_sibling_call_p (exp,
> diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c 
> b/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> index 3a6d4cceaba7..44af361e2925 100644
> --- a/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> @@ -1,4 +1,5 @@
>  /* { dg-do compile { target tail_call } } */
> +/* { dg-options "-O2" } */

So I think this is unfortunate - I think when there's a must-tail attribute
we should either run the tailcall pass to check the call even at -O0 or
trust the user with correctness  (hoping no optimization interfered with
the ability to tail-call).

What were the ICEs you ran into?

I would guess it's for example problematic to duplicate must-tail calls?

Thanks,
Richard.

>  /* { dg-options "-fdelayed-branch" { target sparc*-*-* } } */
>
>  extern void abort (void);
> --
> 2.44.0
>

RE: [PATCH v4 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-14 Thread Li, Pan2

Thanks Richard for comments.

> If you require a gassign please statically type your function
> argument as gassign * instead and remove this assert.

Sure

> In addition to Tamar's good comments, why do you set *cfg_changed_p to
> true?  You are
> not changing the CFG after all?

Yes, we can add it back in the future if we really change the CFG; I will update this in v5 
(including vect patch 2/3) after all tests have passed.

Pan


-Original Message-
From: Richard Biener  
Sent: Tuesday, May 14, 2024 9:18 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; 
tamar.christ...@arm.com; Liu, Hongtao 
Subject: Re: [PATCH v4 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned 
scalar int

On Mon, May 6, 2024 at 4:48 PM  wrote:
>
> From: Pan Li 
>
> This patch adds the middle-end representation for the
> saturating add, i.e. the result of the add is set to the max on overflow.
> It matches a pattern similar to the one below.
>
> SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
>
> Take uint8_t as example, we will have:
>
> * SAT_ADD (1, 254)   => 255.
> * SAT_ADD (1, 255)   => 255.
> * SAT_ADD (2, 255)   => 255.
> * SAT_ADD (255, 255) => 255.
>
> Given below example for the unsigned scalar integer uint64_t:
>
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
>
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 = REALPART_EXPR <_11>;
>   _10 = IMAGPART_EXPR <_11>;
>   _2 = _10 != 0;
>   _3 = (long unsigned int) _2;
>   _4 = -_3;
>   _7 = _1 | _4;
>   return _7;
> ;;succ:   EXIT
>
> }
>
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;succ:   EXIT
> }
>
> We perform the transform during widen_mult because the sub-expr of
> SAT_ADD will be optimized to .ADD_OVERFLOW.  We need to try the .SAT_ADD
> pattern first and then .ADD_OVERFLOW, or we may never catch the pattern
> .SAT_ADD.  Meanwhile, the isel pass is after widen_mult and then we
> cannot perform the .SAT_ADD pattern match as the sub-expr will be
> optimized to .ADD_OVERFLOW first.
>
> The below tests are passed for this patch:
> 1. The riscv full regression tests.
> 2. The aarch64 full regression tests.
> 3. The x86 bootstrap tests.
> 4. The x86 full regression tests.
>
> PR target/51492
> PR target/112600
>
> gcc/ChangeLog:
>
> * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> to the return true switch case(s).
> * internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> * match.pd: Add unsigned SAT_ADD match.
> * optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssadd.
> * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New extern
> func decl generated in match.pd match.
> (match_saturation_arith): New func impl to match the saturation arith.
> (math_opts_dom_walker::after_dom_children): Try match saturation
> arith.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc|  1 +
>  gcc/internal-fn.def   |  2 ++
>  gcc/match.pd  | 28 
>  gcc/optabs.def|  4 ++--
>  gcc/tree-ssa-math-opts.cc | 46 +++
>  5 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>  case IFN_UBSAN_CHECK_MUL:
>  case IFN_ADD_OVERFLOW:
>  case IFN_MUL_OVERFLOW:
> +case IFN_SAT_ADD:
>  case IFN_VEC_WIDEN_PLUS:
>  case IFN_VEC_WIDEN_PLUS_LO:
>  case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
> ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>   smulhrs, umulhrs, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, 
> binary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d401e7503e6..7058e4cbe29 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,

Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-14 Thread Qing Zhao



> On May 14, 2024, at 09:08, Richard Biener  wrote:
> 
> On Mon, 13 May 2024, Qing Zhao wrote:
> 
>> -Warray-bounds is an important option to enable the Linux kernel to keep
>> the array out-of-bound errors out of the source tree.
>> 
>> However, due to the false positive warnings reported in PR109071
>> (-Warray-bounds false positive warnings due to code duplication from
>> jump threading), -Warray-bounds=1 cannot be added on by default.
>> 
>> Although it's impossible to eliminate all the false positive warnings
>> from -Warray-bounds=1 (See PR104355 Misleading -Warray-bounds
>> documentation says "always out of bounds"), we should minimize the
>> false positive warnings in -Warray-bounds=1.
>> 
>> The root reason for the false positive warnings reported in PR109071 is:
>> 
>> When the thread jump optimization tries to reduce the # of branches
>> inside the routine, sometimes it needs to duplicate the code and
>> split into two conditional paths. For example:
>> 
>> The original code:
>> 
>> void sparx5_set (int * ptr, struct nums * sg, int index)
>> {
>>  if (index >= 4)
>>warn ();
>>  *ptr = 0;
>>  *val = sg->vals[index];
>>  if (index >= 4)
>>warn ();
>>  *ptr = *val;
>> 
>>  return;
>> }
>> 
>> With the thread jump, the above becomes:
>> 
>> void sparx5_set (int * ptr, struct nums * sg, int index)
>> {
>>  if (index >= 4)
>>{
>>  warn ();
>>  *ptr = 0; // Code duplications since "warn" does return;
>>  *val = sg->vals[index]; // same this line.
>> // In this path, since it's under the condition
>> // "index >= 4", the compiler knows the value
>> // of "index" is larger then 4, therefore the
>> // out-of-bound warning.
>>  warn ();
>>}
>>  else
>>{
>>  *ptr = 0;
>>  *val = sg->vals[index];
>>}
>>  *ptr = *val;
>>  return;
>> }
>> 
>> We can see, after the thread jump optimization, the # of branches inside
>> the routine "sparx5_set" is reduced from 2 to 1, however,  due to the
>> code duplication (which is needed for the correctness of the code), we
>> got a false positive out-of-bound warning.
>> 
>> In order to eliminate such false positive out-of-bound warning,
>> 
>> A. Add one more flag for GIMPLE: is_splitted.
>> B. During the thread jump optimization, when the basic blocks are
>>   duplicated, mark all the STMTs inside the original and duplicated
>>   basic blocks as "is_splitted";
>> C. Inside the array bound checker, add the following new heuristic:
>> 
>> If
>>   1. the stmt is duplicated and splitted into two conditional paths;
>> +  2. the warning level < 2;
>> +  3. the current block is not dominating the exit block
>> Then not report the warning.
>> 
>> The false positive warnings are moved from -Warray-bounds=1 to
>> -Warray-bounds=2 now.
>> 
>> Bootstrapped and regression tested on both x86 and aarch64. adjusted
>> -Warray-bounds-61.c due to the false positive warnings.
>> 
>> Let me know if you have any comments and suggestions.
> 
> At the last Cauldron I talked with David Malcolm about these kind of
> issues and thought of instead of suppressing diagnostics to record
> how a block was duplicated.  For jump threading my idea was to record
> the condition that was proved true when entering the path and do this
> by recording the corresponding locations so that in the end we can
> use the diagnostic-path infrastructure to say
> 
> warning: array index always above array bounds
> events 1:
> 
> | 3 |  if (index >= 4)
> |
>(1) when index >= 4

Yes, this is a good idea. 

The current major issue with the warning is:  the constant index value 4 is not 
in the source code, it’s a compiler generated intermediate value (even though 
it’s a correct value -:)). Such warning messages confuse the end-users with 
information that cannot be connected directly to the source code. 

With the above recorded “events” information, the warning messages should make 
good sense to the end user, and also help the end user to locate the place 
where the fix in the source code can be added. 

Actually, with the above warning information, the user can locate the place 
“line 3” to add fixes as following:

if (*index >= 4)
  {
warn();
*index = 3;
  }

I.e.
[109071]$ diff t_org.c t.c
2c2
< static inline void assign(int val, int *regs, int index)
---
> static inline void assign(int val, int *regs, int *index)
4c4,5
< if (index >= 4)
---
> if (*index >= 4)
>   {
5a7,8
> *index = 3;
>   }
14,15c17,18
< assign(0,ptr, index);
< assign(*val, ptr, index);
---
> assign(0,ptr, &index);
> assign(*val, ptr, &index);

> 
> it would be possible to record the info as part of the ad-hoc
> location data on each duplicated stmt or, possibly simpler,
> as part of a debug stmt of new kind.

Recording such info to each stmt might be more reliable? 

> 
> I'm not sure pruning the warnings is a good thing to do.  One
> would argue we should instead isolate such path as unreachable
> since it invokes undefined

Re: [PATCH] [testsuite] Fix gcc.dg/pr115066.c fail on aarch64

2024-05-14 Thread Jakub Jelinek

On Tue, May 14, 2024 at 03:47:46PM +0200, Tom de Vries wrote:
> On aarch64, I get this failure:
> ...
> FAIL: gcc.dg/pr115066.c scan-assembler \\.byte\\t0xb\\t# Define macro strx
> ...
> 
> This happens because we expect to match:
> ...
> .byte   0xb # Define macro strx
> ...
> but instead we get:
> ...
> .byte   0xb // Define macro strx
> ...
> 
> Fix this by not explicitly matching the comment marker.
> 
> Tested on aarch64 and x86_64.
> 
> gcc/testsuite/ChangeLog:
> 
> 2024-05-14  Tom de Vries  
> 
> * gcc.dg/pr115066.c: Don't match comment marker.
> ---
>  gcc/testsuite/gcc.dg/pr115066.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Ok.

Jakub

Re: [PATCH] [testsuite] Fix gcc.dg/pr115066.c fail on aarch64

2024-05-14 Thread Jakub Jelinek

On Tue, May 14, 2024 at 03:47:46PM +0200, Tom de Vries wrote:
> On aarch64, I get this failure:
> ...
> FAIL: gcc.dg/pr115066.c scan-assembler \\.byte\\t0xb\\t# Define macro strx
> ...
> 
> This happens because we expect to match:
> ...
> .byte   0xb # Define macro strx
> ...
> but instead we get:
> ...
> .byte   0xb // Define macro strx
> ...
> 
> Fix this by not explicitly matching the comment marker.
> 
> Tested on aarch64 and x86_64.
> 
> gcc/testsuite/ChangeLog:
> 
> 2024-05-14  Tom de Vries  
> 
> * gcc.dg/pr115066.c: Don't match comment marker.
> ---
>  gcc/testsuite/gcc.dg/pr115066.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/pr115066.c b/gcc/testsuite/gcc.dg/pr115066.c
> index 645757df209..a7e98500160 100644
> --- a/gcc/testsuite/gcc.dg/pr115066.c
> +++ b/gcc/testsuite/gcc.dg/pr115066.c
> @@ -2,7 +2,7 @@
>  /* { dg-skip-if "split DWARF unsupported" { hppa*-*-hpux* powerpc*-ibm-aix* 
> *-*-darwin* } } */
>  /* { dg-options "-gsplit-dwarf -g3 -dA -gdwarf-4" } */
>  /* { dg-final { scan-assembler-times {\.section\t"?\.debug_macro} 1 } } */
> -/* { dg-final { scan-assembler-not {\.byte\t0x5\t# Define macro strp} } } */
> -/* { dg-final { scan-assembler {\.byte\t0xb\t# Define macro strx} } } */
> +/* { dg-final { scan-assembler-not {\.byte\t0x5\t.* Define macro strp} } } */
> +/* { dg-final { scan-assembler {\.byte\t0xb\t.* Define macro strx} } } */

Actually, perhaps better use [^\n\r]* instead of .*
You don't want to match the comment on a different line.

Jakub

Re: [PATCH v5 5/5] Add documentation for musttail attribute

2024-05-14 Thread Richard Biener

On Sun, May 5, 2024 at 8:16 PM Andi Kleen  wrote:
>
> gcc/ChangeLog:
>
> * doc/extend.texi: Document [[musttail]]
> ---
>  gcc/doc/extend.texi | 22 --
>  1 file changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e290265d68d3..deb100ad93b6 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9839,7 +9839,7 @@ same manner as the @code{deprecated} attribute.
>  @section Statement Attributes
>  @cindex Statement Attributes
>
> -GCC allows attributes to be set on null statements.  @xref{Attribute Syntax},
> +GCC allows attributes to be set on statements.  @xref{Attribute Syntax},
>  for details of the exact syntax for using attributes.  Other attributes are
>  available for functions (@pxref{Function Attributes}), variables
>  (@pxref{Variable Attributes}), labels (@pxref{Label Attributes}), enumerators
> @@ -9896,6 +9896,22 @@ foo (int x, int y)
>  @code{y} is not actually incremented and the compiler can but does not
>  have to optimize it to just @code{return 42 + 42;}.
>
> +@cindex @code{musttail} statement attribute
> +@item musttail
> +
> +The @code{gnu::musttail} or @code{clang::musttail} attribute
> +can be applied to a @code{return} statement with a return-value expression
> +that is a function call.  It asserts that the call must be a tail call that
> +does not allocate extra stack space.
> +
> +@smallexample
> +[[gnu::musttail]] return foo();
> +@end smallexample
> +
> +If the compiler cannot generate a tail call it generates
> +an error. Tail calls generally require enabling optimization.
> +On some targets they may not be supported.

Looks generally OK though does this mean people can debug
programs using [[gnu::musttail]] only with optimized builds?  It
seems to me we should try harder to make [[gnu::musttail]] work
at -O0 and generally behave the same at all optimization levels?

> +
>  @end table
>
>  @node Attribute Syntax
> @@ -10019,7 +10035,9 @@ the constant expression, if present.
>
>  @subsubheading Statement Attributes
>  In GNU C, an attribute specifier list may appear as part of a null
> -statement.  The attribute goes before the semicolon.
> +statement. The attribute goes before the semicolon.
> +Some attributes in new style syntax are also supported
> +on non-null statements.
>
>  @subsubheading Type Attributes
>
> --
> 2.44.0
>

Re: [PATCH 2/4] libcpp/init: remove unnecessary `struct` keyword

2024-05-14 Thread Richard Biener

On Sat, May 4, 2024 at 5:06 PM Ben Boeckel  wrote:
>
> The initial P1689 patches were written in 2019, and code moving
> around over time ended up introducing a `struct` keyword to the
> implementation of `cpp_finish`. Remove it to match the rest of the file
> and its declaration in the header.
>
> Fixes: 024f135a1e9 (p1689r5: initial support, 2023-09-01)
>
> Reported-by: Roland Illig 

OK.

Thanks,
Richard.

> libcpp/
>
> * init.cc (cpp_finish): Remove unnecessary `struct` keyword.
>
> Signed-off-by: Ben Boeckel 
> ---
>  libcpp/init.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libcpp/init.cc b/libcpp/init.cc
> index 54fc9236d38..cbd22249b04 100644
> --- a/libcpp/init.cc
> +++ b/libcpp/init.cc
> @@ -862,7 +862,7 @@ read_original_directory (cpp_reader *pfile)
> Maybe it should also reset state, such that you could call
> cpp_start_read with a new filename to restart processing.  */
>  void
> -cpp_finish (struct cpp_reader *pfile, FILE *deps_stream, FILE *fdeps_stream)
> +cpp_finish (cpp_reader *pfile, FILE *deps_stream, FILE *fdeps_stream)
>  {
>/* Warn about unused macros before popping the final buffer.  */
>if (CPP_OPTION (pfile, warn_unused_macros))
> --
> 2.44.0
>

Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-14 Thread Richard Biener

On Tue, 14 May 2024, Qing Zhao wrote:

> 
> 
> > On May 14, 2024, at 09:08, Richard Biener  wrote:
> > 
> > On Mon, 13 May 2024, Qing Zhao wrote:
> > 
> >> -Warray-bounds is an important option to enable the Linux kernel to keep
> >> the array out-of-bound errors out of the source tree.
> >> 
> >> However, due to the false positive warnings reported in PR109071
> >> (-Warray-bounds false positive warnings due to code duplication from
> >> jump threading), -Warray-bounds=1 cannot be added on by default.
> >> 
> >> Although it's impossible to eliminate all the false positive warnings
> >> from -Warray-bounds=1 (See PR104355 Misleading -Warray-bounds
> >> documentation says "always out of bounds"), we should minimize the
> >> false positive warnings in -Warray-bounds=1.
> >> 
> >> The root reason for the false positive warnings reported in PR109071 is:
> >> 
> >> When the thread jump optimization tries to reduce the # of branches
> >> inside the routine, sometimes it needs to duplicate the code and
> >> split into two conditional paths. For example:
> >> 
> >> The original code:
> >> 
> >> void sparx5_set (int * ptr, struct nums * sg, int index)
> >> {
> >>  if (index >= 4)
> >>warn ();
> >>  *ptr = 0;
> >>  *val = sg->vals[index];
> >>  if (index >= 4)
> >>warn ();
> >>  *ptr = *val;
> >> 
> >>  return;
> >> }
> >> 
> >> With the thread jump, the above becomes:
> >> 
> >> void sparx5_set (int * ptr, struct nums * sg, int index)
> >> {
> >>  if (index >= 4)
> >>{
> >>  warn ();
> >>  *ptr = 0; // Code duplications since "warn" does return;
> >>  *val = sg->vals[index]; // same this line.
> >> // In this path, since it's under the condition
> >> // "index >= 4", the compiler knows the value
> >> // of "index" is larger then 4, therefore the
> >> // out-of-bound warning.
> >>  warn ();
> >>}
> >>  else
> >>{
> >>  *ptr = 0;
> >>  *val = sg->vals[index];
> >>}
> >>  *ptr = *val;
> >>  return;
> >> }
> >> 
> >> We can see, after the thread jump optimization, the # of branches inside
> >> the routine "sparx5_set" is reduced from 2 to 1, however,  due to the
> >> code duplication (which is needed for the correctness of the code), we
> >> got a false positive out-of-bound warning.
> >> 
> >> In order to eliminate such false positive out-of-bound warning,
> >> 
> >> A. Add one more flag for GIMPLE: is_splitted.
> >> B. During the thread jump optimization, when the basic blocks are
> >>   duplicated, mark all the STMTs inside the original and duplicated
> >>   basic blocks as "is_splitted";
> >> C. Inside the array bound checker, add the following new heuristic:
> >> 
> >> If
> >>   1. the stmt is duplicated and splitted into two conditional paths;
> >> +  2. the warning level < 2;
> >> +  3. the current block is not dominating the exit block
> >> Then not report the warning.
> >> 
> >> The false positive warnings are moved from -Warray-bounds=1 to
> >> -Warray-bounds=2 now.
> >> 
> >> Bootstrapped and regression tested on both x86 and aarch64. adjusted
> >> -Warray-bounds-61.c due to the false positive warnings.
> >> 
> >> Let me know if you have any comments and suggestions.
> > 
> > At the last Cauldron I talked with David Malcolm about these kind of
> > issues and thought of instead of suppressing diagnostics to record
> > how a block was duplicated.  For jump threading my idea was to record
> > the condition that was proved true when entering the path and do this
> > by recording the corresponding locations so that in the end we can
> > use the diagnostic-path infrastructure to say
> > 
> > warning: array index always above array bounds
> > events 1:
> > 
> > | 3 |  if (index >= 4)
> > |
> >(1) when index >= 4

As it's been quite some time I think I remember that I thought of
constructing the diagnostic path at jump threading time and associating
that with the location.  But I don't remember exactly where I wanted to
put it - I think it was on an extra stmt to avoid having too many
ad-hoc locations as I'm not sure of their cost.  It would of course
need experimenting since we can end up moving stmts and merging blocks
though the linear traces created by jump threading should be quite
stable (as opposed to say the unrolling case where multiple instances
of the loop body likely will end up in the exact same basic block).

> Yes, this is a good idea. 
> 
> The current major issue with the warning is:  the constant index value 4 is 
> not in the source code, it’s a compiler generated intermediate value (even 
> though it’s a correct value -:)). Such warning messages confuse the end-users 
> with information that cannot be connected directly to the source code. 
> 
> With the above recorded “events” information, the warning messages should 
> make good sense to the end user, and also help the end user to locate the 
> place where the fix in the source code can be added. 
> 
> Actually, with the above warning information, the user can locate the place 
> “line

Re: [PATCH v2 2/2] RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

2024-05-14 Thread Patrick O'Neill




On 5/13/24 20:36, Jeff Law wrote:



On 5/13/24 6:54 PM, Patrick O'Neill wrote:


On 5/13/24 13:28, Jeff Law wrote:



On 5/13/24 12:49 PM, Vineet Gupta wrote:
If the constant used for stack offset can be expressed as the sum of two S12
values, the constant need not be materialized (in a reg) and instead the
two S12 values can be added to instructions involved with the frame pointer.
This avoids burning a register and more importantly can often get down
to be 2 insn vs. 3.

The previous patches to generally avoid LUI based const materialization didn't
fix this PR, which needs this directed fix in function prologue/epilogue
expansion.

This fix doesn't move the needle for SPEC at all, but it is still a
win considering gcc generates one insn fewer than llvm for the test ;-)


    gcc-13.1 release   |  gcc 230823 | |
   |    g6619b3d4c15c    |   This patch | 
clang/llvm
- 

li  t0,-4096 | li    t0,-4096  | addi sp,sp,-2048 | 
addi sp,sp,-2048
addi    t0,t0,2016   | addi  t0,t0,2032    | add sp,sp,-16   | addi 
sp,sp,-32
li  a4,4096  | add   sp,sp,t0  | add a5,sp,a0    | add 
a1,sp,16
add sp,sp,t0 | addi  a5,sp,-2032   | sb zero,0(a5)  | add 
a0,a0,a1
li  a5,-4096 | add   a0,a5,a0  | addi sp,sp,2032  | sb 
zero,0(a0)
addi    a4,a4,-2032  | li    t0, 4096  | addi sp,sp,32    | 
addi sp,sp,2032
add a4,a4,a5 | sb    zero,2032(a0) | ret   | 
addi sp,sp,48

addi    a5,sp,16 | addi  t0,t0,-2032 |   | ret
add a5,a4,a5 | add   sp,sp,t0  |
add a0,a5,a0 | ret |
li  t0,4096  |
sd  a5,8(sp) |
sb  zero,2032(a0)|
addi    t0,t0,-2016  |
add sp,sp,t0 |
ret  |

gcc/ChangeLog:
PR target/105733
* config/riscv/riscv.h: New macros for with aligned offsets.
* config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
function to split a sum of two s12 values into constituents.
(riscv_expand_prologue): Handle offset being sum of two S12.
(riscv_expand_epilogue): Ditto.
* config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105733.c: New Test.
* gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
expect LUI 4096.
* gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.





@@ -8074,14 +8111,26 @@ riscv_expand_epilogue (int style)
  }
    else
  {
-  if (!SMALL_OPERAND (adjust_offset.to_constant ()))
+  HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
+  if (SMALL_OPERAND (adj_off_value))
+    {
+  adjust = GEN_INT (adj_off_value);
+    }
+  else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
+    {
+  HOST_WIDE_INT base, off;
+  riscv_split_sum_of_two_s12 (adj_off_value, &base, &off);
+  insn = gen_add3_insn (stack_pointer_rtx, 
hard_frame_pointer_rtx,

+    GEN_INT (base));
+  RTX_FRAME_RELATED_P (insn) = 1;
+  adjust = GEN_INT (off);
+    }
So this was the hunk that we identified internally as causing 
problems with libgomp's testsuite.  We never fully chased it down as 
this hunk didn't seem terribly important performance wise -- we just 
set it aside.  The thing is it looked basically correct to me.  So 
the failure was certainly unexpected, but it was consistent.


So I think the question is whether or not the CI system runs the 
libgomp testsuite, particularly in the rv64 linux configuration. If 
it does, and it passes, then we're good. I'm still finding my way 
around the configuration, so I don't know if the CI system Edwin & 
Patrick have built tests libgomp or not.


I poked around the .sum files in pre/postcommit and we do run tests 
like:


PASS: c-c++-common/gomp/affinity-2.c  (test for errors, line 45)

I was able to find the summary info:


Tests that now fail, but worked before (15 tests):
libgomp: libgomp.fortran/simd7.f90   -O0  execution test
libgomp: libgomp.fortran/task2.f90   -O0  execution test
libgomp: libgomp.fortran/vla2.f90   -O0  execution test
libgomp: libgomp.fortran/vla3.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions execution test

libgomp: libgomp.fortran/vla3.f90   -O3 -g  execution test
libgomp: libgomp.fortran/vla4.f90   -O1  execution test
libgomp: libgomp.fortran/vla4.f90   -O2  execution test
libgomp: libgomp.fortran/vla4.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions execution test

libgomp: libgomp.fortran/vla4.f90   -O3 -g  execution test
libgomp: libgomp.fortr

[to-be-committed][RISC-V] Remove redundant AND in shift-add sequence

2024-05-14 Thread Jeff Law

So this patch allows us to eliminate a redundant AND in some shift-add 
style sequences.   I think the testcase was reduced from xz by the RAU 
team, but I'm not highly confident of that.


Specifically the AND is masking off the upper 32 bits of the un-shifted 
value and there's an outer SIGN_EXTEND from SI to DI.  However in the 
RTL it's working on the post-shifted value, so the constant is left 
shifted, so we have to account for that in the pattern's condition.


We can just drop the AND in this case.  So instead we do a 64bit shift, 
then a sign extending ADD utilizing the low part of that 64bit shift result.



This has run through Ventana's CI as well as my own.  I'll wait for it 
to run through the larger CI system before pushing.


Jeff
gcc/
* config/riscv/riscv.md: Add pattern for sign extended shift-add 
sequence with a masked input.

gcc/testsuite

* gcc.target/riscv/shift-add-2.c: New test.

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 4d6de992557..520c0f54150 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4056,6 +4056,31 @@ (define_insn "*large_load_address"
   [(set_attr "type" "load")
(set (attr "length") (const_int 8))])
 
+;; The AND is redundant here.  It always turns off the high 32 bits and the
+;; low number of bits equal to the shift count.  Those upper 32 bits will be
+;; reset by the SIGN_EXTEND at the end.
+;;
+;; One could argue combine should have realized this and simplified what it
+;; presented to the backend.  But we can obviously cope with what it gave us.
+(define_insn_and_split ""
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI
+ (plus:SI (subreg:SI
+(and:DI
+  (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "n"))
+  (match_operand 3 "const_int_operand" "n")) 0)
+  (match_operand:SI 4 "register_operand" "r"))))
+   (clobber (match_scratch:DI 5 "=&r"))]
+  "TARGET_64BIT
+   && (INTVAL (operands[3]) | ((1 << INTVAL (operands[2])) - 1)) == 0xffffffff"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 6) (match_dup 4))))]
+  "{ operands[6] = gen_lowpart (SImode, operands[5]); }"
+  [(set_attr "type" "arith")])
+
 (include "bitmanip.md")
 (include "crypto.md")
 (include "sync.md")
diff --git a/gcc/testsuite/gcc.target/riscv/shift-add-2.c 
b/gcc/testsuite/gcc.target/riscv/shift-add-2.c
new file mode 100644
index 00000000000..87439858e59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-add-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int sub2(int a, long long b) {
+  b = (b << 32) >> 31;
+  unsigned int x = a + b;
+  return x;
+}
+
+
+/* { dg-final { scan-assembler-times "\tslli\t" 1 } } */
+/* { dg-final { scan-assembler-times "\taddw\t" 1 } } */
+/* { dg-final { scan-assembler-not "\tsrai\t" } } */
+/* { dg-final { scan-assembler-not "\tsh.add\t" } } */
+

[PATCH][v2] tree-optimization/99954 - redo loop distribution memcpy recognition fix

2024-05-14 Thread Richard Biener

The following revisits the fix for PR99954 which was observed as
causing missed memcpy recognition and instead using memmove for
non-aliasing copies.  While the original fix mitigated bogus
recognition of memcpy the root cause was not properly identified.
The root cause is dr_analyze_indices "failing" to handle union
references and leaving the DRs indices in a state that's not correctly
handled by dr_may_alias.  The following mitigates this there
appropriately, restoring memcpy recognition for non-aliasing copies.

This makes us run into a latent issue in ptr_deref_may_alias_decl_p
when the pointer is something like &MEM[0].a in which case we fail
to handle non-SSA name pointers.  Add code similar to what we have
in ptr_derefs_may_alias_p.

Bootstrap & regtest in progress on x86_64-unknown-linux-gnu.

PR tree-optimization/99954
* tree-data-ref.cc (dr_may_alias_p): For bases that are
not completely analyzed fall back to TBAA and points-to.
* tree-loop-distribution.cc
(loop_distribution::classify_builtin_ldst): When there
is no dependence again classify as memcpy.
* tree-ssa-alias.cc (ptr_deref_may_alias_decl_p): Verify
the pointer is an SSA name.

* gcc.dg/tree-ssa/ldist-40.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c | 10 ++
 gcc/tree-data-ref.cc | 22 ++
 gcc/tree-loop-distribution.cc|  4 ++--
 gcc/tree-ssa-alias.cc|  5 +
 4 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c
new file mode 100644
index 00000000000..238a0098352
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-40.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ldist-details" } */
+
+void copy_a_to_b (char * __restrict b, char * a, int n)
+{
+  for (int i = 0; i < n; ++i)
+b[i] = a[i];
+}
+
+/* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index f37734b5340..db15ddb43de 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -3066,6 +3066,28 @@ dr_may_alias_p (const struct data_reference *a, const 
struct data_reference *b,
return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
   TREE_OPERAND (addr_b, 0));
 }
+  /* If dr_analyze_innermost failed to handle a component we are
+ possibly left with a non-base in which case we didn't analyze
+ a possible evolution of the base when analyzing a loop.  */
+  else if (loop_nest
+  && (handled_component_p (addr_a) || handled_component_p (addr_b)))
+{
+  /* For true dependences we can apply TBAA.  */
+  if (flag_strict_aliasing
+ && DR_IS_WRITE (a) && DR_IS_READ (b)
+ && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
+get_alias_set (DR_REF (b
+   return false;
+  if (TREE_CODE (addr_a) == MEM_REF)
+   return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
+  build_fold_addr_expr (addr_b));
+  else if (TREE_CODE (addr_b) == MEM_REF)
+   return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
+  TREE_OPERAND (addr_b, 0));
+  else
+   return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
+  build_fold_addr_expr (addr_b));
+}
 
   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
  that is being subsetted in the loop nest.  */
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 45932bae5e7..668dc420449 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -1840,11 +1840,11 @@ loop_distribution::classify_builtin_ldst (loop_p loop, 
struct graph *rdg,
   /* Now check that if there is a dependence.  */
   ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);
 
-  /* Classify as memmove if no dependence between load and store.  */
+  /* Classify as memcpy if no dependence between load and store.  */
   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 {
   partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, 
size);
-  partition->kind = PKIND_MEMMOVE;
+  partition->kind = PKIND_MEMCPY;
   return;
 }
 
diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index e7c1c1aa624..374ba04e6fd 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -294,6 +294,11 @@ ptr_deref_may_alias_decl_p (tree ptr, tree decl)
   if (!may_be_aliased (decl))
 return false;
 
+  /* From here we require a SSA name pointer.  Anything else aliases.  */
+  if (TREE_CODE (ptr) != SSA_NAME
+  || !POINTER_TYPE_P (TREE_TYPE (ptr)))
+return true;
+
   /* If we do n

Re: [PATCH v2 2/2] RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

2024-05-14 Thread Jeff Law





On 5/14/24 8:51 AM, Patrick O'Neill wrote:


On 5/13/24 20:36, Jeff Law wrote:



On 5/13/24 6:54 PM, Patrick O'Neill wrote:


On 5/13/24 13:28, Jeff Law wrote:



On 5/13/24 12:49 PM, Vineet Gupta wrote:
If the constant used for stack offset can be expressed as sum of 
two S12
values, the constant need not be materialized (in a reg) and 
instead the

two S12 bits can be added to instructions involved with frame pointer.
This avoids burning a register and more importantly can often get down
to be 2 insn vs. 3.

The prev patches to generally avoid LUI based const materialization 
didn't

fix this PR and need this directed fix in function prologue/epilogue
expansion.

This fix doesn't move the needle for SPEC, at all, but it is still a
win considering gcc generates one insn fewer than llvm for the 
test ;-)


    gcc-13.1 release   |  gcc 230823 | |
   |    g6619b3d4c15c    |   This patch | 
clang/llvm

-
li  t0,-4096 | li    t0,-4096  | addi sp,sp,-2048 | 
addi sp,sp,-2048
addi    t0,t0,2016   | addi  t0,t0,2032    | add sp,sp,-16   | addi 
sp,sp,-32
li  a4,4096  | add   sp,sp,t0  | add a5,sp,a0    | add 
a1,sp,16
add sp,sp,t0 | addi  a5,sp,-2032   | sb zero,0(a5)  | add 
a0,a0,a1
li  a5,-4096 | add   a0,a5,a0  | addi sp,sp,2032  | sb 
zero,0(a0)
addi    a4,a4,-2032  | li    t0, 4096  | addi sp,sp,32    | 
addi sp,sp,2032
add a4,a4,a5 | sb    zero,2032(a0) | ret   | 
addi sp,sp,48

addi    a5,sp,16 | addi  t0,t0,-2032 |   | ret
add a5,a4,a5 | add   sp,sp,t0  |
add a0,a5,a0 | ret |
li  t0,4096  |
sd  a5,8(sp) |
sb  zero,2032(a0)|
addi    t0,t0,-2016  |
add sp,sp,t0 |
ret  |

gcc/ChangeLog:
PR target/105733
* config/riscv/riscv.h: New macros for sums of two S12 values with aligned offsets.
* config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
function to split a sum of two s12 values into constituents.
(riscv_expand_prologue): Handle offset being sum of two S12.
(riscv_expand_epilogue): Ditto.
* config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105733.c: New Test.
* gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
expect LUI 4096.
* gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.





@@ -8074,14 +8111,26 @@ riscv_expand_epilogue (int style)
  }
    else
  {
-  if (!SMALL_OPERAND (adjust_offset.to_constant ()))
+  HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
+  if (SMALL_OPERAND (adj_off_value))
+    {
+  adjust = GEN_INT (adj_off_value);
+    }
+  else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
+    {
+  HOST_WIDE_INT base, off;
+  riscv_split_sum_of_two_s12 (adj_off_value, &base, &off);
+  insn = gen_add3_insn (stack_pointer_rtx, 
hard_frame_pointer_rtx,

+    GEN_INT (base));
+  RTX_FRAME_RELATED_P (insn) = 1;
+  adjust = GEN_INT (off);
+    }
So this was the hunk that we identified internally as causing 
problems with libgomp's testsuite.  We never fully chased it down as 
this hunk didn't seem terribly important performance wise -- we just 
set it aside.  The thing is it looked basically correct to me.  So 
the failure was certainly unexpected, but it was consistent.


So I think the question is whether or not the CI system runs the 
libgomp testsuite, particularly in the rv64 linux configuration. If 
it does, and it passes, then we're good. I'm still finding my way 
around the configuration, so I don't know if the CI system Edwin & 
Patrick have built tests libgomp or not.


I poked around the .sum files in pre/postcommit and we do run tests 
like:


PASS: c-c++-common/gomp/affinity-2.c  (test for errors, line 45)

I was able to find the summary info:


Tests that now fail, but worked before (15 tests):
libgomp: libgomp.fortran/simd7.f90   -O0  execution test
libgomp: libgomp.fortran/task2.f90   -O0  execution test
libgomp: libgomp.fortran/vla2.f90   -O0  execution test
libgomp: libgomp.fortran/vla3.f90   -O3 -fomit-frame-pointer - 
funroll-loops -fpeel-loops -ftracer -finline-functions execution test

libgomp: libgomp.fortran/vla3.f90   -O3 -g  execution test
libgomp: libgomp.fortran/vla4.f90   -O1  execution test
libgomp: libgomp.fortran/vla4.f90   -O2  execution test
libgomp: libgomp.fortran/vla4.f90   -O3 -fomit-frame-pointer - 
funroll-loops -fpeel-loops -ftracer -finline-functions execution test

libgomp: libgomp.fortran/vla4.f90

[PATCH 00/12] aarch64: Extend aarch64_feature_flags to 128 bits

2024-05-14 Thread Andrew Carlotti

The end goal of the series is to change the definition of aarch64_feature_flags
from a uint64_t typedef to a class with 128 bits of storage.  This class uses
operator overloading to mimic the existing integer interface as much as
possible, but with added restrictions to facilitate type checking and
extensibility.

Patches 01-10 are preliminary enablement work, and have passed regression
testing.  Are these ok for master?

Patch 11 is an RFC, and the only patch that touches the middle end.  I am
seeking clarity on which part(s) of the compiler should be expected to handle
or prevent non-bool types in instruction pattern conditions.  The actual patch
does not compile by itself (though it does in combination with 12/12), but that
is not important to the questions I'm asking.

Patch 12 is then a small patch that actually replaces the uint64_t typedef with
a class.  I think this patch is fine in its current form, but it depends on a
resolution to the issues in patch 11/12 first.

[PATCH 02/12] aarch64: Move AARCH64_NUM_ISA_MODES definition

2024-05-14 Thread Andrew Carlotti

AARCH64_NUM_ISA_MODES will be used within aarch64-opts.h in a later
commit.

gcc/ChangeLog:

* config/aarch64/aarch64.h (DEF_AARCH64_ISA_MODE): Move to...
* config/aarch64/aarch64-opts.h (DEF_AARCH64_ISA_MODE): ...here.


diff --git a/gcc/config/aarch64/aarch64-opts.h 
b/gcc/config/aarch64/aarch64-opts.h
index 
a05c0d3ded1c69802f15eebb8c150c7dcc62b4ef..06a4fed3833482543891b4f7c778933f7cebd631
 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -24,6 +24,11 @@
 
 #ifndef USED_FOR_TARGET
 typedef uint64_t aarch64_feature_flags;
+
+constexpr unsigned int AARCH64_NUM_ISA_MODES = (0
+#define DEF_AARCH64_ISA_MODE(IDENT) + 1
+#include "aarch64-isa-modes.def"
+);
 #endif
 
 /* The various cores that implement AArch64.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
4fa1dfc79065c291ee5c97cc8f641c1f7c9919ec..8eb21cfcfc1e80bef051c571ec7cfae47e3393ed
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -189,11 +189,6 @@ enum class aarch64_feature : unsigned char {
 
 constexpr auto AARCH64_FL_SM_STATE = AARCH64_FL_SM_ON | AARCH64_FL_SM_OFF;
 
-constexpr unsigned int AARCH64_NUM_ISA_MODES = (0
-#define DEF_AARCH64_ISA_MODE(IDENT) + 1
-#include "aarch64-isa-modes.def"
-);
-
 /* The mask of all ISA modes.  */
 constexpr auto AARCH64_FL_ISA_MODES
   = (aarch64_feature_flags (1) << AARCH64_NUM_ISA_MODES) - 1;

[PATCH 01/12] aarch64: Remove unused global aarch64_tune_flags

2024-05-14 Thread Andrew Carlotti

gcc/ChangeLog:

* config/aarch64/aarch64.cc
(aarch64_tune_flags): Remove unused global variable.
(aarch64_override_options_internal): Remove dead assignment.


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
662ff5a9b0c715d0cab0ae4ba63af1b3c8ebbd00..4e6ad1023f638c9756ee9503b1ecbd3c1573871a
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -349,9 +349,6 @@ static bool aarch64_print_address_internal (FILE*, 
machine_mode, rtx,
 /* The processor for which instructions should be scheduled.  */
 enum aarch64_processor aarch64_tune = cortexa53;
 
-/* Mask to specify which instruction scheduling options should be used.  */
-uint64_t aarch64_tune_flags = 0;
-
 /* Global flag for PC relative loads.  */
 bool aarch64_pcrelative_literal_loads;
 
@@ -18237,7 +18234,6 @@ void
 aarch64_override_options_internal (struct gcc_options *opts)
 {
   const struct processor *tune = aarch64_get_tune_cpu (opts->x_selected_tune);
-  aarch64_tune_flags = tune->flags;
   aarch64_tune = tune->sched_core;
   /* Make a copy of the tuning parameters attached to the core, which
  we may later overwrite.  */

[PATCH 04/12] aarch64: Don't compare aarch64_feature_flags to 0.

2024-05-14 Thread Andrew Carlotti

A later commit will disallow such comparisons.  We can instead convert
directly to a boolean value, and make sure all such conversions are
explicit.

TODO: FIX SYSREG GATING.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc
(check_required_extensions): Replace comparison with 0.
(add_overloaded_function): Ditto.
* config/aarch64/aarch64.cc (aarch64_add_offset): Ditto.
(aarch64_guard_switch_pstate_sm): Ditto.
(aarch64_switch_pstate_sm): Ditto.
(aarch64_need_old_pstate_sm): Ditto.
(aarch64_epilogue_uses): Ditto.
(aarch64_update_ipa_fn_target_info): Ditto.
(aarch64_optimize_mode_switching): Ditto.
(aarch64_mode_entry): Ditto.
(aarch64_mode_exit): Ditto.
(aarch64_valid_sysreg_name_p): Ditto.
(aarch64_retrieve_sysreg): Ditto.
* config/aarch64/aarch64.h (TARGET_STREAMING_COMPATIBLE): Ditto.


diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 
d555f350cd79ebed21dab77208b0ce291ab90e79..f033db5b25371d6b20a7c3cc2a4dc5462f8f991a
 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1125,7 +1125,7 @@ check_required_extensions (location_t location, tree 
fndecl,
   aarch64_feature_flags required_extensions)
 {
   auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
-  if (missing_extensions == 0)
+  if (!missing_extensions)
 return check_required_registers (location, fndecl);
 
   if (missing_extensions & AARCH64_FL_SM_OFF)
@@ -1635,8 +1635,8 @@ add_overloaded_function (const function_instance 
&instance,
   tree id = get_identifier (name);
   if (registered_function **map_value = name_map->get (id))
 gcc_assert ((*map_value)->instance == instance
-   && ((*map_value)->required_extensions
-   & ~required_extensions) == 0);
+   && !((*map_value)->required_extensions
+& ~required_extensions));
   else
 {
   registered_function &rfn
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
8eb21cfcfc1e80bef051c571ec7cfae47e3393ed..f4ab220271239ce5a750cf211120d5b37d7f8b27
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -275,7 +275,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
AARCH64_FL_SM_OFF;
 
 /* The current function has a streaming-compatible body.  */
 #define TARGET_STREAMING_COMPATIBLE \
-  ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
+  (!(aarch64_isa_flags & AARCH64_FL_SM_STATE))
 
 /* PSTATE.ZA is enabled in the current function body.  */
 #define TARGET_ZA (AARCH64_ISA_ZA_ON)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
582dac5129faccee0db3a68f6bdf866e8b41a059..e84151c474029b437ce67eb0cd6fca591a823b82
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -4649,7 +4649,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx 
src,
 {
   gcc_assert (offset.coeffs[0] == offset.coeffs[1]);
   rtx offset_rtx;
-  if (force_isa_mode == 0)
+  if (!force_isa_mode)
offset_rtx = gen_int_mode (offset, mode);
   else
offset_rtx = aarch64_sme_vq_immediate (mode, offset.coeffs[0], 0);
@@ -4675,7 +4675,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx 
src,
   && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
 {
   rtx offset_rtx;
-  if (force_isa_mode == 0)
+  if (!force_isa_mode)
offset_rtx = gen_int_mode (poly_offset, mode);
   else
offset_rtx = aarch64_sme_vq_immediate (mode, factor, 0);
@@ -4759,8 +4759,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx 
src,
 a shift and add sequence for the multiplication.
 If CNTB << SHIFT is out of range, stick with the current
 shift factor.  */
- if (force_isa_mode == 0
- && IN_RANGE (low_bit, 2, 16 * 16))
+ if (!force_isa_mode && IN_RANGE (low_bit, 2, 16 * 16))
{
  val = gen_int_mode (poly_int64 (low_bit, low_bit), mode);
  shift = 0;
@@ -4900,7 +4899,7 @@ static rtx_insn *
 aarch64_guard_switch_pstate_sm (rtx old_svcr, aarch64_feature_flags local_mode)
 {
   local_mode &= AARCH64_FL_SM_STATE;
-  gcc_assert (local_mode != 0);
+  gcc_assert (local_mode);
   auto already_ok_cond = (local_mode & AARCH64_FL_SM_ON ? NE : EQ);
   auto *label = gen_label_rtx ();
   auto branch = aarch64_gen_test_and_branch (already_ok_cond, old_svcr, 0,
@@ -4923,7 +4922,7 @@ aarch64_switch_pstate_sm (aarch64_feature_flags old_mode,
   gcc_assert (old_mode != new_mode);
 
   if ((new_mode & AARCH64_FL_SM_ON)
-  || (new_mode == 0 && (old_mode & AARCH64_FL_SM_OFF)))
+  || (!new_mode && (old_mode & AARCH64_FL_SM_OFF)))
 emit_insn (gen_aarch64_smstart_sm ());
   else
 emit_insn (gen_aarch64_smstop_sm ());

[PATCH 03/12] aarch64: Don't use 0 for aarch64_feature_flags

2024-05-14 Thread Andrew Carlotti

Replace all uses of 0 for aarch64_feature_flags variable initialisation
with the (almost) new macro AARCH64_NO_FEATURES.

This is needed because a later commit will disallow casts to
aarch64_feature_flags from integer types.

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(all_extensions): Use AARCH64_NO_FEATURES.
(all_cores): Ditto.
(all_architectures): Ditto.
(aarch64_get_extension_string_for_isa_flags): Ditto.
* config/aarch64/aarch64-feature-deps.h (get_flags): Ditto.
(get_enable): Ditto.
(get_flags_off): Ditto.
* config/aarch64/aarch64-opts.h (AARCH64_NO_FEATURES): Define.
* config/aarch64/aarch64-protos.h: Use AARCH64_NO_FEATURES.
* config/aarch64/aarch64-sve-builtins-sme.def
(REQUIRED_EXTENSIONS): Ditto.
* config/aarch64/aarch64-sve-builtins.cc
(function_groups): Ditto.
* config/aarch64/aarch64-sve-builtins.h:
(get_contiguous_base): Ditto.
(sve_switcher): Ditto.
* config/aarch64/aarch64.cc (all_architectures): Ditto.
(all_cores): Ditto.
(AARCH64_NO_FEATURES): Remove superseded #define and #undef.
(aarch64_override_options): Use AARCH64_NO_FEATURES.
(aarch64_process_target_attr): Remove dead initialisation.
* config/aarch64/driver-aarch64.cc
(aarch64_cpu_data): Use AARCH64_NO_FEATURES.
(aarch64_arches): Ditto.
(host_detect_local_cpu): Ditto.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
951d041d3109b935e90a7cb5d714940414e81761..162b622564ab543cadfc24a7341f1fc476733f45
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -158,7 +158,8 @@ static constexpr aarch64_option_extension all_extensions[] =
   {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
 #include "config/aarch64/aarch64-option-extensions.def"
-  {NULL, 0, 0, 0}
+  {NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES,
+AARCH64_NO_FEATURES}
 };
 
 struct processor_name_to_arch
@@ -183,7 +184,7 @@ static constexpr processor_name_to_arch all_cores[] =
   {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
 #include "config/aarch64/aarch64-cores.def"
   {"generic", AARCH64_ARCH_V8A, feature_deps::V8A ().enable},
-  {"", aarch64_no_arch, 0}
+  {"", aarch64_no_arch, AARCH64_NO_FEATURES}
 };
 
 /* Map architecture revisions to their string representation.  */
@@ -192,7 +193,7 @@ static constexpr arch_to_arch_name all_architectures[] =
 #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E)\
   {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
 #include "config/aarch64/aarch64-arches.def"
-  {aarch64_no_arch, "", 0}
+  {aarch64_no_arch, "", AARCH64_NO_FEATURES}
 };
 
 /* Parse the architecture extension string STR and update ISA_FLAGS
@@ -299,14 +300,14 @@ aarch64_get_extension_string_for_isa_flags
  However, assemblers with Armv8-R AArch64 support should not have this
  issue, so we don't need this fix when targeting Armv8-R.  */
   auto explicit_flags = (!(current_flags & AARCH64_FL_V8R)
-? AARCH64_FL_CRC : 0);
+? AARCH64_FL_CRC : AARCH64_NO_FEATURES);
 
   /* Add the features in isa_flags & ~current_flags using the smallest
  possible number of extensions.  We can do this by iterating over the
  array in reverse order, since the array is sorted topologically.
  But in order to make the output more readable, it seems better
  to add the strings in definition order.  */
-  aarch64_feature_flags added = 0;
+  aarch64_feature_flags added = AARCH64_NO_FEATURES;
   auto flags_crypto = AARCH64_FL_AES | AARCH64_FL_SHA2;
   for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
 {
diff --git a/gcc/config/aarch64/aarch64-feature-deps.h 
b/gcc/config/aarch64/aarch64-feature-deps.h
index 
79126db88254b89f74a8583d50a77bc27865e265..992e133d76935d411ce4cd39480c07ea18c62ddf
 100644
--- a/gcc/config/aarch64/aarch64-feature-deps.h
+++ b/gcc/config/aarch64/aarch64-feature-deps.h
@@ -26,7 +26,7 @@ namespace feature_deps {
 /* Together, these definitions of get_flags take a list of
feature names (representing functions that are defined below)
and return the set of associated flags.  */
-constexpr aarch64_feature_flags get_flags () { return 0; }
+constexpr aarch64_feature_flags get_flags () { return AARCH64_NO_FEATURES; }
 
 template
 constexpr aarch64_feature_flags
@@ -37,7 +37,7 @@ get_flags (T1 i, Ts... args)
 
 /* Like get_flags, but return the transitive closure of those features
and the ones that they rely on.  */
-constexpr aarch64_feature_flags get_enable () { return 0; }
+constexpr aarch64_feature_flags get_enable () { return AARCH64_NO_FEATURES; }
 
 template
 constexpr aarch64_feature_flags
@@ -97,9 +97,10 @@

[PATCH 05/12] aarch64: Eliminate a temporary variable.

2024-05-14 Thread Andrew Carlotti

The name would become misleading in a later commit anyway, and I think
this is marginally more readable.

gcc/ChangeLog:

* config/aarch64/aarch64.cc
(aarch64_override_options): Remove temporary variable.


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
e84151c474029b437ce67eb0cd6fca591a823b82..7b4e625190018dc3f16ef45c6eaf8fd3af10c784
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18817,7 +18817,6 @@ aarch64_override_options (void)
   SUBTARGET_OVERRIDE_OPTIONS;
 #endif
 
-  auto isa_mode = AARCH64_FL_DEFAULT_ISA_MODE;
   if (cpu && arch)
 {
   /* If both -mcpu and -march are specified, warn if they are not
@@ -18840,25 +18839,25 @@ aarch64_override_options (void)
}
 
   selected_arch = arch->arch;
-  aarch64_set_asm_isa_flags (arch_isa | isa_mode);
+  aarch64_set_asm_isa_flags (arch_isa | AARCH64_FL_DEFAULT_ISA_MODE);
 }
   else if (cpu)
 {
   selected_arch = cpu->arch;
-  aarch64_set_asm_isa_flags (cpu_isa | isa_mode);
+  aarch64_set_asm_isa_flags (cpu_isa | AARCH64_FL_DEFAULT_ISA_MODE);
 }
   else if (arch)
 {
   cpu = &all_cores[arch->ident];
   selected_arch = arch->arch;
-  aarch64_set_asm_isa_flags (arch_isa | isa_mode);
+  aarch64_set_asm_isa_flags (arch_isa | AARCH64_FL_DEFAULT_ISA_MODE);
 }
   else
 {
   /* No -mcpu or -march specified, so use the default CPU.  */
   cpu = &all_cores[TARGET_CPU_DEFAULT];
   selected_arch = cpu->arch;
-  aarch64_set_asm_isa_flags (cpu->flags | isa_mode);
+  aarch64_set_asm_isa_flags (cpu->flags | AARCH64_FL_DEFAULT_ISA_MODE);
 }
 
   selected_tune = tune ? tune->ident : cpu->ident;

[PATCH 06/12] aarch64: Introduce aarch64_isa_mode type

2024-05-14 Thread Andrew Carlotti

Currently there are many places where an aarch64_feature_flags variable
is used, but only the bottom three isa mode bits are set and read.
Using a separate data type for these values makes it more clear that
they're not expected or required to have any of their upper feature bits
set.  It will also make things simpler and more efficient when we extend
aarch64_feature_flags to 128 bits.

This patch uses explicit casts whenever converting from an
aarch64_feature_flags value to an aarch64_isa_mode value.  This isn't
strictly necessary, but serves to highlight the locations where an
explicit conversion will become necessary later.

gcc/ChangeLog:

* config/aarch64/aarch64-opts.h: Add aarch64_isa_mode typedef.
* config/aarch64/aarch64-protos.h
(aarch64_gen_callee_cookie): Use aarch64_isa_mode parameter.
(aarch64_sme_vq_immediate): Ditto.
* config/aarch64/aarch64.cc
(aarch64_fntype_pstate_sm): Use aarch64_isa_mode values.
(aarch64_fntype_pstate_za): Ditto.
(aarch64_fndecl_pstate_sm): Ditto.
(aarch64_fndecl_pstate_za): Ditto.
(aarch64_fndecl_isa_mode): Ditto.
(aarch64_cfun_incoming_pstate_sm): Ditto.
(aarch64_cfun_enables_pstate_sm): Ditto.
(aarch64_call_switches_pstate_sm): Ditto.
(aarch64_gen_callee_cookie): Ditto.
(aarch64_callee_isa_mode): Ditto.
(aarch64_insn_callee_abi): Ditto.
(aarch64_sme_vq_immediate): Ditto.
(aarch64_add_offset_temporaries): Ditto.
(aarch64_add_offset): Ditto.
(aarch64_add_sp): Ditto.
(aarch64_sub_sp): Ditto.
(aarch64_guard_switch_pstate_sm): Ditto.
(aarch64_switch_pstate_sm): Ditto.
(aarch64_init_cumulative_args): Ditto.
(aarch64_allocate_and_probe_stack_space): Ditto.
(aarch64_expand_prologue): Ditto.
(aarch64_expand_epilogue): Ditto.
(aarch64_start_call_args): Ditto.
(aarch64_expand_call): Ditto.
(aarch64_end_call_args): Ditto.
(aarch64_set_current_function): Ditto, with added conversions.
(aarch64_handle_attr_arch): Avoid macro with changed type.
(aarch64_handle_attr_cpu): Ditto.
(aarch64_handle_attr_isa_flags): Ditto.
(aarch64_switch_pstate_sm_for_landing_pad):
Use aarch64_isa_mode values.
(aarch64_switch_pstate_sm_for_jump): Ditto.
(pass_switch_pstate_sm::gate): Ditto.
* config/aarch64/aarch64.h
(AARCH64_ISA_MODE_{SM_ON|SM_OFF|ZA_ON}): New macros.
(AARCH64_FL_SM_STATE): Mark as possibly unused.
(AARCH64_ISA_MODE_SM_STATE): New aarch64_isa_mode mask.
(AARCH64_DEFAULT_ISA_MODE): New aarch64_isa_mode value.
(AARCH64_FL_DEFAULT_ISA_MODE): Define using above value.
(AARCH64_ISA_MODE): Change type to aarch64_isa_mode.
(arm_pcs): Use aarch64_isa_mode value.


diff --git a/gcc/config/aarch64/aarch64-opts.h 
b/gcc/config/aarch64/aarch64-opts.h
index 
376d7b5ad25e8838bc83fd9ab1c6f09c6de10835..c2d68716857b49db8f9c1393f11b3377f51fb60c
 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -23,6 +23,8 @@
 #define GCC_AARCH64_OPTS_H
 
 #ifndef USED_FOR_TARGET
+typedef uint64_t aarch64_isa_mode;
+
 typedef uint64_t aarch64_feature_flags;
 
 constexpr unsigned int AARCH64_NUM_ISA_MODES = (0
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
4b1fefdd53843e97d3249bfb4d9fed2ffe60f865..585beee44d51275545775420905e7c7b37e2ce5c
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -768,7 +768,7 @@ bool aarch64_constant_address_p (rtx);
 bool aarch64_emit_approx_div (rtx, rtx, rtx);
 bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
 tree aarch64_vector_load_decl (tree);
-rtx aarch64_gen_callee_cookie (aarch64_feature_flags, arm_pcs);
+rtx aarch64_gen_callee_cookie (aarch64_isa_mode, arm_pcs);
 void aarch64_expand_call (rtx, rtx, rtx, bool);
 bool aarch64_expand_cpymem_mops (rtx *, bool);
 bool aarch64_expand_cpymem (rtx *, bool);
@@ -809,7 +809,7 @@ int aarch64_add_offset_temporaries (rtx);
 void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
 bool aarch64_rdsvl_immediate_p (const_rtx);
 rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT,
- aarch64_feature_flags);
+ aarch64_isa_mode);
 char *aarch64_output_rdsvl (const_rtx);
 bool aarch64_addsvl_addspl_immediate_p (const_rtx);
 char *aarch64_output_addsvl_addspl (rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
f4ab220271239ce5a750cf211120d5b37d7f8b27..773cc12d5a88f774ab78af8a9099312335c19513
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -187,7 +187,17 @@ enum class aarch64_feature : unsigned char {
 #include "aarch64-arches.def"
 #undef HANDLE
 
-constexpr auto AARCH64_FL_SM_STATE = AARCH64_FL_SM_ON | AARCH64_FL_SM_O

[PATCH 07/12] aarch64: Define aarch64_get_{asm_|}isa_flags

2024-05-14 Thread Andrew Carlotti

Building an aarch64_feature_flags value from data within a gcc_options
or cl_target_option struct will get more complicated in a later commit.
Use a macro to avoid doing this manually in more than one location.

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(aarch64_handle_option): Use new macro.
* config/aarch64/aarch64.cc
(aarch64_override_options_internal): Ditto.
(aarch64_option_print): Ditto.
(aarch64_set_current_function): Ditto.
(aarch64_can_inline_p): Ditto.
(aarch64_declare_function_name): Ditto.
(aarch64_start_file): Ditto.
* config/aarch64/aarch64.h (aarch64_get_asm_isa_flags): New.
(aarch64_get_isa_flags): New.
(aarch64_asm_isa_flags): Use new macro.
(aarch64_isa_flags): Ditto.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
162b622564ab543cadfc24a7341f1fc476733f45..e08a0fc86590b35a595a305599dfb919f83d6906
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -111,7 +111,7 @@ aarch64_handle_option (struct gcc_options *opts,
 
 case OPT_mgeneral_regs_only:
   opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
-  aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags);
+  aarch64_set_asm_isa_flags (opts, aarch64_get_asm_isa_flags (opts));
   return true;
 
 case OPT_mfix_cortex_a53_835769:
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
773cc12d5a88f774ab78af8a9099312335c19513..49bdc7565cd5ca80fbe2d4abf30aae12841c340f
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -22,15 +22,18 @@
 #ifndef GCC_AARCH64_H
 #define GCC_AARCH64_H
 
+#define aarch64_get_asm_isa_flags(opts) \
+  (aarch64_feature_flags ((opts)->x_aarch64_asm_isa_flags))
+#define aarch64_get_isa_flags(opts) \
+  (aarch64_feature_flags ((opts)->x_aarch64_isa_flags))
+
 /* Make these flags read-only so that all uses go via
aarch64_set_asm_isa_flags.  */
 #ifndef GENERATOR_FILE
 #undef aarch64_asm_isa_flags
-#define aarch64_asm_isa_flags \
-  ((aarch64_feature_flags) global_options.x_aarch64_asm_isa_flags)
+#define aarch64_asm_isa_flags (aarch64_get_asm_isa_flags (&global_options))
 #undef aarch64_isa_flags
-#define aarch64_isa_flags \
-  ((aarch64_feature_flags) global_options.x_aarch64_isa_flags)
+#define aarch64_isa_flags (aarch64_get_isa_flags (&global_options))
 #endif
 
 /* Target CPU builtins.  */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
b6300fc24c0d674edbb0df8e2d10121f2d39e7d6..eef0905069232bacc59d574cad0f6edbaf062387
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18292,10 +18292,11 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
   && !fixed_regs[R18_REGNUM])
 error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
 
-  if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
-  && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
+  aarch64_feature_flags isa_flags = aarch64_get_isa_flags (opts);
+  if ((isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
+  && !(isa_flags & AARCH64_FL_SME))
 {
-  if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
+  if (isa_flags & AARCH64_FL_SM_ON)
error ("streaming functions require the ISA extension %qs", "sme");
   else
error ("functions with SME state require the ISA extension %qs",
@@ -18304,8 +18305,7 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
  " option %<-march%>, or by using the %"
  " attribute or pragma", "sme");
   opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
-  auto new_flags = (opts->x_aarch64_asm_isa_flags
-   | feature_deps::SME ().enable);
+  auto new_flags = isa_flags | feature_deps::SME ().enable;
   aarch64_set_asm_isa_flags (opts, new_flags);
 }
 
@@ -18999,9 +18999,9 @@ aarch64_option_print (FILE *file, int indent, struct 
cl_target_option *ptr)
   const struct processor *cpu
 = aarch64_get_tune_cpu (ptr->x_selected_tune);
   const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch);
+  aarch64_feature_flags isa_flags = aarch64_get_asm_isa_flags(ptr);
   std::string extension
-= aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_asm_isa_flags,
- arch->flags);
+= aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
 
   fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
   fprintf (file, "%*sselected arch = %s%s\n", indent, "",
@@ -19061,7 +19061,7 @@ aarch64_set_current_function (tree fndecl)
   auto new_isa_mode = (fndecl
   ? aarch64_fndecl_isa_mode (fndecl)
   : AARCH64_DEFAULT_ISA_MODE);
-  auto isa_flags = TREE_TARGET_OPTION (new_tree)->x_a

Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-14 Thread Qing Zhao



> On May 13, 2024, at 20:14, Kees Cook  wrote:
> 
> On Tue, May 14, 2024 at 01:38:49AM +0200, Andrew Pinski wrote:
>> On Mon, May 13, 2024, 11:41 PM Kees Cook  wrote:
>>> But it makes no sense to warn about:
>>> 
>>> void sparx5_set (int * ptr, struct nums * sg, int index)
>>> {
>>>   if (index >= 4)
>>> warn ();
>>>   *ptr = 0;
>>>   *val = sg->vals[index];
>>>   if (index >= 4)
>>> warn ();
>>>   *ptr = *val;
>>> }
>>> 
>>> Because at "*val = sg->vals[index];" the actual value range tracking for
>>> index is _still_ [INT_MIN,INT_MAX]. (Only within the "then" side of the
>>> "if" statements is the range tracking [4,INT_MAX].)
>>> 
>>> However, in the case where jump threading has split the execution flow
>>> and produced a copy of "*val = sg->vals[index];" where the value range
>>> tracking for "index" is now [4,INT_MAX], the warning is valid. But it
>>> is only for that instance. Reporting it for effectively both (there is
>>> only 1 source line for the array indexing) is misleading because there
>>> is nothing the user can do about it -- the compiler created the copy and
>>> then noticed it had a range it could apply to that array index.
>>> 
>> 
>> "there is nothing the user can do about it" is very much false. They could
>> change warn call into a noreturn function call instead.  (In the case of
>> the Linux kernel panic). There are things the user can do to fix the
>> warning and even get better code generation out of the compilers.
> 
> This isn't about warn() not being noreturn. The warn() could be any
> function call; the jump threading still happens.

When the program is executed on the “if (index >= 4)” path, the value of
“index” is definitely >= 4. When sg->vals[index] is referenced on this path
(the case when the routine “warn” is NOT noreturn), it’s definitely an
out-of-bounds array access.  So, the compiler’s warning is correct. And this
warning does catch a potential issue in the source code that needs to be fixed
by either of the following two solutions:

   1. Make the routine “warn” not return, and mark it noreturn;
Or
   2. On the path “if (index >= 4)”, bring the value of “index” within the
bounds of the array.

With either of the above source code changes, we can fix this potential
out-of-bounds array access bug in the source code.

Qing
> 
> GCC is warning about a compiler-constructed situation that cannot be
> reliably fixed on the source side (GCC emitting the warning is highly
> unstable in these cases), since the condition is not *always* true for
> the given line of code. If it is not useful to warn for "array[index]"
> being out of range when "index" is always [INT_MIN,INT_MAX], then it
> is not useful to warn when "index" MAY be [INT_MIN,INT_MAX] for a given
> line of code.
> 
> -Kees
> 
> -- 
> Kees Cook

[PATCH 08/12] aarch64: Decouple feature flag option storage type

2024-05-14 Thread Andrew Carlotti

The awk scripts that process the .opt files are relatively fragile and
only handle a limited set of data types correctly.  The unrecognised
aarch64_feature_flags type is handled as a uint64_t, which happens to be
correct for now.  However, that assumption will change when we extend
the mask to 128 bits.

This patch changes the option members to use uint64_t types, and adds a
"_0" suffix to the names (both for future extensibility, and to allow
the original name to be used for the full aarch64_feature_flags mask
within generator files).

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(aarch64_set_asm_isa_flags): Reorder, and add suffix to names.
* config/aarch64/aarch64.h
(aarch64_get_asm_isa_flags): Add "_0" suffix.
(aarch64_get_isa_flags): Ditto.
(aarch64_asm_isa_flags): Redefine using renamed uint64_t value.
(aarch64_isa_flags): Ditto.
* config/aarch64/aarch64.opt:
(aarch64_asm_isa_flags): Rename to...
(aarch64_asm_isa_flags_0): ...this, and change to uint64_t.
(aarch64_isa_flags): Rename to...
(aarch64_isa_flags_0): ...this, and change to uint64_t.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
e08a0fc86590b35a595a305599dfb919f83d6906..2f437b82a24c16d9f808a4367ce2a281a49a77ee
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -66,15 +66,16 @@ static const struct default_options 
aarch_option_optimization_table[] =
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
-/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update
-   OPTS->x_aarch64_isa_flags accordingly.  */
+
+/* Set OPTS->x_aarch64_asm_isa_flags_0 to FLAGS and update
+   OPTS->x_aarch64_isa_flags_0 accordingly.  */
 void
 aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
 {
-  opts->x_aarch64_asm_isa_flags = flags;
-  opts->x_aarch64_isa_flags = flags;
+  opts->x_aarch64_asm_isa_flags_0 = flags;
   if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
-opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
+flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
+  opts->x_aarch64_isa_flags_0 = flags;
 }
 
 /* Implement TARGET_HANDLE_OPTION.
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
49bdc7565cd5ca80fbe2d4abf30aae12841c340f..af256c581aedc04e4194ac0158380fcdb8b65594
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -23,13 +23,18 @@
 #define GCC_AARCH64_H
 
 #define aarch64_get_asm_isa_flags(opts) \
-  (aarch64_feature_flags ((opts)->x_aarch64_asm_isa_flags))
+  (aarch64_feature_flags ((opts)->x_aarch64_asm_isa_flags_0))
 #define aarch64_get_isa_flags(opts) \
-  (aarch64_feature_flags ((opts)->x_aarch64_isa_flags))
+  (aarch64_feature_flags ((opts)->x_aarch64_isa_flags_0))
 
 /* Make these flags read-only so that all uses go via
aarch64_set_asm_isa_flags.  */
-#ifndef GENERATOR_FILE
+#ifdef GENERATOR_FILE
+#undef aarch64_asm_isa_flags
+#define aarch64_asm_isa_flags (aarch64_feature_flags (aarch64_asm_isa_flags_0))
+#undef aarch64_isa_flags
+#define aarch64_isa_flags (aarch64_feature_flags (aarch64_isa_flags_0))
+#else
 #undef aarch64_asm_isa_flags
 #define aarch64_asm_isa_flags (aarch64_get_asm_isa_flags (&global_options))
 #undef aarch64_isa_flags
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 
6356c419399bd324929cd599e5a4b926b0383469..45aab49de27bdfa0fb3f67ec06c7dcf0ac242fb3
 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -31,10 +31,10 @@ TargetVariable
 enum aarch64_arch selected_arch = aarch64_no_arch
 
 TargetVariable
-aarch64_feature_flags aarch64_asm_isa_flags = 0
+uint64_t aarch64_asm_isa_flags_0 = 0
 
 TargetVariable
-aarch64_feature_flags aarch64_isa_flags = 0
+uint64_t aarch64_isa_flags_0 = 0
 
 TargetVariable
 unsigned aarch_enable_bti = 2

[PATCH 10/12] aarch64: Add aarch64_feature_flags_from_index macro

2024-05-14 Thread Andrew Carlotti

When aarch64_feature_flags grows to 128 bits, constructing a mask with a
specific indexed value set will become more complicated.  Extract this
operation into a separate macro, and preemptively annotate the feature
masks as possibly unused.

gcc/ChangeLog:

* config/aarch64/aarch64-opts.h
(aarch64_feature_flags_from_index): New macro.
* config/aarch64/aarch64.h
(AARCH64_FL_##IDENT): Mark as maybe unused, and use new macro.


diff --git a/gcc/config/aarch64/aarch64-opts.h 
b/gcc/config/aarch64/aarch64-opts.h
index 
c2d68716857b49db8f9c1393f11b3377f51fb60c..80926a008aa2ed7dffa79aaa425dd3d7fc9d2581
 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -32,6 +32,9 @@ constexpr unsigned int AARCH64_NUM_ISA_MODES = (0
 #include "aarch64-isa-modes.def"
 );
 
+#define aarch64_feature_flags_from_index(index) \
+  (aarch64_feature_flags (uint64_t (1) << index))
+
 #define AARCH64_NO_FEATURES aarch64_feature_flags (0)
 #endif
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
af256c581aedc04e4194ac0158380fcdb8b65594..dd3437214e1597f03ac947a09c124ea0b04e27e8
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -185,8 +185,8 @@ enum class aarch64_feature : unsigned char {
 
 /* Define unique flags for each of the above.  */
 #define HANDLE(IDENT) \
-  constexpr auto AARCH64_FL_##IDENT \
-= aarch64_feature_flags (1) << int (aarch64_feature::IDENT);
+  constexpr auto AARCH64_FL_##IDENT ATTRIBUTE_UNUSED \
+= aarch64_feature_flags_from_index (int (aarch64_feature::IDENT));
 #define DEF_AARCH64_ISA_MODE(IDENT) HANDLE (IDENT)
 #define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) HANDLE (IDENT)
 #define AARCH64_ARCH(A, B, IDENT, D, E) HANDLE (IDENT)

[RFC 11/12] Add explicit bool casts to .md condition users

2024-05-14 Thread Andrew Carlotti

This patch is one way to fix some issues I discovered when disallowing
implicit casts to bool from aarch64_feature_flags (in a later patch).
That in turn was necessary to prohibit accidental conversion of an
aarch64_feature_flags value to an integer by first implicitly casting to
a bool (and thus setting the resulting integer value to 0 or 1).

Most of the uses of TARGET_ macros occur indirectly in middle end code,
via their use in instruction pattern conditions.  There are also a few
uses in aarch64 backend code, which are also changed in this patch.

The documentation on instruction patterns [1] doesn't explicitly say
that the condition must be a bool.  If we want to assume this, I think
we should update the documentation, and ideally enforce type consistency
within the compiler.

The code generated in genconditions.cc by write_one_condition already
includes an assumption that casting a condition's value to an int is
valid (i.e. that it does not invoke undefined behaviour, and does not
change the result obtained when later converting it to a boolean
result).  Fortunately, for aarch64 at least, this assumption only needs
to hold when the original condition is a compile time constant, whereas
all our problematic usage involves comparisons against the runtime
feature mask.

If the use of non-bool instruction pattern conditions should be
disallowed, then it would be straightforward to fix the type mismatches
in the aarch64 backend, by adding explicit bool casts to all of the
TARGET_* macros.  Indeed, I think that would be a better approach to
fixing this issue.  However, I felt it would be more useful to first
investigate and demonstrate the downstream impact of these type issues.

Note that this patch doesn't compile without the subsequent patch,
due to ambiguous calls to aarch64_def_or_undef(int, ...).  I expect to
replace this patch with one that avoids the issue, so it isn't worth
meddling with the next patch in the series just to make this RFC compile
by itself.

[1] https://gcc.gnu.org/onlinedocs/gccint/Patterns.html


diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index 
b6f25e4db3c06a1addc09a47335fe5184cb4a100..0cfcac6ba6b1e0ae7cdc0fb864eb28ec7de78605
 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1506,7 +1506,7 @@ c_cpp_builtins (cpp_reader *pfile)
 
 #ifdef HAVE_adddf3
  builtin_define_with_int_value ("__LIBGCC_HAVE_HWDBL__",
-HAVE_adddf3);
+(bool) HAVE_adddf3);
 #endif
}
 
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 
fe1a20e4e546a68e5f7eddff3bbb0d3e831fbd9b..de4b383cda92c160bd706f9085999daac5d8313a
 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -47,6 +47,12 @@ aarch64_def_or_undef (bool def_p, const char *macro, 
cpp_reader *pfile)
 cpp_undef (pfile, macro);
 }
 
+static void
+aarch64_def_or_undef (aarch64_feature_flags def_p, const char *macro, 
cpp_reader *pfile)
+{
+  aarch64_def_or_undef ((bool) def_p, macro, pfile);
+}
+
 /* Define the macros that we always expect to have on AArch64.  */
 
 static void
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
69c3b257982b4a0e282cbf7486802b147d166945..052cf297e7672abf015a085ab357836cb3b235e4
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6561,10 +6561,10 @@ aarch64_function_value_regno_p (const unsigned int 
regno)
   /* Up to four fp/simd registers can return a function value, e.g. a
  homogeneous floating-point aggregate having four members.  */
   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
-return TARGET_FLOAT;
+return (bool) TARGET_FLOAT;
 
   if (regno >= P0_REGNUM && regno < P0_REGNUM + HA_MAX_NUM_FLDS)
-return TARGET_SVE;
+return (bool) TARGET_SVE;
 
   return false;
 }
diff --git a/gcc/genconditions.cc b/gcc/genconditions.cc
index 
13963dc3ff46aa250c39ce80d0b92356390e41ff..3aee4428ff7ff5c97260f56a5f6b0fffa4e95fc2
 100644
--- a/gcc/genconditions.cc
+++ b/gcc/genconditions.cc
@@ -140,9 +140,9 @@ write_one_condition (void **slot, void * ARG_UNUSED (dummy))
   putchar (*p);
 }
 
-  fputs ("\",\n__builtin_constant_p ", stdout);
+  fputs ("\",\n__builtin_constant_p ((bool)", stdout);
   rtx_reader_ptr->print_c_condition (test->expr);
-  fputs ("\n? (int) ", stdout);
+  fputs (")\n? (int) (bool)", stdout);
   rtx_reader_ptr->print_c_condition (test->expr);
   fputs ("\n: -1 },\n", stdout);
   return 1;
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 
d8682b2a9ad56a0a62b4407741c695489c72795b..0d9cf0de8b93da5884a352858b343f81644f9d3f
 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -386,7 +386,7 @@ main (int argc, const char **argv)
  unsigned end = MIN (patterns.length (),
  (i + 1) * patterns_per_function);
  for (j = sta

[PATCH 09/12] aarch64: Assign flags to local constexpr variable

2024-05-14 Thread Andrew Carlotti

This guarantees that the constant values are actually evaluated at
compile time.

In previous testing, I have observed GCC failing to evaluate and inline
these constant values, which exposed a separate bug in which some of the
required symbols from feature_deps were missing.  Richard Sandiford has
since fixed that bug, but we still want to ensure we get the benefits of
compile-time evaluation here.

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(aarch64_set_asm_isa_flags): Make constant explicitly constexpr.
* config/aarch64/aarch64.cc
(aarch64_override_options_internal): Ditto.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
2f437b82a24c16d9f808a4367ce2a281a49a77ee..9f583bb80456709e0028c358a1bad23ad59f20f4
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -74,7 +74,10 @@ aarch64_set_asm_isa_flags (gcc_options *opts, 
aarch64_feature_flags flags)
 {
   opts->x_aarch64_asm_isa_flags_0 = flags;
   if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
-flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
+{
+  constexpr auto flags_mask = ~feature_deps::get_flags_off (AARCH64_FL_FP);
+  flags &= flags_mask;
+}
   opts->x_aarch64_isa_flags_0 = flags;
 }
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
eef0905069232bacc59d574cad0f6edbaf062387..69c3b257982b4a0e282cbf7486802b147d166945
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18305,7 +18305,8 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
  " option %<-march%>, or by using the %"
  " attribute or pragma", "sme");
   opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
-  auto new_flags = isa_flags | feature_deps::SME ().enable;
+  constexpr auto flags_enable_sme = feature_deps::SME ().enable;
+  auto new_flags = isa_flags | flags_enable_sme;
   aarch64_set_asm_isa_flags (opts, new_flags);
 }

[PATCH 12/12] aarch64: Extend aarch64_feature_flags to 128 bits

2024-05-14 Thread Andrew Carlotti

Replace the existing typedef with a new class containing two private
uint64_t members.

Most of the preparatory work was carried out in previous commits.  The
most notable remaining changes are the addition of the get_isa_mode and
with_isa_mode functions for conversion to or from aarch64_isa_mode
types, and the use of a 'save' member function from within
aarch64_set_asm_isa_flags, to avoid needing to expose the uint64_t
members.

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(aarch64_set_asm_isa_flags): Use new flags.save function.
* config/aarch64/aarch64-opts.h
(class aarch64_feature_flags): New class.
(aarch64_feature_flags_from_index): Update to handle 128 bits.
(AARCH64_NO_FEATURES): Pass a second constructor parameter.
* config/aarch64/aarch64.cc
(aarch64_guard_switch_pstate_sm): Extract isa mode explicitly.
(aarch64_expand_epilogue): Ditto.
(aarch64_expand_call): Ditto.
(aarch64_set_current_function): Set/extract isa mode explicitly.
* config/aarch64/aarch64.h
(aarch64_get_asm_isa_flags): Use new option struct member.
(aarch64_get_isa_flags): Use new option struct member.
(aarch64_asm_isa_flags): Use second global variable.
(aarch64_isa_flags): Ditto.
(AARCH64_FL_ISA_MODES): Pass a second constructor parameter.
(AARCH64_FL_DEFAULT_ISA_MODE): Ditto.
(AARCH64_ISA_MODE): Extract isa mode explicitly.
* config/aarch64/aarch64.opt
(aarch64_asm_isa_flags_1): Add a second uint64_t for bitmask.
(aarch64_isa_flags_1): Ditto.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
9f583bb80456709e0028c358a1bad23ad59f20f4..a84650086ba9a1054f3ba15022567a00b7fb4313
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -67,18 +67,18 @@ static const struct default_options 
aarch_option_optimization_table[] =
   };
 
 
-/* Set OPTS->x_aarch64_asm_isa_flags_0 to FLAGS and update
-   OPTS->x_aarch64_isa_flags_0 accordingly.  */
+/* Set OPTS->x_aarch64_asm_isa_flags_<0..n> to FLAGS and update
+   OPTS->x_aarch64_isa_flags_<0..n> accordingly.  */
 void
 aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
 {
-  opts->x_aarch64_asm_isa_flags_0 = flags;
+  flags.save(&opts->x_aarch64_asm_isa_flags_0, 
&opts->x_aarch64_asm_isa_flags_1);
   if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
 {
   constexpr auto flags_mask = ~feature_deps::get_flags_off (AARCH64_FL_FP);
   flags &= flags_mask;
 }
-  opts->x_aarch64_isa_flags_0 = flags;
+  flags.save(&opts->x_aarch64_isa_flags_0, &opts->x_aarch64_isa_flags_1);
 }
 
 /* Implement TARGET_HANDLE_OPTION.
diff --git a/gcc/config/aarch64/aarch64-opts.h 
b/gcc/config/aarch64/aarch64-opts.h
index 
80926a008aa2ed7dffa79aaa425dd3d7fc9d2581..7571385740d5271ab99bcc3380899a550788592d
 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -25,17 +25,110 @@
 #ifndef USED_FOR_TARGET
 typedef uint64_t aarch64_isa_mode;
 
-typedef uint64_t aarch64_feature_flags;
-
 constexpr unsigned int AARCH64_NUM_ISA_MODES = (0
 #define DEF_AARCH64_ISA_MODE(IDENT) + 1
 #include "aarch64-isa-modes.def"
 );
 
+class aarch64_feature_flags
+{
+private:
+  uint64_t flags0;
+  uint64_t flags1;
+
+public:
+  constexpr aarch64_feature_flags (uint64_t flags0_m, uint64_t flags1_m)
+: flags0 (flags0_m), flags1 (flags1_m) {}
+  aarch64_feature_flags () = default;
+
+  void save(uint64_t *save0, uint64_t *save1)
+{
+  *save0 = flags0;
+  *save1 = flags1;
+}
+
+  constexpr aarch64_isa_mode get_isa_mode ()
+{
+  return flags0 & ((1 << AARCH64_NUM_ISA_MODES) - 1);
+}
+
+  constexpr aarch64_feature_flags with_isa_mode (const aarch64_isa_mode mode) 
const
+{
+  return aarch64_feature_flags ((flags0 & ~((1 << AARCH64_NUM_ISA_MODES) - 
1)) | mode,
+   flags1);
+}
+
+  constexpr aarch64_feature_flags operator&(const aarch64_feature_flags other) 
const
+{
+  return aarch64_feature_flags (flags0 & other.flags0,
+   flags1 & other.flags1);
+}
+
+  aarch64_feature_flags operator&=(const aarch64_feature_flags other)
+{
+  flags0 &= other.flags0;
+  flags1 &= other.flags1;
+  return *this;
+}
+
+  constexpr aarch64_feature_flags operator|(const aarch64_feature_flags other) 
const
+{
+  return aarch64_feature_flags (flags0 | other.flags0,
+   flags1 | other.flags1);
+}
+
+  aarch64_feature_flags operator|=(const aarch64_feature_flags other)
+{
+  flags0 |= other.flags0;
+  flags1 |= other.flags1;
+  return *this;
+}
+
+  constexpr aarch64_feature_flags operator^(const aarch64_feature_flags other) 
const
+{
+  return aarch64_feature_flags (flags0 ^ other.flags

Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-14 Thread Jeff Law





On 5/14/24 8:57 AM, Qing Zhao wrote:




On May 13, 2024, at 20:14, Kees Cook  wrote:

On Tue, May 14, 2024 at 01:38:49AM +0200, Andrew Pinski wrote:

On Mon, May 13, 2024, 11:41 PM Kees Cook  wrote:

But it makes no sense to warn about:

void sparx5_set (int * ptr, struct nums * sg, int index)
{
   if (index >= 4)
 warn ();
   *ptr = 0;
   *val = sg->vals[index];
   if (index >= 4)
 warn ();
   *ptr = *val;
}

Because at "*val = sg->vals[index];" the actual value range tracking for
index is _still_ [INT_MIN,INT_MAX]. (Only within the "then" side of the
"if" statements is the range tracking [4,INT_MAX].)

However, in the case where jump threading has split the execution flow
and produced a copy of "*val = sg->vals[index];" where the value range
tracking for "index" is now [4,INT_MAX], the warning is valid. But it
is only for that instance. Reporting it for effectively both (there is
only 1 source line for the array indexing) is misleading because there
is nothing the user can do about it -- the compiler created the copy and
then noticed it had a range it could apply to that array index.



"there is nothing the user can do about it" is very much false. They could
change warn call into a noreturn function call instead.  (In the case of
the Linux kernel panic). There are things the user can do to fix the
warning and even get better code generation out of the compilers.


This isn't about warn() not being noreturn. The warn() could be any
function call; the jump threading still happens.


When the program is executed on the “if (index >= 4)” path, the value of
“index” is definitely >= 4. When sg->vals[index] is referenced on this path
(the case when the routine “warn” is NOT noreturn), it’s definitely an
out-of-bounds array access.  So, the compiler’s warning is correct. And this
warning does catch a potential issue in the source code that needs to be fixed
by either of the following two solutions:

1. Make the routine “warn” as noreturn and mark it noreturn;
This would be my recommendation.  We're about to execute undefined 
behavior.  I don't see a way to necessarily recover safely here, so I'd 
suggest having warn() not return and mark it appropriately.


That'll have numerous secondary benefits as well.

jeff

[commited, gcc13] ipa: Compare jump functions in ICF (PR 113907)

2024-05-14 Thread Martin Jambor

Hi,

This is a manual backport of r14-9840-g1162861439fd3c from master.
Manual because the bits and value range representation in jump
functions have changed during the gcc 14 development cycle.

In PR 113907 comment #58, Honza found a case where ICF thinks bodies
of functions are equivalent but because of difference in aliases in a
memory access, different aggregate jump functions are associated with
supposedly equivalent call statements.  This patch adds a way to
compare jump functions and plugs it into ICF to avoid the issue.

Bootstrapped and tested on x86_64-linux.  Committed to the gcc-13
branch.

Martin


gcc/ChangeLog:

2024-05-14  Martin Jambor  

PR ipa/113907
* ipa-prop.h (ipa_jump_functions_equivalent_p): Declare.
(values_equal_for_ipcp_p): Likewise.
* ipa-prop.cc (ipa_agg_pass_through_jf_equivalent_p): New function.
(ipa_agg_jump_functions_equivalent_p): Likewise.
(ipa_jump_functions_equivalent_p): Likewise.
* ipa-cp.cc (values_equal_for_ipcp_p): Make function public.
* ipa-icf-gimple.cc: Include alloc-pool.h, symbol-summary.h, sreal.h,
ipa-cp.h and ipa-prop.h.
(func_checker::compare_gimple_call): Compare jump functions.

gcc/testsuite/ChangeLog:

2024-05-10  Martin Jambor  

PR ipa/113907
* gcc.dg/lto/pr113907_0.c: New.
* gcc.dg/lto/pr113907_1.c: Likewise.
* gcc.dg/lto/pr113907_2.c: Likewise.
---
 gcc/ipa-cp.cc |   2 +-
 gcc/ipa-icf-gimple.cc |  29 +
 gcc/ipa-prop.cc   | 157 ++
 gcc/ipa-prop.h|   3 +
 gcc/testsuite/gcc.dg/lto/pr113907_0.c |  18 +++
 gcc/testsuite/gcc.dg/lto/pr113907_1.c |  35 ++
 gcc/testsuite/gcc.dg/lto/pr113907_2.c |  11 ++
 7 files changed, 254 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr113907_0.c
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr113907_1.c
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr113907_2.c

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index b3e0f62e400..8f36608cf33 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -458,7 +458,7 @@ ipcp_lattice::is_single_const ()
 
 /* Return true iff X and Y should be considered equal values by IPA-CP.  */
 
-static bool
+bool
 values_equal_for_ipcp_p (tree x, tree y)
 {
   gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 49302ad56c6..054a557bd58 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -42,7 +42,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-sra.h"
 
 #include "tree-ssa-alias-compare.h"
+#include "alloc-pool.h"
+#include "symbol-summary.h"
 #include "ipa-icf-gimple.h"
+#include "sreal.h"
+#include "ipa-prop.h"
 
 namespace ipa_icf_gimple {
 
@@ -751,6 +755,31 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2)
   && !compatible_types_p (TREE_TYPE (t1), TREE_TYPE (t2)))
 return return_false_with_msg ("GIMPLE internal call LHS type mismatch");
 
+  if (!gimple_call_internal_p (s1))
+{
+  cgraph_edge *e1 = cgraph_node::get (m_source_func_decl)->get_edge (s1);
+  cgraph_edge *e2 = cgraph_node::get (m_target_func_decl)->get_edge (s2);
+  class ipa_edge_args *args1 = ipa_edge_args_sum->get (e1);
+  class ipa_edge_args *args2 = ipa_edge_args_sum->get (e2);
+  if ((args1 != nullptr) != (args2 != nullptr))
+   return return_false_with_msg ("ipa_edge_args mismatch");
+  if (args1)
+   {
+ int n1 = ipa_get_cs_argument_count (args1);
+ int n2 = ipa_get_cs_argument_count (args2);
+ if (n1 != n2)
+   return return_false_with_msg ("ipa_edge_args nargs mismatch");
+ for (int i = 0; i < n1; i++)
+   {
+ struct ipa_jump_func *jf1 = ipa_get_ith_jump_func (args1, i);
+ struct ipa_jump_func *jf2 = ipa_get_ith_jump_func (args2, i);
+ if (((jf1 != nullptr) != (jf2 != nullptr))
+ || (jf1 && !ipa_jump_functions_equivalent_p (jf1, jf2)))
+   return return_false_with_msg ("jump function mismatch");
+   }
+   }
+}
+
   return compare_operand (t1, t2, get_operand_access_type (&map, t1));
 }
 
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 0d816749534..11ba2521b2c 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -6022,5 +6022,162 @@ ipcp_transform_function (struct cgraph_node *node)
   return modified_mem_access ? TODO_update_ssa_only_virtuals : 0;
 }
 
+/* Return true if the two pass_through components of two jump functions are
+   known to be equivalent.  AGG_JF denotes whether they are part of aggregate
+   functions or not.  The function can be used before the IPA phase of IPA-CP
+   or inlining because it cannot cope with refdesc changes these passes can
+   carry out.  */
+
+static bool
+ipa_agg_pass_through_jf_equivalent_p (ipa_pass_through_data *ipt1,
+

1 2 >

1 - 100 of 191 matches

Mail list logo