[r13-3761 Regression] FAIL: g++.dg/warn/Warray-bounds-16.C -std=gnu++98 (test for excess errors) on Linux/x86_64

2022-11-08 Thread haochen.jiang via Gcc-patches
On Linux/x86_64,

a239a63f868e29e9276088e7c0fb00804c2903ba is the first bad commit
commit a239a63f868e29e9276088e7c0fb00804c2903ba
Author: Aldy Hernandez 
Date:   Fri Nov 4 22:24:42 2022 +0100

Improve multiplication by powers of 2 in range-ops.

caused

FAIL: g++.dg/pr71488.C   (test for excess errors)
FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++14 (test for excess errors)
FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++17 (test for excess errors)
FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++20 (test for excess errors)
FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++98 (test for excess errors)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-3761/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/warn/Warray-bounds-16.C --target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/warn/Warray-bounds-16.C --target_board='unix{-m32\ 
-march=cascadelake}'"

(Please do not reply to this email. For questions about this report, contact me 
at haochen dot jiang at intel.com)


[COMMITTED] ada: Add new -gnatw_q switch to usage message

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

...along with -gnatw_Q.

gcc/ada/

* usage.adb: Add -gnatw_q and -gnatw_Q.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/usage.adb | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/ada/usage.adb b/gcc/ada/usage.adb
index 7d11ae53696..642cfda1b27 100644
--- a/gcc/ada/usage.adb
+++ b/gcc/ada/usage.adb
@@ -567,6 +567,10 @@ begin
   "record types");
Write_Line (".Q*  turn off warnings for questionable layout of " &
   "record types");
+   Write_Line ("_q   turn on warnings for ignored " &
+  "equality operators");
+   Write_Line ("_Q*  turn off warnings for ignored " &
+  "equality operators");
Write_Line ("r+   turn on warnings for redundant construct");
Write_Line ("R*   turn off warnings for redundant construct");
Write_Line (".r+  turn on warnings for object renaming function");
-- 
2.34.1



[COMMITTED] ada: Raise Tag_Error when Ada.Tags operations are called with No_Tag

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Implement missing behavior of RM 13.9 (25.1/3): Tag_Error is raised by a
call of Interface_Ancestor_Tags and Is_Descendant_At_Same_Level, if any
tag passed is No_Tag. This change also fixes Descendant_Tag, which
relies on Is_Descendant_At_Same_Level. The remaining operations already
worked properly.

gcc/ada/

* libgnat/a-tags.adb
(Interface_Ancestor_Tags): Raise Tag_Error on No_Tag.
(Is_Descendant_At_Same_Level): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/a-tags.adb | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/libgnat/a-tags.adb b/gcc/ada/libgnat/a-tags.adb
index d663a41f5a8..a9af942fc64 100644
--- a/gcc/ada/libgnat/a-tags.adb
+++ b/gcc/ada/libgnat/a-tags.adb
@@ -554,13 +554,18 @@ package body Ada.Tags is
-
 
function Interface_Ancestor_Tags (T : Tag) return Tag_Array is
-  TSD_Ptr : constant Addr_Ptr :=
-  To_Addr_Ptr (To_Address (T) - DT_Typeinfo_Ptr_Size);
-  TSD : constant Type_Specific_Data_Ptr :=
-  To_Type_Specific_Data_Ptr (TSD_Ptr.all);
-  Iface_Table : constant Interface_Data_Ptr := TSD.Interfaces_Table;
-
+  TSD_Ptr : Addr_Ptr;
+  TSD : Type_Specific_Data_Ptr;
+  Iface_Table : Interface_Data_Ptr;
begin
+  if T = No_Tag then
+ raise Tag_Error;
+  end if;
+
+  TSD_Ptr := To_Addr_Ptr (To_Address (T) - DT_Typeinfo_Ptr_Size);
+  TSD := To_Type_Specific_Data_Ptr (TSD_Ptr.all);
+  Iface_Table := TSD.Interfaces_Table;
+
   if Iface_Table = null then
  declare
 Table : Tag_Array (1 .. 0);
@@ -731,7 +736,10 @@ package body Ada.Tags is
   Ancestor   : Tag) return Boolean
is
begin
-  if Descendant = Ancestor then
+  if Descendant = No_Tag or else Ancestor = No_Tag then
+ raise Tag_Error;
+
+  elsif Descendant = Ancestor then
  return True;
 
   else
-- 
2.34.1



[COMMITTED] ada: Allow initialization of limited objects with delta aggregates

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Objects of a limited type can be initialized with "aggregates", which is
a collective term for ordinary aggregates (i.e. record aggregates and
array aggregates), extension aggregates and finally for delta
aggregates (introduced by Ada 2022).

gcc/ada/

* sem_ch3.adb (OK_For_Limited_Init_In_05): Handle delta aggregates
just like other aggregates.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index 76dc6325060..f6b852051dc 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -20145,6 +20145,7 @@ package body Sem_Ch3 is
 
   case Nkind (Original_Node (Exp)) is
  when N_Aggregate
+| N_Delta_Aggregate
 | N_Extension_Aggregate
 | N_Function_Call
 | N_Op
-- 
2.34.1



[COMMITTED] ada: Missing master of task causing assertion failure

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Javier Miranda 

gcc/ada/

* exp_ch9.adb
(Build_Master_Entity): Handle missing case: when the context of
the master is a BIP function whose result type has tasks.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch9.adb | 37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/exp_ch9.adb b/gcc/ada/exp_ch9.adb
index decf61782af..70ede15901e 100644
--- a/gcc/ada/exp_ch9.adb
+++ b/gcc/ada/exp_ch9.adb
@@ -3207,10 +3207,45 @@ package body Exp_Ch9 is
  Find_Enclosing_Context (Par, Context, Context_Id, Decls);
   end if;
 
+  --  When the enclosing context is a BIP function whose result type has
+  --  tasks, the function has an extra formal that is the master of the
+  --  tasks to be created by its returned object (that is, when its
+  --  enclosing context is a return statement). However, if the body of
+  --  the function creates tasks before its return statements, such tasks
+  --  need their own master.
+
+  if Has_Master_Entity (Context_Id)
+and then Ekind (Context_Id) = E_Function
+and then Is_Build_In_Place_Function (Context_Id)
+and then Needs_BIP_Task_Actuals (Context_Id)
+  then
+ --  No need to add it again if previously added
+
+ declare
+Master_Present : Boolean;
+
+ begin
+--  Handle transient scopes
+
+if Context_Id /= Current_Scope then
+   Push_Scope (Context_Id);
+   Master_Present :=
+ Present (Current_Entity_In_Scope (Name_uMaster));
+   Pop_Scope;
+else
+   Master_Present :=
+ Present (Current_Entity_In_Scope (Name_uMaster));
+end if;
+
+if Master_Present then
+   return;
+end if;
+ end;
+
   --  Nothing to do if the context already has a master; internally built
   --  finalizers don't need a master.
 
-  if Has_Master_Entity (Context_Id)
+  elsif Has_Master_Entity (Context_Id)
 or else Is_Finalizer (Context_Id)
   then
  return;
-- 
2.34.1



[COMMITTED] ada: Remove obsolete code in Resolve_If_Expression

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

gcc/ada/

* sem_res.adb (Resolve_If_Expression): Remove obsolete special
case.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 13 +
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index 402da430b2f..e5b3612d186 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -9393,21 +9393,10 @@ package body Sem_Res is
   Apply_Check (Then_Expr);
 
   --  If ELSE expression present, just resolve using the determined type
-  --  If type is universal, resolve to any member of the class.
 
   if Present (Else_Expr) then
- if Typ = Universal_Integer then
-Resolve (Else_Expr, Any_Integer);
-
- elsif Typ = Universal_Real then
-Resolve (Else_Expr, Any_Real);
-
- else
-Resolve (Else_Expr, Result_Type);
- end if;
-
+ Resolve (Else_Expr, Result_Type);
  Check_Unset_Reference (Else_Expr);
-
  Apply_Check (Else_Expr);
 
  --  Apply RM 4.5.7 (17/3): whether the expression is statically or
-- 
2.34.1



[COMMITTED] ada: Reject limited objects in array and record delta aggregates

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

For array delta aggregates the base expression cannot be limited; for
record delta aggregates the base expression can only be limited if it is
a newly constructed object.

gcc/ada/

* sem_aggr.adb (Resolve_Delta_Aggregate): Implement rules related
to limited objects appearing as the base expression.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_aggr.adb | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
index 383f18f7301..4da05dd7317 100644
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -3421,6 +3421,18 @@ package body Sem_Aggr is
   Analyze_And_Resolve (Base, Typ);
 
   if Is_Array_Type (Typ) then
+ --  For an array_delta_aggregate, the base_expression and each
+ --  expression in every array_component_association shall be of a
+ --  nonlimited type; RM 4.3.4(13/5). However, to prevent repeated
+ --  errors we only check the base expression and not array component
+ --  associations.
+
+ if Is_Limited_Type (Etype (Base)) then
+Error_Msg_N
+  ("array delta aggregate shall be of a nonlimited type", Base);
+Explain_Limited_Type (Etype (Base), Base);
+ end if;
+
  Resolve_Delta_Array_Aggregate (N, Typ);
   else
 
@@ -3432,6 +3444,11 @@ package body Sem_Aggr is
   ("delta aggregates for record types must use (), not '[']", N);
  end if;
 
+ --  The base_expression of a record_delta_aggregate can be of a
+ --  limited type only if it is newly constructed; RM 7.5(2.1/5).
+
+ Check_Expr_OK_In_Limited_Aggregate (Base);
+
  Resolve_Delta_Record_Aggregate (N, Typ);
   end if;
 
-- 
2.34.1



[COMMITTED] ada: Reject record delta aggregates with limited expressions

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Implement a missing check related to record delta aggregates.

gcc/ada/

* sem_aggr.adb (Resolve_Delta_Record_Aggregate): Reject
expressions of a limited type.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_aggr.adb | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
index 31ce9cadd94..383f18f7301 100644
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -3746,7 +3746,17 @@ package body Sem_Aggr is
   ("'<'> in record delta aggregate is not allowed", Assoc);
  else
 Analyze_And_Resolve (Expression (Assoc), Comp_Type);
+
+--  The expression must not be of a limited type; RM 4.3.1(17.4/5)
+
+if Is_Limited_Type (Etype (Expression (Assoc))) then
+   Error_Msg_N
+ ("expression of a limited type in record delta aggregate " &
+"is not allowed",
+  Expression (Assoc));
+end if;
  end if;
+
  Next (Assoc);
   end loop;
end Resolve_Delta_Record_Aggregate;
-- 
2.34.1



[COMMITTED] ada: Align -gnatwc's documentation with its behavior

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Shortly after the -gnatwc flag was introduced, its behavior was
tweaked, but its documentation was not updated accordingly.

gcc/ada/

* doc/gnat_ugn/building_executable_programs_with_gnat.rst
(-gnatwc): Fix flag documentation.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst | 2 +-
 gcc/ada/gnat_ugn.texi   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst 
b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
index 31e2e31421e..87fb1087e42 100644
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -2938,7 +2938,7 @@ of the pragma in the :title:`GNAT_Reference_manual`).
   tests that are known to be True or False at compile time. The default
   is that such warnings are not generated.
   Note that this warning does
-  not get issued for the use of boolean variables or constants whose
+  not get issued for the use of boolean constants whose
   values are known at compile time, since this is a standard technique
   for conditional compilation in Ada, and this would generate too many
   false positive warnings.
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index 385f1d3deb5..7b1aaeba954 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -10925,7 +10925,7 @@ This switch activates warnings for conditional 
expressions used in
 tests that are known to be True or False at compile time. The default
 is that such warnings are not generated.
 Note that this warning does
-not get issued for the use of boolean variables or constants whose
+not get issued for the use of boolean constants whose
 values are known at compile time, since this is a standard technique
 for conditional compilation in Ada, and this would generate too many
 false positive warnings.
-- 
2.34.1



[COMMITTED] ada: Remove unneeded code in handling formal type defaults

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Unneeded code found while experimenting with improved detection of
unreferenced objects.

gcc/ada/

* sem_ch12.adb (Validate_Formal_Type_Default): Remove call to
Collect_Interfaces, which had no effect apart from populating a
list that was not used; fix style.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch12.adb | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index 2b7833dfdcd..ca0f4913e36 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -17303,13 +17303,11 @@ package body Sem_Ch12 is
 
 else
declare
-  Act_Iface_List : Elist_Id;
-  Iface  : Node_Id;
-  Iface_Ent  : Entity_Id;
+  Iface : Node_Id;
+  Iface_Ent : Entity_Id;
 
begin
   Iface := First (Abstract_Interface_List (Formal));
-  Collect_Interfaces (Def_Sub, Act_Iface_List);
 
   while Present (Iface) loop
  Iface_Ent := Entity (Iface);
-- 
2.34.1



[COMMITTED] ada: Cleanup local variable that is only set as an out parameter

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Minor improvements; found experimenting with improved detection of
unreferenced objects.

gcc/ada/

* exp_spark.adb (SPARK_Freeze_Type): Refine type of a local
object.
* sem_ch3.adb (Derive_Subprograms): Remove initial value for
New_Subp, which is only written as an out parameter and never
read.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_spark.adb |  2 +-
 gcc/ada/sem_ch3.adb   | 18 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/ada/exp_spark.adb b/gcc/ada/exp_spark.adb
index c89d604aa80..ba7bd7fe5d2 100644
--- a/gcc/ada/exp_spark.adb
+++ b/gcc/ada/exp_spark.adb
@@ -895,7 +895,7 @@ package body Exp_SPARK is
procedure SPARK_Freeze_Type (N : Entity_Id) is
   Typ : constant Entity_Id := Entity (N);
 
-  Renamed_Eq : Node_Id;
+  Renamed_Eq : Entity_Id;
   --  Defining unit name for the predefined equality function in the case
   --  where the type has a primitive operation that is a renaming of
   --  predefined equality (but only if there is also an overriding
diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index f6b852051dc..8f4e9f80eb8 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -16497,15 +16497,15 @@ package body Sem_Ch3 is
 
   --  Local variables
 
-  Alias_Subp   : Entity_Id;
-  Act_List : Elist_Id;
-  Act_Elmt : Elmt_Id;
-  Act_Subp : Entity_Id := Empty;
-  Elmt : Elmt_Id;
-  Need_Search  : Boolean   := False;
-  New_Subp : Entity_Id := Empty;
-  Parent_Base  : Entity_Id;
-  Subp : Entity_Id;
+  Alias_Subp  : Entity_Id;
+  Act_List: Elist_Id;
+  Act_Elmt: Elmt_Id;
+  Act_Subp: Entity_Id := Empty;
+  Elmt: Elmt_Id;
+  Need_Search : Boolean   := False;
+  New_Subp: Entity_Id;
+  Parent_Base : Entity_Id;
+  Subp: Entity_Id;
 
--  Start of processing for Derive_Subprograms
 
-- 
2.34.1



[COMMITTED] ada: Preanalyze classwide contracts as spec expressions

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Classwide contracts are "spec expressions" as defined in the
documentation in sem.ads. Before this patch, the instances of
classwide contracts that are destined to class conditions merging
were not preanalyzed as spec expressions. That caused preanalysis to
emit spurious errors in some cases.

gcc/ada/

* contracts.adb (Preanalyze_Condition): Use
Preanalyze_Spec_Expression.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/contracts.adb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
index a300d739eff..21f438f90f3 100644
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -42,13 +42,13 @@ with Nmake;  use Nmake;
 with Opt;use Opt;
 with Sem;use Sem;
 with Sem_Aux;use Sem_Aux;
+with Sem_Ch3;use Sem_Ch3;
 with Sem_Ch6;use Sem_Ch6;
 with Sem_Ch8;use Sem_Ch8;
 with Sem_Ch12;   use Sem_Ch12;
 with Sem_Ch13;   use Sem_Ch13;
 with Sem_Disp;   use Sem_Disp;
 with Sem_Prag;   use Sem_Prag;
-with Sem_Res;use Sem_Res;
 with Sem_Type;   use Sem_Type;
 with Sem_Util;   use Sem_Util;
 with Sinfo;  use Sinfo;
@@ -4755,7 +4755,7 @@ package body Contracts is
  Install_Formals (Subp);
  Inside_Class_Condition_Preanalysis := True;
 
- Preanalyze_And_Resolve (Expr, Standard_Boolean);
+ Preanalyze_Spec_Expression (Expr, Standard_Boolean);
 
  Inside_Class_Condition_Preanalysis := False;
  Remove_Formals (Subp);
-- 
2.34.1



[COMMITTED] ada: Implement RM 4.5.7(10/3) name resolution rule

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This rule deals with the specific case of a conditional expression that is
the operand of a type conversion and effectively distributes the conversion
to the dependent expressions with the help of the dynamic semantics.

gcc/ada/

* sem_ch4.adb (Analyze_Case_Expression): Compute the
interpretations of the expression only at the end of the analysis,
but skip doing it if it is the operand of a type conversion.
(Analyze_If_Expression): Likewise.
* sem_res.adb (Resolve): Deal specially with conditional
expression that is the operand of a type conversion.
(Resolve_Dependent_Expression): New procedure.
(Resolve_Case_Expression): Call Resolve_Dependent_Expression.
(Resolve_If_Expression): Likewise.
(Resolve_If_Expression.Apply_Check): Take result type as
parameter.
(Resolve_Type_Conversion): Do not warn about a redundant
conversion when the operand is a conditional expression.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch4.adb | 129 +---
 gcc/ada/sem_res.adb | 109 -
 2 files changed, 156 insertions(+), 82 deletions(-)

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index 0c02fd80675..23040d7033b 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -1740,6 +1740,70 @@ package body Sem_Ch4 is
  return;
   end if;
 
+  --  The expression must be of a discrete type which must be determinable
+  --  independently of the context in which the expression occurs, but
+  --  using the fact that the expression must be of a discrete type.
+  --  Moreover, the type this expression must not be a character literal
+  --  (which is always ambiguous).
+
+  --  If error already reported by Resolve, nothing more to do
+
+  if Exp_Btype = Any_Discrete or else Exp_Btype = Any_Type then
+ return;
+
+  --  Special case message for character literal
+
+  elsif Exp_Btype = Any_Character then
+ Error_Msg_N
+   ("character literal as case expression is ambiguous", Expr);
+ return;
+  end if;
+
+  --  If the case expression is a formal object of mode in out, then
+  --  treat it as having a nonstatic subtype by forcing use of the base
+  --  type (which has to get passed to Check_Case_Choices below). Also
+  --  use base type when the case expression is parenthesized.
+
+  if Paren_Count (Expr) > 0
+or else (Is_Entity_Name (Expr)
+  and then Ekind (Entity (Expr)) = E_Generic_In_Out_Parameter)
+  then
+ Exp_Type := Exp_Btype;
+  end if;
+
+  --  The case expression alternatives cover the range of a static subtype
+  --  subject to aspect Static_Predicate. Do not check the choices when the
+  --  case expression has not been fully analyzed yet because this may lead
+  --  to bogus errors.
+
+  if Is_OK_Static_Subtype (Exp_Type)
+and then Has_Static_Predicate_Aspect (Exp_Type)
+and then In_Spec_Expression
+  then
+ null;
+
+  --  Call Analyze_Choices and Check_Choices to do the rest of the work
+
+  else
+ Analyze_Choices (Alternatives (N), Exp_Type);
+ Check_Choices (N, Alternatives (N), Exp_Type, Others_Present);
+
+ if Exp_Type = Universal_Integer and then not Others_Present then
+Error_Msg_N
+  ("case on universal integer requires OTHERS choice", Expr);
+return;
+ end if;
+  end if;
+
+  --  RM 4.5.7(10/3): If the case_expression is the operand of a type
+  --  conversion, the type of the case_expression is the target type
+  --  of the conversion.
+
+  if Nkind (Parent (N)) = N_Type_Conversion then
+ Set_Etype (N, Etype (Parent (N)));
+ return;
+  end if;
+
   --  Loop through the interpretations of the first expression and check
   --  the other expressions if present.
 
@@ -1763,25 +1827,6 @@ package body Sem_Ch4 is
  end loop;
   end if;
 
-  --  The expression must be of a discrete type which must be determinable
-  --  independently of the context in which the expression occurs, but
-  --  using the fact that the expression must be of a discrete type.
-  --  Moreover, the type this expression must not be a character literal
-  --  (which is always ambiguous).
-
-  --  If error already reported by Resolve, nothing more to do
-
-  if Exp_Btype = Any_Discrete or else Exp_Btype = Any_Type then
- return;
-
-  --  Special casee message for character literal
-
-  elsif Exp_Btype = Any_Character then
- Error_Msg_N
-   ("character literal as case expression is ambiguous", Expr);
- return;
-  end if;
-
   --  If no possible interpretation has been found, the type of the wrong
   --  alternative doesn'

[COMMITTED] ada: Fix inconsistent whitespace in Ada.Numerics.Generic_Complex_Arrays

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Cleanup only.

gcc/ada/

* libgnat/a-ngcoar.ads, libgnat/a-ngcoar.adb: Remove extra spaces.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/a-ngcoar.adb | 4 ++--
 gcc/ada/libgnat/a-ngcoar.ads | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/libgnat/a-ngcoar.adb b/gcc/ada/libgnat/a-ngcoar.adb
index 8dfbc3b174a..1b9118cd3b2 100644
--- a/gcc/ada/libgnat/a-ngcoar.adb
+++ b/gcc/ada/libgnat/a-ngcoar.adb
@@ -902,7 +902,7 @@ package body Ada.Numerics.Generic_Complex_Arrays is
function "-"
  (Left  : Real_Vector;
   Right : Complex_Vector) return Complex_Vector
-  renames Instantiations."-";
+ renames Instantiations."-";
 
function "-"
  (Left  : Complex_Vector;
@@ -956,7 +956,7 @@ package body Ada.Numerics.Generic_Complex_Arrays is
---
 
function "abs" (Right : Complex_Vector) return Real'Base
-  renames Instantiations."abs";
+ renames Instantiations."abs";
 
--
-- Argument --
diff --git a/gcc/ada/libgnat/a-ngcoar.ads b/gcc/ada/libgnat/a-ngcoar.ads
index 8f8f37a7906..05295caa655 100644
--- a/gcc/ada/libgnat/a-ngcoar.ads
+++ b/gcc/ada/libgnat/a-ngcoar.ads
@@ -135,7 +135,7 @@ package Ada.Numerics.Generic_Complex_Arrays is
function Compose_From_Cartesian (Re : Real_Matrix) return Complex_Matrix;
 
function Compose_From_Cartesian
- (Re, Im : Real_Matrix) return  Complex_Matrix;
+ (Re, Im : Real_Matrix) return Complex_Matrix;
 
function Modulus (X : Complex_Matrix) return Real_Matrix;
function "abs" (Right : Complex_Matrix) return Real_Matrix renames Modulus;
@@ -229,7 +229,7 @@ package Ada.Numerics.Generic_Complex_Arrays is
 
function "*"
  (Left  : Complex;
-  Right : Complex_Matrix) return  Complex_Matrix;
+  Right : Complex_Matrix) return Complex_Matrix;
 
function "*"
  (Left  : Complex_Matrix;
-- 
2.34.1



[COMMITTED] ada: Remove redundant line in Analyze_Qualified_Expression

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The same statement is present a few lines above.

gcc/ada/

* sem_ch4.adb (Analyze_Qualified_Expression): Remove redundant
line.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch4.adb | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index f136e9715d7..489fb47247a 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -4389,8 +4389,6 @@ package body Sem_Ch4 is
 end loop;
  end if;
   end if;
-
-  Set_Etype  (N, T);
end Analyze_Qualified_Expression;
 
---
-- 
2.34.1



[COMMITTED] ada: Fix expansion of 'Wide_Image and 'Wide_Wide_Image on composite types

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Attributes Wide_Image and Wide_Wide_Image applied to composite types are
now expanded just like attribute Image.

gcc/ada/

* exp_imgv.adb
(Expand_Wide_Image_Attribute): Handle just like attribute Image.
(Expand_Wide_Wide_Image_Attribute): Likewise.
* exp_put_image.adb
(Build_Image_Call): Adapt to also work for Wide and Wide_Wide
attributes.
* exp_put_image.ads
(Build_Image_Call): Update comment.
* rtsfind.ads
(RE_Id): Support wide variants of Get.
(RE_Unit_Table): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_imgv.adb  | 19 +++
 gcc/ada/exp_put_image.adb | 29 +
 gcc/ada/exp_put_image.ads |  6 +++---
 gcc/ada/rtsfind.ads   |  4 
 4 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/exp_imgv.adb b/gcc/ada/exp_imgv.adb
index f2043f525d5..398e4771c14 100644
--- a/gcc/ada/exp_imgv.adb
+++ b/gcc/ada/exp_imgv.adb
@@ -1842,6 +1842,15 @@ package body Exp_Imgv is
  return;
   end if;
 
+  --  If Image should be transformed using Put_Image, then do so. See
+  --  Exp_Put_Image for details.
+
+  if Exp_Put_Image.Image_Should_Call_Put_Image (N) then
+ Rewrite (N, Exp_Put_Image.Build_Image_Call (N));
+ Analyze_And_Resolve (N, Standard_Wide_String, Suppress => All_Checks);
+ return;
+  end if;
+
   Rtyp := Root_Type (Entity (Pref));
 
   Insert_Actions (N, New_List (
@@ -1942,6 +1951,16 @@ package body Exp_Imgv is
  return;
   end if;
 
+  --  If Image should be transformed using Put_Image, then do so. See
+  --  Exp_Put_Image for details.
+
+  if Exp_Put_Image.Image_Should_Call_Put_Image (N) then
+ Rewrite (N, Exp_Put_Image.Build_Image_Call (N));
+ Analyze_And_Resolve
+   (N, Standard_Wide_Wide_String, Suppress => All_Checks);
+ return;
+  end if;
+
   Rtyp := Root_Type (Entity (Pref));
 
   Insert_Actions (N, New_List (
diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index c489ad41fd1..f90f0206f27 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -1058,12 +1058,14 @@ package body Exp_Put_Image is
--
 
function Build_Image_Call (N : Node_Id) return Node_Id is
-  --  For T'Image (X) Generate an Expression_With_Actions node:
+  --  For T'[[Wide_]Wide_]Image (X) Generate an Expression_With_Actions
+  --  node:
   --
   -- do
   --S : Buffer;
   --U_Type'Put_Image (S, X);
-  --Result : constant String := Get (S);
+  --Result : constant [[Wide_]Wide_]String :=
+  --  [[Wide_[Wide_]]Get (S);
   --Destroy (S);
   -- in Result end
   --
@@ -1091,14 +1093,33 @@ package body Exp_Put_Image is
 Image_Prefix));
   Result_Entity : constant Entity_Id :=
 Make_Temporary (Loc, 'R');
+
+  subtype Image_Name_Id is Name_Id with Static_Predicate =>
+Image_Name_Id in Name_Image | Name_Wide_Image | Name_Wide_Wide_Image;
+  --  Attribute names that will be mapped to the corresponding result types
+  --  and functions.
+
+  Attribute_Name_Id : constant Name_Id := Attribute_Name (N);
+
+  Result_Typ: constant Entity_Id :=
+(case Image_Name_Id'(Attribute_Name_Id) is
+when Name_Image   => Stand.Standard_String,
+when Name_Wide_Image  => Stand.Standard_Wide_String,
+when Name_Wide_Wide_Image => Stand.Standard_Wide_Wide_String);
+  Get_Func_Id   : constant RE_Id :=
+(case Image_Name_Id'(Attribute_Name_Id) is
+when Name_Image   => RE_Get,
+when Name_Wide_Image  => RE_Wide_Get,
+when Name_Wide_Wide_Image => RE_Wide_Wide_Get);
+
   Result_Decl : constant Node_Id :=
 Make_Object_Declaration (Loc,
   Defining_Identifier => Result_Entity,
   Object_Definition =>
-New_Occurrence_Of (Stand.Standard_String, Loc),
+New_Occurrence_Of (Result_Typ, Loc),
   Expression =>
 Make_Function_Call (Loc,
-  Name => New_Occurrence_Of (RTE (RE_Get), Loc),
+  Name => New_Occurrence_Of (RTE (Get_Func_Id), Loc),
   Parameter_Associations => New_List (
 New_Occurrence_Of (Sink_Entity, Loc;
   Actions : List_Id;
diff --git a/gcc/ada/exp_put_image.ads b/gcc/ada/exp_put_image.ads
index b2b65aa2374..d4055d10b96 100644
--- a/gcc/ada/exp_put_image.ads
+++ b/gcc/ada/exp_put_image.ads
@@ -91,9 +91,9 @@ package Exp_Put_Image is
--  T'Image.
 
function Build_Image_Call (N : Node_Id) return Node_Id;
-   --  N is a call to T'Image, and this translates it into the appropriate code
-   --  to call T'Put_Image into a buffer and then extract the string from the
-   -

[COMMITTED] ada: Minor consistency tweaks in Sem_Ch4

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This ensures that, during the analysis of the qualified expressions, type
conversions and unchecked type conversions, the determination of the type
of the node and the analysis of its expression are done in the same order.

No functional changes.

gcc/ada/

* sem_ch4.adb (Analyze_Qualified_Expression): Analyze the
expression only after setting the type.
(Analyze_Unchecked_Type_Conversion): Likewise.
(Analyze_Short_Circuit): Likewise for the operands.
(Analyze_Type_Conversion): Minor tweaks.
(Analyze_Unchecked_Expression): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch4.adb | 39 ---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index 489fb47247a..0c02fd80675 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -4323,16 +4323,14 @@ package body Sem_Ch4 is
--
 
procedure Analyze_Qualified_Expression (N : Node_Id) is
-  Mark : constant Entity_Id := Subtype_Mark (N);
   Expr : constant Node_Id   := Expression (N);
+  Mark : constant Entity_Id := Subtype_Mark (N);
+
   I: Interp_Index;
   It   : Interp;
   T: Entity_Id;
 
begin
-  Analyze_Expression (Expr);
-
-  Set_Etype (N, Any_Type);
   Find_Type (Mark);
   T := Entity (Mark);
 
@@ -4353,6 +4351,8 @@ package body Sem_Ch4 is
 
   Set_Etype (N, T);
 
+  Analyze_Expression (Expr);
+
   if T = Any_Type then
  return;
   end if;
@@ -5948,9 +5948,9 @@ package body Sem_Ch4 is
   It  : Interp;
 
begin
+  Set_Etype (N, Any_Type);
   Analyze_Expression (L);
   Analyze_Expression (R);
-  Set_Etype (N, Any_Type);
 
   if not Is_Overloaded (L) then
  if Root_Type (Etype (L)) = Standard_Boolean
@@ -6083,7 +6083,9 @@ package body Sem_Ch4 is
-
 
procedure Analyze_Type_Conversion (N : Node_Id) is
-  Expr : constant Node_Id := Expression (N);
+  Expr : constant Node_Id   := Expression (N);
+  Mark : constant Entity_Id := Subtype_Mark (N);
+
   Typ  : Entity_Id;
 
begin
@@ -6100,11 +6102,13 @@ package body Sem_Ch4 is
   --  Otherwise full type analysis is required, as well as some semantic
   --  checks to make sure the argument of the conversion is appropriate.
 
-  Find_Type (Subtype_Mark (N));
-  Typ := Entity (Subtype_Mark (N));
+  Find_Type (Mark);
+  Typ := Entity (Mark);
   Set_Etype (N, Typ);
-  Check_Fully_Declared (Typ, N);
+
   Analyze_Expression (Expr);
+
+  Check_Fully_Declared (Typ, N);
   Validate_Remote_Type_Type_Conversion (N);
 
   --  Only remaining step is validity checks on the argument. These
@@ -6227,10 +6231,12 @@ package body Sem_Ch4 is
--
 
procedure Analyze_Unchecked_Expression (N : Node_Id) is
+  Expr : constant Node_Id := Expression (N);
+
begin
-  Analyze (Expression (N), Suppress => All_Checks);
-  Set_Etype (N, Etype (Expression (N)));
-  Save_Interps (Expression (N), N);
+  Analyze (Expr, Suppress => All_Checks);
+  Set_Etype (N, Etype (Expr));
+  Save_Interps (Expr, N);
end Analyze_Unchecked_Expression;
 
---
@@ -6238,10 +6244,13 @@ package body Sem_Ch4 is
---
 
procedure Analyze_Unchecked_Type_Conversion (N : Node_Id) is
+  Expr : constant Node_Id   := Expression (N);
+  Mark : constant Entity_Id := Subtype_Mark (N);
+
begin
-  Find_Type (Subtype_Mark (N));
-  Analyze_Expression (Expression (N));
-  Set_Etype (N, Entity (Subtype_Mark (N)));
+  Find_Type (Mark);
+  Set_Etype (N, Entity (Mark));
+  Analyze_Expression (Expr);
end Analyze_Unchecked_Type_Conversion;
 

-- 
2.34.1



[COMMITTED] ada: Small consistency fix

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

gcc/ada/

* fe.h (Get_Warn_On_Questionable_Layout): Add void parameter.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/fe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h
index 8102c6d5ec4..12ad15b6d35 100644
--- a/gcc/ada/fe.h
+++ b/gcc/ada/fe.h
@@ -368,7 +368,7 @@ extern Boolean Stack_Check_Probes_On_Target;
 
 #define Get_Warn_On_Questionable_Layout
warnsw__get_warn_on_questionable_layout
 
-extern Boolean Get_Warn_On_Questionable_Layout ();
+extern Boolean Get_Warn_On_Questionable_Layout (void);
 
 // The following corresponds to Ada code in Einfo.Utils.
 
-- 
2.34.1



Re: [r13-3761 Regression] FAIL: g++.dg/warn/Warray-bounds-16.C -std=gnu++98 (test for excess errors) on Linux/x86_64

2022-11-08 Thread Aldy Hernandez via Gcc-patches
It looks like this was failing on x86-64 before my patch:

In constructor ‘S::S(int)’,
inlined from ‘void __static_initialization_and_destruction_0()’ at a.c:26:7,
inlined from ‘(static initializers for a.c)’ at a.c:26:8:
a.c:22:24: warning: ‘void* __builtin_memset(void*, int, long unsigned int)’
writing between 4 and 8589934588 bytes into a region of size 0 overflows
the destination [-Wstringop-overflow=]
   22 |   new (p + i) int (); /* { dg-bogus "bounds" "pr102690" {
xfail *-*-* } } */
  |^
a.c:19:51: note: destination object of size 0 allocated by ‘operator new []’
   19 | p = (int*) new unsigned char [sizeof (int) * m];
  |

and now it's just failing on -m32 as well.

This is the same thing as I reported in PR107561, where
-Wstringop-overflow is getting the same exact IL as before, but some
ranges are different, and it's throwing the warning off.

Aldy

On Tue, Nov 8, 2022 at 9:22 AM haochen.jiang
 wrote:
>
> On Linux/x86_64,
>
> a239a63f868e29e9276088e7c0fb00804c2903ba is the first bad commit
> commit a239a63f868e29e9276088e7c0fb00804c2903ba
> Author: Aldy Hernandez 
> Date:   Fri Nov 4 22:24:42 2022 +0100
>
> Improve multiplication by powers of 2 in range-ops.
>
> caused
>
> FAIL: g++.dg/pr71488.C   (test for excess errors)
> FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++14 (test for excess errors)
> FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++17 (test for excess errors)
> FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++20 (test for excess errors)
> FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++98 (test for excess errors)
>
> with GCC configured with
>
> ../../gcc/configure 
> --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-3761/usr 
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
>
> To reproduce:
>
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
> --target_board='unix{-m32\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
> --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=g++.dg/pr71488.C 
> --target_board='unix{-m64\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="dg.exp=g++.dg/warn/Warray-bounds-16.C 
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="dg.exp=g++.dg/warn/Warray-bounds-16.C 
> --target_board='unix{-m32\ -march=cascadelake}'"
>
> (Please do not reply to this email, for question about this report, contact 
> me at haochen dot jiang at intel.com)
>



[COMMITTED] ada: Improve handling of declare expressions in deferred-freezing contexts

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Steve Baird 

In some cases where a declare expression occurs in a deferred-freezing
context (e.g., within the default value for a discriminant or for a formal
parameter, or within the expression of an expression function), the compiler
generates a bugbox.

gcc/ada/

* sem_ch3.adb
(Analyze_Object_Declaration): Do not perform expansion actions if
In_Spec_Expression is true.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index 8f4e9f80eb8..95ffbe00ba4 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -4721,6 +4721,26 @@ package body Sem_Ch3 is
   Expand_Sliding_Conversion (E, T);
end if;
 
+   if In_Spec_Expression and then In_Declare_Expr > 0 then
+  --  It is too early to be doing expansion-ish things,
+  --  so exit early. But we have to set Ekind (Id) now so
+  --  that subsequent uses of this entity are not rejected
+  --  via the same mechanism that (correctly) rejects
+  --  "X : Integer := X;".
+
+  if Constant_Present (N) then
+ Mutate_Ekind (Id, E_Constant);
+ Set_Is_True_Constant (Id);
+  else
+ Mutate_Ekind (Id, E_Variable);
+ if Present (E) then
+Set_Has_Initial_Value (Id);
+ end if;
+  end if;
+
+  goto Leave;
+   end if;
+
Expand_Subtype_From_Expr
  (N => N,
   Unc_Type  => T,
-- 
2.34.1



[COMMITTED] ada: Move warnings switches -- initial work

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

This patch prepares to move warning switches from Opt into Warnsw.

gcc/ada/

* warnsw.ads, warnsw.adb, fe.h, err_vars.ads, errout.ads: Move
Warning_Doc_Switch from Err_Vars to Warnsw. Access
Warn_On_Questionable_Layout on the C side via a function rather
than a variable, because we plan to turn the variables into
renamings, and you can't Export renamings.
* erroutc.adb, switch-c.adb, errout.adb: Likewise.
* gcc-interface/decl.cc: Use Get_Warn_On_Questionable_Layout
instead of Warn_On_Questionable_Layout.
* gcc-interface/Makefile.in (GNATMAKE_OBJS): Add warnsw.o, because
it is indirectly imported via Errout.
* gcc-interface/Make-lang.in (GNATBIND_OBJS): Likewise and remove
restrict.o (not needed).

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/err_vars.ads   |  6 --
 gcc/ada/errout.adb |  2 ++
 gcc/ada/errout.ads |  9 -
 gcc/ada/erroutc.adb|  1 +
 gcc/ada/fe.h   |  4 ++--
 gcc/ada/gcc-interface/Make-lang.in |  2 +-
 gcc/ada/gcc-interface/Makefile.in  |  2 +-
 gcc/ada/gcc-interface/decl.cc  |  2 +-
 gcc/ada/switch-c.adb   |  1 -
 gcc/ada/warnsw.adb |  1 -
 gcc/ada/warnsw.ads | 13 -
 11 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/gcc/ada/err_vars.ads b/gcc/ada/err_vars.ads
index 05329dc6f21..79d5f319f59 100644
--- a/gcc/ada/err_vars.ads
+++ b/gcc/ada/err_vars.ads
@@ -81,12 +81,6 @@ package Err_Vars is
--  Source_Reference line, then this is initialized to No_Source_File,
--  to force an initial reference to the real source file name.
 
-   Warning_Doc_Switch : Boolean := True;
-   --  If this is set True, then the ??/?x?/?x? sequences in error messages
-   --  are active (see errout.ads for details). If this switch is False, then
-   --  these sequences are ignored (i.e. simply equivalent to a single ?). The
-   --  -gnatw.d switch sets this flag True, -gnatw.D sets this flag False.
-

-- Error Message Insertion Parameters --

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 5730a543ee1..19ea1553260 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -53,6 +53,8 @@ with Stand;  use Stand;
 with Stylesw;use Stylesw;
 with System.OS_Lib;
 with Uname;  use Uname;
+with Warnsw; pragma Unreferenced (Warnsw);
+--  Will be referenced when various flags are moved to Warnsw.
 
 package body Errout is
 
diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
index 846a4a6c07b..aeb9a2fb912 100644
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -59,15 +59,6 @@ package Errout is
Error_Msg_Exception : exception renames Err_Vars.Error_Msg_Exception;
--  Exception raised if Raise_Exception_On_Error is true
 
-   Warning_Doc_Switch : Boolean renames Err_Vars.Warning_Doc_Switch;
-   --  If this is set True, then the ??/?*?/?$?/?x?/?.x?/?_x? insertion
-   --  sequences in error messages generate appropriate tags for the output
-   --  error messages. If this switch is False, then these sequences are still
-   --  recognized (for the purposes of implementing the pattern matching in
-   --  pragmas Warnings (Off,..) and Warning_As_Error(...) but do not result
-   --  in adding the error message tag. The -gnatw.d switch sets this flag
-   --  True, -gnatw.D sets this flag False.
-
Current_Node : Node_Id := Empty;
--  Used by Error_Msg as a default Node_Id.
--  Relevant only when Opt.Include_Subprogram_In_Messages is set.
diff --git a/gcc/ada/erroutc.adb b/gcc/ada/erroutc.adb
index d0cbe9fdff1..9ecc97fb46d 100644
--- a/gcc/ada/erroutc.adb
+++ b/gcc/ada/erroutc.adb
@@ -44,6 +44,7 @@ with Stringt;  use Stringt;
 with Targparm;
 with Uintp;use Uintp;
 with Widechar; use Widechar;
+with Warnsw;   use Warnsw;
 
 package body Erroutc is
 
diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h
index 79a1b58836e..8102c6d5ec4 100644
--- a/gcc/ada/fe.h
+++ b/gcc/ada/fe.h
@@ -366,9 +366,9 @@ extern Boolean Stack_Check_Probes_On_Target;
 
 /* warnsw: */
 
-#define Warn_On_Questionable_Layoutwarnsw__warn_on_questionable_layout
+#define Get_Warn_On_Questionable_Layout
warnsw__get_warn_on_questionable_layout
 
-extern Boolean Warn_On_Questionable_Layout;
+extern Boolean Get_Warn_On_Questionable_Layout ();
 
 // The following corresponds to Ada code in Einfo.Utils.
 
diff --git a/gcc/ada/gcc-interface/Make-lang.in 
b/gcc/ada/gcc-interface/Make-lang.in
index 02b2d1c6f2a..45a4168e890 100644
--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -601,7 +601,6 @@ GNATBIND_OBJS = \
  ada/osint-b.o\
  ada/osint.o  \
  ada/output.o \
- ada/restrict.o   \
  ada/rident.o \
  ada/scans.o  \
  ada/scil_ll.o\
@@ -629,6 +628,7 @@ GNATBIND_OBJS =

[COMMITTED] ada: Set Support_Atomic_Primitives for VxWorks 7 runtimes

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Johannes Kliemann 

gcc/ada/

* libgnat/system-vxworks7-aarch64-rtp-smp.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-aarch64.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-arm-rtp-smp.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-arm.ads: Set Support_Atomic_Primitives
to True.
* libgnat/system-vxworks7-ppc-kernel.ads: Set
Support_Atomic_Primitives to False.
* libgnat/system-vxworks7-ppc-rtp-smp.ads: Set
Support_Atomic_Primitives to False.
* libgnat/system-vxworks7-ppc64-kernel.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-ppc64-rtp-smp.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-x86-kernel.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-x86-rtp-smp.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-x86_64-kernel.ads: Set
Support_Atomic_Primitives to True.
* libgnat/system-vxworks7-x86_64-rtp-smp.ads: Set
Support_Atomic_Primitives to True.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/system-vxworks7-aarch64-rtp-smp.ads | 2 +-
 gcc/ada/libgnat/system-vxworks7-aarch64.ads | 2 +-
 gcc/ada/libgnat/system-vxworks7-arm-rtp-smp.ads | 2 +-
 gcc/ada/libgnat/system-vxworks7-arm.ads | 2 +-
 gcc/ada/libgnat/system-vxworks7-ppc-kernel.ads  | 1 +
 gcc/ada/libgnat/system-vxworks7-ppc-rtp-smp.ads | 1 +
 gcc/ada/libgnat/system-vxworks7-ppc64-kernel.ads| 1 +
 gcc/ada/libgnat/system-vxworks7-ppc64-rtp-smp.ads   | 1 +
 gcc/ada/libgnat/system-vxworks7-x86-kernel.ads  | 2 +-
 gcc/ada/libgnat/system-vxworks7-x86-rtp-smp.ads | 2 +-
 gcc/ada/libgnat/system-vxworks7-x86_64-kernel.ads   | 2 +-
 gcc/ada/libgnat/system-vxworks7-x86_64-rtp-smp.ads  | 2 +-
 12 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/libgnat/system-vxworks7-aarch64-rtp-smp.ads 
b/gcc/ada/libgnat/system-vxworks7-aarch64-rtp-smp.ads
index ae67cd0bab8..46b740eadf6 100644
--- a/gcc/ada/libgnat/system-vxworks7-aarch64-rtp-smp.ads
+++ b/gcc/ada/libgnat/system-vxworks7-aarch64-rtp-smp.ads
@@ -151,7 +151,7 @@ private
Stack_Check_Probes: constant Boolean := True;
Stack_Check_Limits: constant Boolean := False;
Support_Aggregates: constant Boolean := True;
-   Support_Atomic_Primitives : constant Boolean := False;
+   Support_Atomic_Primitives : constant Boolean := True;
Support_Composite_Assign  : constant Boolean := True;
Support_Composite_Compare : constant Boolean := True;
Support_Long_Shifts   : constant Boolean := True;
diff --git a/gcc/ada/libgnat/system-vxworks7-aarch64.ads 
b/gcc/ada/libgnat/system-vxworks7-aarch64.ads
index a943ecd9c4a..1aba15b212e 100644
--- a/gcc/ada/libgnat/system-vxworks7-aarch64.ads
+++ b/gcc/ada/libgnat/system-vxworks7-aarch64.ads
@@ -148,7 +148,7 @@ private
Stack_Check_Probes: constant Boolean := True;
Stack_Check_Limits: constant Boolean := False;
Support_Aggregates: constant Boolean := True;
-   Support_Atomic_Primitives : constant Boolean := False;
+   Support_Atomic_Primitives : constant Boolean := True;
Support_Composite_Assign  : constant Boolean := True;
Support_Composite_Compare : constant Boolean := True;
Support_Long_Shifts   : constant Boolean := True;
diff --git a/gcc/ada/libgnat/system-vxworks7-arm-rtp-smp.ads 
b/gcc/ada/libgnat/system-vxworks7-arm-rtp-smp.ads
index 49e6e7adeeb..e81348e8f62 100644
--- a/gcc/ada/libgnat/system-vxworks7-arm-rtp-smp.ads
+++ b/gcc/ada/libgnat/system-vxworks7-arm-rtp-smp.ads
@@ -148,7 +148,7 @@ private
Stack_Check_Probes: constant Boolean := True;
Stack_Check_Limits: constant Boolean := False;
Support_Aggregates: constant Boolean := True;
-   Support_Atomic_Primitives : constant Boolean := False;
+   Support_Atomic_Primitives : constant Boolean := True;
Support_Composite_Assign  : constant Boolean := True;
Support_Composite_Compare : constant Boolean := True;
Support_Long_Shifts   : constant Boolean := True;
diff --git a/gcc/ada/libgnat/system-vxworks7-arm.ads 
b/gcc/ada/libgnat/system-vxworks7-arm.ads
index 6d3218f456f..4ced0f1e606 100644
--- a/gcc/ada/libgnat/system-vxworks7-arm.ads
+++ b/gcc/ada/libgnat/system-vxworks7-arm.ads
@@ -146,7 +146,7 @@ private
Stack_Check_Probes: constant Boolean := True;
Stack_Check_Limits: constant Boolean := False;
Support_Aggregates: constant Boolean := True;
-   Support_Atomic_Primitives : constant Boolean := False;
+   Support_Atomic_Primitives : constant Boolean := True;
Support_Composite_Assign  : constant Boolean := True;
Support_Composite_Compare : constant Boolean := True;
Support_Long_Shifts   : constant B

[COMMITTED] ada: Adjust classwide contract expression preanalysis

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Before this patch, a classwide contract expression was preanalyzed
only when its primitive operation's type was frozen. It caused name
resolution to be off in the cases where the freezing took place
after the end of the declaration list the primitive operation was
declared in.

This patch makes it so that if the compiler gets to the end of
the declaration list before the type is frozen, it preanalyzes the
classwide contract expression, so that the names are resolved in the
right context.

gcc/ada/

* contracts.adb
(Preanalyze_Class_Conditions): New procedure.
(Preanalyze_Condition): Moved out from Merge_Class_Conditions in
order to be spec-visible.
* contracts.ads
(Preanalyze_Class_Conditions): New procedure.
* sem_prag.adb
(Analyze_Pre_Post_Condition_In_Decl_Part): Call
Preanalyze_Class_Conditions when necessary.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/contracts.adb | 481 ++
 gcc/ada/contracts.ads |   4 +
 gcc/ada/sem_prag.adb  |  14 ++
 3 files changed, 267 insertions(+), 232 deletions(-)

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
index 21f438f90f3..218fd66852f 100644
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -107,6 +107,11 @@ package body Contracts is
--  well as Contract_Cases, Subprogram_Variant, invariants and predicates.
--  Body_Id denotes the entity of the subprogram body.
 
+   procedure Preanalyze_Condition
+ (Subp : Entity_Id;
+  Expr : Node_Id);
+   --  Preanalyze the class-wide condition Expr of Subp
+
procedure Set_Class_Condition
  (Kind : Condition_Kind;
   Subp : Entity_Id;
@@ -4548,242 +4553,10 @@ package body Contracts is
 
procedure Merge_Class_Conditions (Spec_Id : Entity_Id) is
 
-  procedure Preanalyze_Condition
-(Subp : Entity_Id;
- Expr : Node_Id);
-  --  Preanalyze the class-wide condition Expr of Subp
-
   procedure Process_Inherited_Conditions (Kind : Condition_Kind);
   --  Collect all inherited class-wide conditions of Spec_Id and merge
   --  them into one big condition.
 
-  --
-  -- Preanalyze_Condition --
-  --
-
-  procedure Preanalyze_Condition
-(Subp : Entity_Id;
- Expr : Node_Id)
-  is
- procedure Clear_Unset_References;
- --  Clear unset references on formals of Subp since preanalysis
- --  occurs in a place unrelated to the actual code.
-
- procedure Remove_Controlling_Arguments;
- --  Traverse Expr and clear the Controlling_Argument of calls to
- --  nonabstract functions.
-
- procedure Remove_Formals (Id : Entity_Id);
- --  Remove formals from homonym chains and make them not visible
-
- procedure Restore_Original_Selected_Component;
- --  Traverse Expr searching for dispatching calls to functions whose
- --  original node was a selected component, and replace them with
- --  their original node.
-
- 
- -- Clear_Unset_References --
- 
-
- procedure Clear_Unset_References is
-F : Entity_Id := First_Formal (Subp);
-
- begin
-while Present (F) loop
-   Set_Unset_Reference (F, Empty);
-   Next_Formal (F);
-end loop;
- end Clear_Unset_References;
-
- --
- -- Remove_Controlling_Arguments --
- --
-
- procedure Remove_Controlling_Arguments is
-function Remove_Ctrl_Arg (N : Node_Id) return Traverse_Result;
---  Reset the Controlling_Argument of calls to nonabstract
---  function calls.
-
--
--- Remove_Ctrl_Arg --
--
-
-function Remove_Ctrl_Arg (N : Node_Id) return Traverse_Result is
-begin
-   if Nkind (N) = N_Function_Call
- and then Present (Controlling_Argument (N))
- and then not Is_Abstract_Subprogram (Entity (Name (N)))
-   then
-  Set_Controlling_Argument (N, Empty);
-   end if;
-
-   return OK;
-end Remove_Ctrl_Arg;
-
-procedure Remove_Ctrl_Args is new Traverse_Proc (Remove_Ctrl_Arg);
- begin
-Remove_Ctrl_Args (Expr);
- end Remove_Controlling_Arguments;
-
- 
- -- Remove_Formals --
- 
-
- procedure Remove_Formals (Id : Entity_Id) is
-F : Entity_Id := First_Formal (Id);
-
- begin
-while Present (F) loop
-   Set_Is_Immediately_Visible (F, False);
-   Remove_Homonym (F);

[COMMITTED] ada: Enforce matching of extra formals

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Javier Miranda 

This patch enforces matching of extra formals in overridden subprograms,
subprogram renamings, and subprograms to which attributes 'Access,
'Unchecked_Access, or 'Unrestricted_Access is applied (for these access
cases the subprogram is checked against its corresponding subprogram
type). This enforcement is an internal consistency check, not an
implementation of some language legality rule.

gcc/ada/

* debug.adb
(Debug_Flag_Underscore_XX): Switch -gnatd_X used temporarily to allow
disabling extra formal checks.
* exp_attr.adb
(Expand_N_Attribute_Reference [access types]): Add extra formals
to the subprogram referenced in the prefix of 'Unchecked_Access,
'Unrestricted_Access or 'Access; required to check that its extra
formals match the extra formals of the corresponding subprogram type.
* exp_ch3.adb
(Stream_Operation_OK): Declaration moved to the public part of the
package.
(Validate_Tagged_Type_Extra_Formals): New subprogram.
(Expand_Freeze_Record_Type): Improve the code that takes care of
adding the extra formals of dispatching primitives; extended to
add also the extra formals to renamings of dispatching primitives.
* exp_ch3.ads
(Stream_Operation_OK): Declaration moved from the package body.
* exp_ch6.adb
(Check_BIP_Actuals): Complete documentation.
(Has_BIP_Extra_Formal): Subprogram declaration moved to the public
part of the package. In addition, a parameter has been added to
disable an assertion that requires its use with frozen entities.
(Duplicate_Params_Without_Extra_Actuals): New subprogram.
(Check_Subprogram_Variant): Emit the call without duplicating the
extra formals since they will be added when the call is analyzed.
(Expand_Call_Helper): Ensure that the called subprogram has all its
extra formals, enforce assertion checking extra formals on thunks,
and mark calls from thunks as processed-BIP-calls to avoid adding
their extra formals twice.
(Is_Build_In_Place_Function): Return False for entities with foreign
convention.
(Is_Build_In_Place_Function_Call): Return True also for non-BIP
functions that have BIP formals since the extra actuals are required.
(Make_Build_In_Place_Call_In_Object_Declaration): Occurrences of
Is_Return_Object replaced by the local variable Is_OK_Return_Object
that evaluates to False for scopes with foreign convention.
(Might_Have_Tasks): Fix check of class-wide limited record types.
(Needs_BIP_Task_Actuals): Remove assertion to allow calling this
function in more contexts; in addition it returns False for functions
returning objects with foreign convention.
(Needs_BIP_Finalization_Master): Likewise.
(Needs_BIP_Alloc_Form): Likewise.
(Validate_Subprogram_Calls): Check that the number of actuals (including
extra actuals) of calls in the subtree N match their corresponding
formals.
* exp_ch6.ads
(Has_BIP_Extra_Formal): Subprogram declaration moved to the public
part of the package. In addition, a parameter has been added to
disable an assertion that requires its use with frozen entities.
(Is_Build_In_Place_Function_Call): Complete documentation.
(Validate_Subprogram_Calls): Check that the number of actuals (including
extra actuals) of calls in the subtree N match their corresponding
formals.
* freeze.adb
(Check_Itype): Add extra formals to anonymous access subprogram itypes.
(Freeze_Expression): Improve code that disables the addition of extra
formals to functions with foreign convention.
(Check_Extra_Formals): Moved to package Sem_Ch6 as Extra_Formals_OK.
(Freeze_Subprogram): Add extra formals to non-dispatching subprograms.
* frontend.adb
(Frontend): Validate all the subprogram calls; it can be disabled using
switch -gnatd_X
* sem_ch3.adb
(Access_Subprogram_Declaration): Defer the addition of extra formals to
the freezing point so that we know the convention.
(Check_Anonymous_Access_Component): Likewise.
(Derive_Subprogram): Fix documentation.
* sem_ch6.adb
(Has_Reliable_Extra_Formals): New subprogram.
(Check_Anonymous_Return): Fix check of access to class-wide limited
record types.
(Check_Untagged_Equality): Placed in alphabetical order.
(Extra_Formals_OK): Subprogram moved from freeze.adb.
(Extra_Formals_Match_OK): New subprogram.
(Has_BIP_Formals): New subprogram.
(Has_Extra_Formals): New subprograms.
(Needs_Accessibility_Check_Extra): New subprogram.
(Parent_Subprogram): New subprogram.
(Add_Extra_Formal): Minor code clea

[COMMITTED] ada: Propagate aspect Ghost when instantiating null formal procedures

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

When instantiating a generic package that includes a formal subprogram
declaration with a Ghost aspect and a subprogram_default of null, e.g.:

   generic
 with procedure Proc is null with Ghost;
   package P is ...

the Ghost aspect should be propagated to the internally generated null
subprogram, so this null subprogram can be used in contexts that require
ghost entities.

gcc/ada/

* sem_ch12.adb (Instantiate_Formal_Subprogram): Copy aspect Ghost
from formal subprogram declaration to the internally generated
procedure.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch12.adb | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index ca0f4913e36..276656085be 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -11088,6 +11088,8 @@ package body Sem_Ch12 is
 
  Set_Convention (Defining_Unit_Name (New_Spec), Convention_Intrinsic);
 
+ Copy_Ghost_Aspect (Formal, To => Decl_Node);
+
  --  Eliminate the calls to it when optimization is enabled
 
  Set_Is_Inlined (Defining_Unit_Name (New_Spec));
-- 
2.34.1



[COMMITTED] ada: Clean up call to check if aspects are present

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Code cleanup; semantics is unaffected.

gcc/ada/

* exp_ch6.adb, exp_put_image.adb, sem_aggr.adb, sem_attr.adb,
sem_ch5.adb, sem_type.adb, sem_util.adb: Replace
"Present (Find_Aspect (...))" with "Has_Aspect".

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch6.adb   |  4 ++--
 gcc/ada/exp_put_image.adb |  2 +-
 gcc/ada/sem_aggr.adb  |  2 +-
 gcc/ada/sem_attr.adb  |  4 ++--
 gcc/ada/sem_ch5.adb   |  2 +-
 gcc/ada/sem_type.adb  |  2 +-
 gcc/ada/sem_util.adb  | 14 ++
 7 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index fce7a7cebf5..1466e4dc36a 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -3305,8 +3305,8 @@ package body Exp_Ch6 is
or else No (Aspect)
 
--  Do not fold if multiple applicable predicate aspects
-   or else Present (Find_Aspect (Subt, Aspect_Static_Predicate))
-   or else Present (Find_Aspect (Subt, Aspect_Predicate))
+   or else Has_Aspect (Subt, Aspect_Static_Predicate)
+   or else Has_Aspect (Subt, Aspect_Predicate)
or else Augments_Other_Dynamic_Predicate (Aspect)
or else CodePeer_Mode
  then
diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index f90f0206f27..eaedebe4001 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -1045,7 +1045,7 @@ package body Exp_Put_Image is
   declare
  U_Type : constant Entity_Id := Underlying_Type (Entity (Prefix (N)));
   begin
- if Present (Find_Aspect (U_Type, Aspect_Put_Image)) then
+ if Has_Aspect (U_Type, Aspect_Put_Image) then
 return True;
  end if;
 
diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
index 4da05dd7317..3a093d26007 100644
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -1052,7 +1052,7 @@ package body Sem_Aggr is
   elsif Is_Array_Type (Typ) and then Null_Record_Present (N) then
  Error_Msg_N ("null record forbidden in array aggregate", N);
 
-  elsif Present (Find_Aspect (Typ, Aspect_Aggregate))
+  elsif Has_Aspect (Typ, Aspect_Aggregate)
 and then Ekind (Typ) /= E_Record_Type
 and then Ada_Version >= Ada_2022
   then
diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
index 299ea04959c..de4e8aa681c 100644
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -5996,8 +5996,8 @@ package body Sem_Attr is
--  Verify that prefix can be iterated upon.
 
if Is_Array_Type (Typ)
- or else Present (Find_Aspect (Typ, Aspect_Default_Iterator))
- or else Present (Find_Aspect (Typ, Aspect_Iterable))
+ or else Has_Aspect (Typ, Aspect_Default_Iterator)
+ or else Has_Aspect (Typ, Aspect_Iterable)
then
   null;
else
diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index 5f0629d32b3..7bca6d39dd2 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -2191,7 +2191,7 @@ package body Sem_Ch5 is
 if Is_Array_Type (Typ)
   or else Is_Reversible_Iterator (Typ)
   or else
-(Present (Find_Aspect (Typ, Aspect_Iterable))
+(Has_Aspect (Typ, Aspect_Iterable)
   and then
 Present
   (Get_Iterable_Type_Primitive (Typ, Name_Previous)))
diff --git a/gcc/ada/sem_type.adb b/gcc/ada/sem_type.adb
index 2fc82d14016..718c29754c1 100644
--- a/gcc/ada/sem_type.adb
+++ b/gcc/ada/sem_type.adb
@@ -1007,7 +1007,7 @@ package body Sem_Type is
 
   elsif Ada_Version >= Ada_2022
 and then T2 = Any_Composite
-and then Present (Find_Aspect (T1, Aspect_Aggregate))
+and then Has_Aspect (T1, Aspect_Aggregate)
   then
  return True;
 
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index 71548dcca17..e43581ec6e9 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -13309,7 +13309,7 @@ package body Sem_Util is
 
begin
   return Nkind (Exp) = N_Aggregate
-and then Present (Find_Aspect (Etype (Exp), Aspect_Aggregate))
+and then Has_Aspect (Etype (Exp), Aspect_Aggregate)
 and then not Is_Record_Aggregate;
end Is_Container_Aggregate;
 
@@ -21718,18 +21718,16 @@ package body Sem_Util is
   --  type has the appropriate user-defined literal aspect.
 
   return (Nkind (N) in N_Numeric_Or_String_Literal
-and then Present (Find_Aspect (Typ, Literal_Aspect_Map (Nkind (N)
+and then Has_Aspect (Typ, Literal_Aspect_Map (Nkind (N
   or else
 (Is_Entity_Name (N)
   and then Present (Entity (N))
   and then
 ((Ekind (Entity (N)) = E_Named_Integer
-and then
-  Present (Find_Aspect (Typ, Aspec

[COMMITTED] ada: Fix oversight in implementation of allocators for storage models

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

When the allocator is of an unconstrained array type and has an initializing
expression, the copy of the initializing expression must be done separately
from that of the bounds.

gcc/ada/

* gcc-interface/utils2.cc (build_allocator): For unconstrained
array types with a storage model and an initializing expression,
copy the initialization expression separately from the bounds. In
all cases with a storage model, pass the locally computed size for
the store.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index ef81f8dd56a..80d550c91e1 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -2439,8 +2439,8 @@ build_allocator (tree type, tree init, tree result_type, 
Entity_Id gnat_proc,
   tree storage_ptr_type = build_pointer_type (storage_type);
   tree lhs, rhs;
 
-  size = SUBSTITUTE_PLACEHOLDER_IN_EXPR (TYPE_SIZE_UNIT (storage_type),
-init);
+  size = TYPE_SIZE_UNIT (storage_type);
+  size = SUBSTITUTE_PLACEHOLDER_IN_EXPR (size, init);
 
   /* If the size overflows, pass -1 so Storage_Error will be raised.  */
   if (TREE_CODE (size) == INTEGER_CST && !valid_constant_size_p (size))
@@ -2454,8 +2454,10 @@ build_allocator (tree type, tree init, tree result_type, 
Entity_Id gnat_proc,
 
   /* If there is an initializing expression, then make a constructor for
 the entire object including the bounds and copy it into the object.
-If there is no initializing expression, just set the bounds.  */
-  if (init)
+If there is no initializing expression, just set the bounds.  Note
+that, if we have a storage model, we need to copy the initializing
+expression separately from the bounds.  */
+  if (init && !pool_is_storage_model)
{
   vec<constructor_elt, va_gc> *v;
  vec_alloc (v, 2);
@@ -2472,11 +2474,28 @@ build_allocator (tree type, tree init, tree 
result_type, Entity_Id gnat_proc,
{
  lhs = build_component_ref (storage_deref, TYPE_FIELDS (storage_type),
 false);
- rhs = build_template (template_type, type, NULL_TREE);
+ rhs = build_template (template_type, type, init);
}
 
   if (pool_is_storage_model)
-   storage_init = build_storage_model_store (gnat_pool, lhs, rhs);
+   {
+ storage_init = build_storage_model_store (gnat_pool, lhs, rhs);
+ if (init)
+   {
+ start_stmt_group ();
+ add_stmt (storage_init);
+ lhs
+   = build_component_ref (storage_deref,
+  DECL_CHAIN (TYPE_FIELDS (storage_type)),
+  false);
+ rhs = init;
+ size = TYPE_SIZE_UNIT (TREE_TYPE (lhs));
+ size = SUBSTITUTE_PLACEHOLDER_IN_EXPR (size, init);
+ tree t = build_storage_model_store (gnat_pool, lhs, rhs, size);
+ add_stmt (t);
+ storage_init = end_stmt_group ();
+   }
+   }
   else
storage_init = build_binary_op (INIT_EXPR, NULL_TREE, lhs, rhs);
 
@@ -2520,7 +2539,7 @@ build_allocator (tree type, tree init, tree result_type, 
Entity_Id gnat_proc,
   TREE_THIS_NOTRAP (storage_deref) = 1;
   if (pool_is_storage_model)
storage_init
- = build_storage_model_store (gnat_pool, storage_deref, init);
+ = build_storage_model_store (gnat_pool, storage_deref, init, size);
   else
storage_init
  = build_binary_op (INIT_EXPR, NULL_TREE, storage_deref, init);
-- 
2.34.1



Re: [PATCH] libstdc++: Add _Float128 to_chars/from_chars support for x86, ia64 and ppc64le with glibc

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 01:41:41AM +0000, Joseph Myers wrote:
> I've committed this further fix for a syntax error as obvious.

Thanks and sorry.

> libstdc++: Fix syntax error in old-glibc case in floating_from_chars.cc 
> [PR107562]
> 
>   PR libstdc++/107562
>   * src/c++17/floating_from_chars.cc (from_chars_impl): Fix syntax
>   error.
> 
> diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc 
> b/libstdc++-v3/src/c++17/floating_from_chars.cc
> index 29eb4634e9d..be1e1051b5c 100644
> --- a/libstdc++-v3/src/c++17/floating_from_chars.cc
> +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
> @@ -632,7 +632,7 @@ namespace
> {
>  #ifndef _GLIBCXX_HAVE_FLOAT128_MATH
>   if (&__strtof128 == nullptr)
> -   tmpval = _Float128(std::strtold(str, &endptr);
> +   tmpval = _Float128(std::strtold(str, &endptr));
>   else
>  #endif
> tmpval = __strtof128(str, &endptr);

Jakub



Re: [PATCH] maintainer-scripts/gcc_release: compress xz in parallel

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 07:40:02AM +0000, Sam James wrote:
> > On 8 Nov 2022, at 07:33, Xi Ruoyao  wrote:
> > I'm wondering if running xz -T0 on different machines (with different
> > core numbers) may produce different compressed data.  The difference can
> > cause trouble distributing checksums.
> > 
> 
> Your question is a good one - xz -T0 produces different results to xz -T1
> but:
> 1. The tarballs for GCC are only created on one machine and aren't
> created repeatedly then compared with each other wrt mirroring;

No, that is not the case.
While the snapshots are created on sourceware locally, GCC releases (and
release candidates) are typically created on some RM's local machine.

gcc_release script has the -l option which indicates it is running on
sourceware, and when -l is not present, -u username is used for upload.

Jakub



[COMMITTED] ada: Compile-time simplification of 'Image incorrectly ignores Put_Image

2022-11-08 Thread Marc Poulhiès via Gcc-patches
From: Steve Baird 

In the case of Some_Enumeration_Type'Image (<some enumeration literal>),
the compiler will replace this expression in its internal program
representation with a corresponding string literal. This is incorrect
if the Put_Image aspect has been specified (directly or via inheritance)
for the enumeration type.

gcc/ada/

* sem_attr.adb
(Eval_Attribute): Don't simplify 'Image call if Put_Image has been
specified.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_attr.adb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
index de4e8aa681c..5166b4be4e9 100644
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -9203,13 +9203,15 @@ package body Sem_Attr is
   --  Image is a scalar attribute, but is never static, because it is
   --  not a static function (having a non-scalar argument (RM 4.9(22))
   --  However, we can constant-fold the image of an enumeration literal
-  --  if names are available.
+  --  if names are available and default Image implementation has not
+  --  been overridden.
 
   when Attribute_Image =>
  if Is_Entity_Name (E1)
and then Ekind (Entity (E1)) = E_Enumeration_Literal
and then not Discard_Names (First_Subtype (Etype (E1)))
and then not Global_Discard_Names
+   and then not Has_Aspect (Etype (E1), Aspect_Put_Image)
  then
 declare
Lit : constant Entity_Id := Entity (E1);
-- 
2.34.1



Re: [PATCH] maintainer-scripts/gcc_release: compress xz in parallel

2022-11-08 Thread Sam James via Gcc-patches


> On 8 Nov 2022, at 08:52, Jakub Jelinek  wrote:
> 
> On Tue, Nov 08, 2022 at 07:40:02AM +0000, Sam James wrote:
>>> On 8 Nov 2022, at 07:33, Xi Ruoyao  wrote:
>>> I'm wondering if running xz -T0 on different machines (with different
>>> core numbers) may produce different compressed data.  The difference can
>>> cause trouble distributing checksums.
>>> 
>> 
>> Your question is a good one - xz -T0 produces different results to xz -T1
>> but:
>> 1. The tarballs for GCC are only created on one machine and aren't
>> created repeatedly then compared with each other wrt mirroring;
> 
> No, that is not the case.
> While the snapshots are created on sourceware locally, GCC releases (and
> release candidates) are typically created on some RM's local machine.

We've misinterpreted each other. I mean the same tarball isn't then
recreated repeatedly and different copies uploaded to mirrors.

Obviously different machines may be used at different points.


signature.asc
Description: Message signed with OpenPGP


[PATCH] middle-end IFN_ASSUME support [PR106654]

2022-11-08 Thread Pilar Latiesa via Gcc-patches
On Mon, Oct 17, 2022 at 05:32:32AM +0200, Martin Uecker wrote:
> Hm, that already seems to work with
>
> if (!std::isfinite(x))
>   __builtin_unreachable();
>
> https://godbolt.org/z/hj3WrEhjb

Not anymore. Perhaps after making ranger the VRP default, because I get the
mentioned outcome with --param=vrp1-mode=vrp


[COMMITTED] gcc: fix PR rtl-optimization/107482

2022-11-08 Thread Max Filippov via Gcc-patches
gcc/
PR rtl-optimization/107482
* ira-color.cc (assign_hard_reg): Only call
update_costs_from_copies when retry_p is false.
---
 gcc/ira-color.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 4a1a325e8e31..ffe73b61c450 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -2209,8 +2209,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 restore_costs_from_copies (a);
   ALLOCNO_HARD_REGNO (a) = best_hard_regno;
   ALLOCNO_ASSIGNED_P (a) = true;
-  if (best_hard_regno >= 0)
-update_costs_from_copies (a, true, ! retry_p);
+  if (best_hard_regno >= 0 && !retry_p)
+update_costs_from_copies (a, true, true);
   ira_assert (ALLOCNO_CLASS (a) == aclass);
   /* We don't need updated costs anymore.  */
   ira_free_allocno_updated_costs (a);
-- 
2.30.2



Re: [PATCH v2] Always use TYPE_MODE instead of DECL_MODE for vector field

2022-11-08 Thread Richard Biener via Gcc-patches
On Mon, Nov 7, 2022 at 9:07 PM H.J. Lu  wrote:
>
> On Mon, Oct 24, 2022 at 11:28 PM Richard Biener
>  wrote:
> >
> > On Mon, Oct 24, 2022 at 10:02 PM H.J. Lu  wrote:
> > >
> > > On Mon, Oct 24, 2022 at 12:12 AM Richard Biener
> > >  wrote:
> > > >
> > > > On Fri, Oct 21, 2022 at 6:18 PM H.J. Lu  wrote:
> > > > >
> > > > > On Fri, Oct 21, 2022 at 2:33 AM Richard Biener
> > > > >  wrote:
> > > > > >
> > > > > > On Thu, Oct 20, 2022 at 6:58 PM H.J. Lu via Gcc-patches
> > > > > >  wrote:
> > > > > > >
> > > > > > > commit e034c5c895722e0092d2239cd8c2991db77d6d39
> > > > > > > Author: Jakub Jelinek 
> > > > > > > Date:   Sat Dec 2 08:54:47 2017 +0100
> > > > > > >
> > > > > > > PR target/78643
> > > > > > > PR target/80583
> > > > > > > * expr.c (get_inner_reference): If DECL_MODE of a 
> > > > > > > non-bitfield
> > > > > > > is BLKmode for vector field with vector raw mode, use 
> > > > > > > TYPE_MODE
> > > > > > > instead of DECL_MODE.
> > > > > > >
> > > > > > > fixed the case where DECL_MODE of a vector field is BLKmode and 
> > > > > > > its
> > > > > > > TYPE_MODE is a vector mode because of target attribute.  Remove 
> > > > > > > the
> > > > > > > BLKmode check for the case where DECL_MODE of a vector field is a 
> > > > > > > vector
> > > > > > > mode and its TYPE_MODE is BLKmode because of target attribute.
> > > > > > >
> > > > > > > gcc/
> > > > > > >
> > > > > > > PR target/107304
> > > > > > > * expr.c (get_inner_reference): Always use TYPE_MODE for 
> > > > > > > vector
> > > > > > > field with vector raw mode.
> > > > > > >
> > > > > > > gcc/testsuite/
> > > > > > >
> > > > > > > PR target/107304
> > > > > > > * gcc.target/i386/pr107304.c: New test.
> > > > > > > ---
> > > > > > >  gcc/expr.cc  |  3 +-
> > > > > > >  gcc/testsuite/gcc.target/i386/pr107304.c | 39 
> > > > > > > 
> > > > > > >  2 files changed, 40 insertions(+), 2 deletions(-)
> > > > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr107304.c
> > > > > > >
> > > > > > > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > > > > > > index efe387e6173..9145193c2c1 100644
> > > > > > > --- a/gcc/expr.cc
> > > > > > > +++ b/gcc/expr.cc
> > > > > > > @@ -7905,8 +7905,7 @@ get_inner_reference (tree exp, 
> > > > > > > poly_int64_pod *pbitsize,
> > > > > > >   /* For vector fields re-check the target flags, as 
> > > > > > > DECL_MODE
> > > > > > >  could have been set with different target flags than
> > > > > > >  the current function has.  */
> > > > > > > - if (mode == BLKmode
> > > > > > > - && VECTOR_TYPE_P (TREE_TYPE (field))
> > > > > > > + if (VECTOR_TYPE_P (TREE_TYPE (field))
> > > > > > >   && VECTOR_MODE_P (TYPE_MODE_RAW (TREE_TYPE 
> > > > > > > (field
> > > > > >
> > > > > > Isn't the check on TYPE_MODE_RAW also wrong then?  Btw, the mode 
> > > > > > could
> > > > >
> > > > > TYPE_MODE_RAW is always set to a vector mode for a vector type:
> > > > >
> > > > >/* Find an appropriate mode for the vector type.  */
> > > > > if (TYPE_MODE (type) == VOIDmode)
> > > > >   SET_TYPE_MODE (type,
> > > > >  mode_for_vector (SCALAR_TYPE_MODE 
> > > > > (innertype),
> > > > >   nunits).else_blk ());
> > > >
> > > > But mode_for_vector can return a MODE_INT!
> > >
> > > You are right.
> > >
> > > >   /* For integers, try mapping it to a same-sized scalar mode.  */
> > > >   if (GET_MODE_CLASS (innermode) == MODE_INT)
> > > > {
> > > >   poly_uint64 nbits = nunits * GET_MODE_BITSIZE (innermode);
> > > >   if (int_mode_for_size (nbits, 0).exists (&mode)
> > > >   && have_regs_of_mode[mode])
> > > > return mode;
> > > >
> > > > > But TYPE_MODE returns BLKmode if the vector mode is unsupported.
> > > > >
> > > > > > also be an integer mode.
> > > > >
> > > > > For a vector field, mode is either BLK mode or the vector mode.  
> > > > > Jakub,
> > > > > can you comment on it?
> > > >
> > > > I think that for
> > > >
> > > > typedef int v2si __attribute__((vector_size(8)));
> > > >
> > > > struct X { int i; v2si j; };
> > > >
> > > > v2si should get DImode with -mno-sse?
> > > >
> > >
> > > Currently GCC generates
> > >
> > > (insn 31 32 33 (set (subreg:DI (reg:V2SI 105) 0)
> > > (reg:DI 84 [ _3 ])) "y2.c":12:11 -1
> > >  (nil))
> > >
> > > With my patch, v2si gets DImode directly without SUBREG.
> > >
> > > Here is the v2 patch with the update commit message:
> > >
> > > Remove the BLKmode check for the case where DECL_MODE
> > > of a vector field is a vector mode and its TYPE_MODE isn't a
> > > vector mode because of target attribute.
> > >
> > > OK for master?
> >
> > OK.
>
> OK to backport to release branches?

Yes

> Thanks.
>
> --
> H.J.


Re: [RFA] gcc: fix PR rtl-optimization/107482

2022-11-08 Thread Max Filippov via Gcc-patches
On Mon, Nov 7, 2022 at 12:52 PM Vladimir Makarov  wrote:
> even better patch:
>
> diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
> index 4a1a325e8e3..ffe73b61c45 100644
> --- a/gcc/ira-color.cc
> +++ b/gcc/ira-color.cc
> @@ -2209,8 +2209,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
>   restore_costs_from_copies (a);
> ALLOCNO_HARD_REGNO (a) = best_hard_regno;
> ALLOCNO_ASSIGNED_P (a) = true;
> -  if (best_hard_regno >= 0)
> -update_costs_from_copies (a, true, ! retry_p);
> +  if (best_hard_regno >= 0 && !retry_p)
> +update_costs_from_copies (a, true, true);
> ira_assert (ALLOCNO_CLASS (a) == aclass);
> /* We don't need updated costs anymore.  */
> ira_free_allocno_updated_costs (a);
>
...
> Please, check that my proposed patch works and commit it in the case of
> success.

Thank you for taking a look and suggesting a better fix.
I've tested your version for target=xtensa-linux-uclibc, it fixes
the issue without new regressions. I've committed the fix to the
master branch and will backport it to gcc-10, -11 and -12 in a few
days.

-- 
Thanks.
-- Max


Re: [PATCH] Support Intel prefetchit0/t1

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 04, 2022 at 03:46:32PM +0800, Haochen Jiang via Gcc-patches wrote:
> We will take back the patches which add a new parameter on original
> builtin_prefetch and implement instruction prefetch on that.
> 
> Also we consider that since we will only do that on specific backend,
> no need to add a new rtl for that.
> 
> This patch will only support instructions prefetch for x86 backend.
> 
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?

The gcc.target/i386/prefetchi-4.c testcase ICEs for me on i686-linux.
Can be reproduced even on x86_64, with:
./cc1 -quiet -m32 -march=pentiumpro prefetchi-4.c -isystem include/
during RTL pass: expand
prefetchi-4.c: In function ‘prefetch_test’:
prefetchi-4.c:11:3: internal compiler error: in gen_prefetch, at 
config/i386/i386.md:23913
   11 |   __builtin_ia32_prefetch (p, 0, 3, 0);
  |   ^~~~
0x1b92416 gen_prefetch(rtx_def*, rtx_def*, rtx_def*)
../../gcc/config/i386/i386.md:23913
0x141dcf3 ix86_expand_builtin(tree_node*, rtx_def*, rtx_def*, machine_mode, int)
../../gcc/config/i386/i386-expand.cc:13077
0x60deb4 expand_builtin(tree_node*, rtx_def*, rtx_def*, machine_mode, int)
../../gcc/builtins.cc:7321
0x80803d expand_expr_real_1(tree_node*, rtx_def*, machine_mode, 
expand_modifier, rtx_def**, bool)
../../gcc/expr.cc:11865
0x7fa4d5 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, 
rtx_def**, bool)
../../gcc/expr.cc:9000
0x648c12 expand_expr
../../gcc/expr.h:310
0x651c17 expand_call_stmt
../../gcc/cfgexpand.cc:2831
0x655709 expand_gimple_stmt_1
../../gcc/cfgexpand.cc:3880
0x655d93 expand_gimple_stmt
../../gcc/cfgexpand.cc:4044
0x65e061 expand_gimple_basic_block
../../gcc/cfgexpand.cc:6096
0x660575 execute
../../gcc/cfgexpand.cc:6822
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

The ICE is on
  gcc_assert (TARGET_3DNOW);
  operands[2] = GEN_INT (3);
The expander has
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
condition and for write handles all those different ISAs, so gcc_assert 
(TARGET_3DNOW);
at the end only asserts the obvious that the expander condition had to be
satisfied.  But for !write, it only has:
  if (TARGET_PREFETCH_SSE)
;
  else
{
  gcc_assert (TARGET_3DNOW);
  operands[2] = GEN_INT (3);
}
and here I don't understand how it can work, because if
!TARGET_3DNOW && !TARGET_PREFETCH_SSE, but
TARGET_PRFCHW || TARGET_PREFETCHWT1
then it clearly ICEs.  Both of the latter ISAs can be enabled/disabled
individually without dependencies.

It is unclear what exactly changed though, because the prefetch pattern
has not changed, but it didn't ICE before that commit.

Jakub



Re: [PATCH] rtl: Try to remove EH edges after {pro,epi}logue generation [PR90259]

2022-11-08 Thread Richard Biener via Gcc-patches
On Tue, Nov 8, 2022 at 3:49 AM Kewen.Lin  wrote:
>
> Hi,
>
> After prologue and epilogue generation, the judgement on whether
> one memory access onto stack frame may trap or not could change,
> since we get more exact stack information by now.
>
> As PR90259 shows, some memory access becomes impossible to trap
> any more after prologue and epilogue generation, it can make
> subsequent optimization be able to remove it if safe, but it
> results in unexpected control flow status due to REG_EH_REGION
> note missing.
>
> This patch proposes to try to remove EH edges with function
> purge_all_dead_edges after prologue and epilogue generation,
> it simplifies CFG as early as we can and don't need any fixup
> in downstream passes.
>
> CFG simplification result with PR90259's case as example:
>
> *before*
>
>18: %1:TF=call [`__gcc_qdiv'] argc:0
>   REG_EH_REGION 0x2
>77: NOTE_INSN_BASIC_BLOCK 3
>19: NOTE_INSN_DELETED
>20: NOTE_INSN_DELETED
>   110: [%31:SI+0x20]=%1:DF
>   REG_EH_REGION 0x2
>   116: NOTE_INSN_BASIC_BLOCK 4
>   111: [%31:SI+0x28]=%2:DF
>   REG_EH_REGION 0x2
>22: NOTE_INSN_BASIC_BLOCK 5
>   108: %0:DF=[%31:SI+0x20]
>   REG_EH_REGION 0x2
>   117: NOTE_INSN_BASIC_BLOCK 6
>   109: %1:DF=[%31:SI+0x28]
>   REG_EH_REGION 0x2
>79: NOTE_INSN_BASIC_BLOCK 7
>26: [%31:SI+0x18]=%0:DF
>   104: pc=L69
>   105: barrier
>
> *after*
>
>18: %1:TF=call [`__gcc_qdiv'] argc:0
>   REG_EH_REGION 0x2
>77: NOTE_INSN_BASIC_BLOCK 3
>19: NOTE_INSN_DELETED
>20: NOTE_INSN_DELETED
>   110: [%31:SI+0x20]=%1:DF
>   111: [%31:SI+0x28]=%2:DF
>   108: %0:DF=[%31:SI+0x20]
>   109: %1:DF=[%31:SI+0x28]
>26: [%31:SI+0x18]=%0:DF
>   104: pc=L69
>   105: barrier
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?

It looks reasonable - OK if the others CCed have no comments.

Thanks,
Richard.

> BR,
> Kewen
>
> -
> PR rtl-optimization/90259
>
> gcc/ChangeLog:
>
> * function.cc (rest_of_handle_thread_prologue_and_epilogue): Add
> parameter fun, and call function purge_all_dead_edges.
> (pass_thread_prologue_and_epilogue::execute): Name unnamed parameter
> as fun, and use it for rest_of_handle_thread_prologue_and_epilogue.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/powerpc/pr90259.C: New.
> ---
>  gcc/function.cc|  13 ++-
>  gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +
>  2 files changed, 113 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C
>
> diff --git a/gcc/function.cc b/gcc/function.cc
> index 6474a663b30..3757ded547d 100644
> --- a/gcc/function.cc
> +++ b/gcc/function.cc
> @@ -6540,7 +6540,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
>  }
>
>  static unsigned int
> -rest_of_handle_thread_prologue_and_epilogue (void)
> +rest_of_handle_thread_prologue_and_epilogue (function *fun)
>  {
>/* prepare_shrink_wrap is sensitive to the block structure of the control
>   flow graph, so clean it up first.  */
> @@ -6557,6 +6557,13 @@ rest_of_handle_thread_prologue_and_epilogue (void)
>   Fix that up.  */
>fixup_partitions ();
>
> +  /* After prologue and epilogue generation, the judgement on whether
> + one memory access onto stack frame may trap or not could change,
> + since we get more exact stack information by now.  So try to
> + remove any EH edges here, see PR90259.  */
> +  if (fun->can_throw_non_call_exceptions)
> +purge_all_dead_edges ();
> +
>/* Shrink-wrapping can result in unreachable edges in the epilogue,
>   see PR57320.  */
>cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
> @@ -6625,9 +6632,9 @@ public:
>{}
>
>/* opt_pass methods: */
> -  unsigned int execute (function *) final override
> +  unsigned int execute (function * fun) final override
>  {
> -  return rest_of_handle_thread_prologue_and_epilogue ();
> +  return rest_of_handle_thread_prologue_and_epilogue (fun);
>  }
>
>  }; // class pass_thread_prologue_and_epilogue
> diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C 
> b/gcc/testsuite/g++.target/powerpc/pr90259.C
> new file mode 100644
> index 000..db75ac7fe02
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/pr90259.C
> @@ -0,0 +1,103 @@
> +/* { dg-require-effective-target long_double_ibm128 } */
> +/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions 
> -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */
> +/* { dg-add-options long_double_ibm128 } */
> +
> +/* Verify there is no ICE.  */
> +
> +template  struct b
> +{
> +  static constexpr int c = a;
> +};
> +template  using d = b;
> +struct e
> +{
> +  int f;
> +  int
> +  g ()
> +  {
> +return __builtin_ceil (f / (long double) h);
> +  }
> +  float h;
> +};
> +template  using k = d;
> +template  class n
> +{
> +public:
> +  e ae;
> +  void af ();

Re: [PATCH] c++: Allow module name to be a single letter on Windows

2022-11-08 Thread Torbjorn SVENSSON via Gcc-patches

Hi Nathan,

On 2022-11-08 00:03, Nathan Sidwell wrote:

On 11/3/22 11:06, Torbjorn SVENSSON wrote:



On 2022-11-03 15:17, Nathan Sidwell wrote:

On 10/28/22 05:15, Torbjörn SVENSSON wrote:

On Windows, the ':' character is special and when the module name is
a single character, like 'A', then the flatname would be (for
example) 'A:Foo'. On Windows, 'A:Foo' is treated as an absolute
path by the module loader and is likely not found.

Without this patch, the test case pr98944_c.C fails with:

In module imported at 
/src/gcc/testsuite/g++.dg/modules/pr98944_b.C:7:1,
of module A:Foo, imported at 
/src/gcc/testsuite/g++.dg/modules/pr98944_c.C:7:

A:Internals: error: header module expected, module 'A:Internals' found
A:Internals: error: failed to read compiled module: Bad file data
A:Internals: note: compiled module file is 'gcm.cache/A-Internals.gcm'
In module imported at 
/src/gcc/testsuite/g++.dg/modules/pr98944_c.C:7:8:

A:Foo: error: failed to read compiled module: Bad import dependency
A:Foo: note: compiled module file is 'gcm.cache/A-Foo.gcm'
A:Foo: fatal error: returning to the gate for a mechanical issue
compilation terminated.

include/ChangeLog:

* filenames.h: Added IS_REAL_ABSOLUTE_PATH macro to check if
path is absolute and not semi-absolute on Windows.


Hm, this is unfortunate.  The current IS_ABSOLUTE_PATH, is really 
'not relative to cwd', and even then that's untrue if the drive 
letter there is the drive letter of cwd, right?


It's awkward to have a new macro for just this purpose and the new 
name isn't very indicative of the difference to the current 
IS_ABSOLUTE_PATH.


Would it be better to not deal with drive letters here?  How 
prevalent are they these days in windows?  Would something like


    if (IS_DIR_SEPARATOR (ptr[ptr[0] == '.']))

suffice?


I don't think you can ignore the drive letter part... see below.


#include <stdio.h>
#include "include/filenames.h"
#define TF(x) ((x) ? "true" : "false")
int main(int argc, char *argv[]) {
   const char *test[] = {
   /* absolute */  "c:\\foo", "c:/foo", "/foo", "\\foo",
   /* semi-absolute */ "c:foo",
   /* relative */  "foo", "./foo", ".\\foo",
   };
   for (int i = 0; i < sizeof(test) / sizeof(test[0]); i++) {
 const char *ptr = test[i];
 printf("\nptr: %s\n", ptr);
 printf("  IS_DOS_ABSOLUTE_PATH: %s\n",
    TF(IS_DOS_ABSOLUTE_PATH(ptr)));
 printf("  IS_DOS_REAL_ABSOLUTE_PATH: %s\n",
    TF(IS_DOS_REAL_ABSOLUTE_PATH(ptr)));
 printf("  IS_DIR_SEPARATOR: %s\n",
    TF(IS_DIR_SEPARATOR(ptr[ptr[0] == '.'])));
   }
   return 0;
}


The output is:

ptr: c:\foo
   IS_DOS_ABSOLUTE_PATH: true
   IS_DOS_REAL_ABSOLUTE_PATH: true
   IS_DIR_SEPARATOR: false

ptr: c:/foo
   IS_DOS_ABSOLUTE_PATH: true
   IS_DOS_REAL_ABSOLUTE_PATH: true
   IS_DIR_SEPARATOR: false

ptr: /foo
   IS_DOS_ABSOLUTE_PATH: true
   IS_DOS_REAL_ABSOLUTE_PATH: true
   IS_DIR_SEPARATOR: true

ptr: \foo
   IS_DOS_ABSOLUTE_PATH: true
   IS_DOS_REAL_ABSOLUTE_PATH: true
   IS_DIR_SEPARATOR: false

ptr: c:foo
   IS_DOS_ABSOLUTE_PATH: true
   IS_DOS_REAL_ABSOLUTE_PATH: false
   IS_DIR_SEPARATOR: false

ptr: foo
   IS_DOS_ABSOLUTE_PATH: false
   IS_DOS_REAL_ABSOLUTE_PATH: false
   IS_DIR_SEPARATOR: false

ptr: ./foo
   IS_DOS_ABSOLUTE_PATH: false
   IS_DOS_REAL_ABSOLUTE_PATH: false
   IS_DIR_SEPARATOR: true

ptr: .\foo
   IS_DOS_ABSOLUTE_PATH: false
   IS_DOS_REAL_ABSOLUTE_PATH: false
   IS_DIR_SEPARATOR: false




or, failing that perhaps put some explicit WINDOWS-specific #ifdef'd 
code there?  It's a real corner case.


Would you rather have something like this in module.cc?

if (ptr[0] == '.')
   {
 if (IS_DIR_SEPARATOR (ptr[1]))
   return get_module (build_string (strlen (ptr), ptr));
   }
else
   {
#if HAVE_DOS_BASED_FILE_SYSTEM
 if (HAS_DRIVE_SPEC (ptr) && IS_DIR_SEPARATOR (ptr[2]))
#else
 if (IS_ABSOLUTE_PATH (ptr))
#endif
   return get_module (build_string (strlen (ptr), ptr));
   }


Yes, something like the above, but I think you're missing "/bob" in the 
DOS_BASED case?  shouldn't that also be a pathname?


if (IS_DIR_SEPARATOR (ptr[ptr[0] == '.']) // ./FOO or /FOO
#if HAVE_DOS_BASED_FILE_SYSTEM
     // DOS-FS IS_ABSOLUTE_PATH thinks 'A:B' is absolute, but we need to 
consider

     // that as a module:partition.
     || (HAS_DRIVE_SPEC (ptr) && IS_DIR_SEPARATOR (ptr[2])) // A:/FOO
#endif
     || false)
    return 

Does (something like) that work?


I tested it and your solution appears to work.
Are you okay with me pushing that solution or do you want me to send a 
v2 with it first?


Kind regards,
Torbjörn



nathan




Let me know what you prefer.

Kind regards,
Torbjörn



nathan



gcc/cp/ChangeLog:

* module.cc: Use IS_REAL_ABSOLUTE_PATH macro.

Co-Authored-By: Yvan ROUX 
Signed-off-by: Torbjörn SVENSSON 
---
  gcc/cp/module.cc    | 2 +-
  include/filenames.h | 4 
  2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 9957d

[committed] libstdc++: Uncomment denorm_min test

2022-11-08 Thread Jakub Jelinek via Gcc-patches
Hi!

As r13-3609-g6d9dbdf51f9afe8 has been committed, we can now enable
even the denorm_min test.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
as obvious.

2022-11-08  Jakub Jelinek  

* testsuite/20_util/to_chars/float128_c++23.cc (test): Uncomment
denorm_min test.

--- libstdc++-v3/testsuite/20_util/to_chars/float128_c++23.cc.jj
2022-11-07 15:15:42.929314805 +0100
+++ libstdc++-v3/testsuite/20_util/to_chars/float128_c++23.cc   2022-11-07 
15:21:19.935716429 +0100
@@ -34,7 +34,7 @@ void
 test(std::chars_format fmt = std::chars_format{})
 {
   std::float128_t tests[] = {
-//std::numeric_limits<std::float128_t>::denorm_min(),
+std::numeric_limits<std::float128_t>::denorm_min(),
 std::numeric_limits<std::float128_t>::min(),
 0.0f128,
 -42.0f128,

Jakub



Re: [PATCH, v3] Fortran: ordering of hidden procedure arguments [PR107441]

2022-11-08 Thread Mikael Morin

Hello,

Le 07/11/2022 à 22:45, Harald Anlauf via Fortran a écrit :

Dear all,

Am 04.11.22 um 10:53 schrieb Mikael Morin:

Le 03/11/2022 à 23:03, Harald Anlauf a écrit :

I've spent some time not only staring at create_function_arglist,
but trying several variations handling the declared hidden parms,
and applying the necessary adjustments to gfc_get_function_type.
(Managing linked trees is not the issue, just understanding them.)
I've been unable to get the declarations in sync, and would need
help how to debug the mess I've created.  Dropping my patch for
the time being.


If you want, we can meet on IRC somewhen (tonight?).


armed with the new knowledge, I could now understand what
(more or less) trivially went wrong with my previous patch.

The attached patch remedies that: gfc_get_function_type() now
properly separates the types of the hidden parameters so that
optional+value comes before string length and caf stuff,
while in create_function_arglist we simply need to split
the walking over the typelists so that the optional+value
stuff, which is basically just booleans, is done separately
from the other parts.

Looking at the tree-dumps, the function decls now seem to be
fine at least for the given testcases.  I've adjusted one of
the testcases to validate this.

Regtests fine on x86_64-pc-linux-gnu.  OK for mainline?


this is mostly good.
There is one last corner case that is not properly handled:


diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index 63515b9072a..94988b8690e 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc

(...)

@@ -2619,6 +2620,15 @@ create_function_arglist (gfc_symbol * sym)
 if (f->sym != NULL) /* Ignore alternate returns.  */
   hidden_typelist = TREE_CHAIN (hidden_typelist);
 
+  /* Advance hidden_typelist over optional+value argument presence flags.  */

+  optval_typelist = hidden_typelist;
+  for (f = gfc_sym_get_dummy_args (sym); f; f = f->next)
+if (f->sym != NULL
+   && f->sym->attr.optional && f->sym->attr.value
+   && !f->sym->attr.dimension && f->sym->ts.type != BT_CLASS
+   && !gfc_bt_struct (f->sym->ts.type))
+  hidden_typelist = TREE_CHAIN (hidden_typelist);
+


This new loop copies the condition guarding the handling of optional 
value presence arguments, except that the condition is in an "else if", 
and the complement of the condition in the corresponding "if" is 
missing, to have strictly the same conditions.


Admittedly, it only makes a difference for character optional value 
arguments, which are hardly working.  At least they work as long as one 
doesn't try to query their presence.  Below is a case regressing with 
your patch.


With that fixed, I think it's good for mainline.
Thanks for your patience.


! { dg-do compile }
!
! PR fortran/107441
! Check that procedure types and procedure decls match when the procedure
! has both character-typed and character-typed optional value args.
!
! Contributed by M.Morin

program p
  interface
subroutine i(c, o)
  character(*) :: c
  character(3), optional, value :: o
end subroutine i
  end interface
  procedure(i), pointer :: pp
  pp => s
  call pp("abcd", "xyz")
contains
  subroutine s(c, o)
character(*) :: c
character(3), optional, value :: o
if (o /= "xyz") stop 1
if (c /= "abcd") stop 2
  end subroutine s
end program p




Re: [PATCH] rtl: Try to remove EH edges after {pro, epi}logue generation [PR90259]

2022-11-08 Thread Eric Botcazou via Gcc-patches
> It looks reasonable - OK if the others CCed have no comments.

My only comment is that it needs to be tested with languages enabling
-fnon-call-exceptions by default (Ada & Go), if not already done.

-- 
Eric Botcazou




[PATCH] i386: Improve vector [GL]E{,U} comparison against vector constants [PR107546]

2022-11-08 Thread Jakub Jelinek via Gcc-patches
Hi!

For integer vector comparisons without XOP before AVX512{F,VL} we are
constrained by only GT and EQ being supported in HW.
For GTU we play tricks to implement it using GT or unsigned saturating
subtraction, for LT/LTU we swap the operands and thus turn it into
GT/GTU.  For LE/LEU we handle it by using GT/GTU and negating the
result and for GE/GEU by using GT/GTU on swapped operands and negating
the result.
If the second operand is a CONST_VECTOR, we can usually do better though,
we can avoid the negation.  For LE/LEU cst by doing LT/LTU cst+1 (and
then cst+1 GT/GTU x) and for GE/GEU cst by doing GT/GTU cst-1, provided
there is no wrap-around on those cst+1 or cst-1.
GIMPLE canonicalizes x < cst to x <= cst-1 etc. (the rule is smaller
absolute value on constant), but only for scalars or uniform vectors,
so in some cases this undoes that canonicalization in order to avoid
the extra negation, but it handles also non-uniform constants.
E.g. with -mavx2 the testcase assembly difference is:
-   movl$47, %eax
+   movl$48, %eax
vmovdqa %xmm0, %xmm1
vmovd   %eax, %xmm0
vpbroadcastb%xmm0, %xmm0
-   vpminsb %xmm0, %xmm1, %xmm0
-   vpcmpeqb%xmm1, %xmm0, %xmm0
+   vpcmpgtb%xmm1, %xmm0, %xmm0
and
-   vmovdqa %xmm0, %xmm1
-   vmovdqa .LC1(%rip), %xmm0
-   vpminsb %xmm1, %xmm0, %xmm1
-   vpcmpeqb%xmm1, %xmm0, %xmm0
+   vpcmpgtb.LC1(%rip), %xmm0, %xmm0
while with just SSE2:
-   pcmpgtb .LC0(%rip), %xmm0
-   pxor%xmm1, %xmm1
-   pcmpeqb %xmm1, %xmm0
+   movdqa  %xmm0, %xmm1
+   movdqa  .LC0(%rip), %xmm0
+   pcmpgtb %xmm1, %xmm0
and
-   movdqa  %xmm0, %xmm1
-   movdqa  .LC1(%rip), %xmm0
-   pcmpgtb %xmm1, %xmm0
-   pxor%xmm1, %xmm1
-   pcmpeqb %xmm1, %xmm0
+   pcmpgtb .LC1(%rip), %xmm0

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-11-08  Jakub Jelinek  

PR target/107546
* config/i386/predicates.md (vector_or_const_vector_operand): New
predicate.
* config/i386/sse.md (vec_cmp,
vec_cmpv2div2di, vec_cmpu,
vec_cmpuv2div2di): Use nonimmediate_or_const_vector_operand
predicate instead of nonimmediate_operand and
vector_or_const_vector_operand instead of vector_operand.
* config/i386/i386-expand.cc (ix86_expand_int_sse_cmp): For
LE/LEU or GE/GEU with CONST_VECTOR cop1 try to transform those
into LT/LTU or GT/GTU with larger or smaller by one cop1 if
there is no wrap-around.  Force CONST_VECTOR cop0 or cop1 into
REG.  Formatting fix.

* gcc.target/i386/pr107546.c: New test.

--- gcc/config/i386/predicates.md.jj2022-11-07 10:30:42.73962 +0100
+++ gcc/config/i386/predicates.md   2022-11-07 11:39:42.665065553 +0100
@@ -1235,6 +1235,13 @@ (define_predicate "vector_operand"
   (ior (match_operand 0 "register_operand")
(match_operand 0 "vector_memory_operand")))
 
+; Return true when OP is register_operand, vector_memory_operand
+; or const_vector.
+(define_predicate "vector_or_const_vector_operand"
+  (ior (match_operand 0 "register_operand")
+   (match_operand 0 "vector_memory_operand")
+   (match_code "const_vector")))
+
 (define_predicate "bcst_mem_operand"
   (and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
--- gcc/config/i386/sse.md.jj   2022-11-01 13:33:17.557857756 +0100
+++ gcc/config/i386/sse.md  2022-11-07 11:43:45.703748212 +0100
@@ -4311,7 +4311,7 @@ (define_expand "vec_cmp 0 "register_operand")
(match_operator: 1 ""
  [(match_operand:VI_256 2 "register_operand")
-  (match_operand:VI_256 3 "nonimmediate_operand")]))]
+  (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
   "TARGET_AVX2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4323,7 +4323,7 @@ (define_expand "vec_cmp 0 "register_operand")
(match_operator: 1 ""
  [(match_operand:VI124_128 2 "register_operand")
-  (match_operand:VI124_128 3 "vector_operand")]))]
+  (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4335,7 +4335,7 @@ (define_expand "vec_cmpv2div2di"
   [(set (match_operand:V2DI 0 "register_operand")
(match_operator:V2DI 1 ""
  [(match_operand:V2DI 2 "register_operand")
-  (match_operand:V2DI 3 "vector_operand")]))]
+  (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE4_2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4397,7 +4397,7 @@ (define_expand "vec_cmpu 0 "register_operand")
(match_operator: 1 ""
  [(match_operand:VI_256 2 "register_operand")
-  (match_operand:VI_256 3 "nonimmediate_operand")]))]
+  (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
   "TARGET_AVX2"
 {
   bool ok = ix86_expand_int_

Re: [PATCH] i386: Improve vector [GL]E{, U} comparison against vector constants [PR107546]

2022-11-08 Thread Uros Bizjak via Gcc-patches
On Tue, Nov 8, 2022 at 11:42 AM Jakub Jelinek  wrote:
>
> Hi!
>
> For integer vector comparisons without XOP before AVX512{F,VL} we are
> constrained by only GT and EQ being supported in HW.
> For GTU we play tricks to implement it using GT or unsigned saturating
> subtraction, for LT/LTU we swap the operands and thus turn it into
> GT/GTU.  For LE/LEU we handle it by using GT/GTU and negating the
> result and for GE/GEU by using GT/GTU on swapped operands and negating
> the result.
> If the second operand is a CONST_VECTOR, we can usually do better though,
> we can avoid the negation.  For LE/LEU cst by doing LT/LTU cst+1 (and
> then cst+1 GT/GTU x) and for GE/GEU cst by doing GT/GTU cst-1, provided
> there is no wrap-around on those cst+1 or cst-1.
> GIMPLE canonicalizes x < cst to x <= cst-1 etc. (the rule is smaller
> absolute value on constant), but only for scalars or uniform vectors,
> so in some cases this undoes that canonicalization in order to avoid
> the extra negation, but it handles also non-uniform constants.
> E.g. with -mavx2 the testcase assembly difference is:
> -   movl$47, %eax
> +   movl$48, %eax
> vmovdqa %xmm0, %xmm1
> vmovd   %eax, %xmm0
> vpbroadcastb%xmm0, %xmm0
> -   vpminsb %xmm0, %xmm1, %xmm0
> -   vpcmpeqb%xmm1, %xmm0, %xmm0
> +   vpcmpgtb%xmm1, %xmm0, %xmm0
> and
> -   vmovdqa %xmm0, %xmm1
> -   vmovdqa .LC1(%rip), %xmm0
> -   vpminsb %xmm1, %xmm0, %xmm1
> -   vpcmpeqb%xmm1, %xmm0, %xmm0
> +   vpcmpgtb.LC1(%rip), %xmm0, %xmm0
> while with just SSE2:
> -   pcmpgtb .LC0(%rip), %xmm0
> -   pxor%xmm1, %xmm1
> -   pcmpeqb %xmm1, %xmm0
> +   movdqa  %xmm0, %xmm1
> +   movdqa  .LC0(%rip), %xmm0
> +   pcmpgtb %xmm1, %xmm0
> and
> -   movdqa  %xmm0, %xmm1
> -   movdqa  .LC1(%rip), %xmm0
> -   pcmpgtb %xmm1, %xmm0
> -   pxor%xmm1, %xmm1
> -   pcmpeqb %xmm1, %xmm0
> +   pcmpgtb .LC1(%rip), %xmm0
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2022-11-08  Jakub Jelinek  
>
> PR target/107546
> * config/i386/predicates.md (vector_or_const_vector_operand): New
> predicate.
> * config/i386/sse.md (vec_cmp,
> vec_cmpv2div2di, vec_cmpu,
> vec_cmpuv2div2di): Use nonimmediate_or_const_vector_operand
> predicate instead of nonimmediate_operand and
> vector_or_const_vector_operand instead of vector_operand.
> * config/i386/i386-expand.cc (ix86_expand_int_sse_cmp): For
> LE/LEU or GE/GEU with CONST_VECTOR cop1 try to transform those
> into LT/LTU or GT/GTU with larger or smaller by one cop1 if
> there is no wrap-around.  Force CONST_VECTOR cop0 or cop1 into
> REG.  Formatting fix.
>
> * gcc.target/i386/pr107546.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/predicates.md.jj2022-11-07 10:30:42.73962 +0100
> +++ gcc/config/i386/predicates.md   2022-11-07 11:39:42.665065553 +0100
> @@ -1235,6 +1235,13 @@ (define_predicate "vector_operand"
>(ior (match_operand 0 "register_operand")
> (match_operand 0 "vector_memory_operand")))
>
> +; Return true when OP is register_operand, vector_memory_operand
> +; or const_vector.
> +(define_predicate "vector_or_const_vector_operand"
> +  (ior (match_operand 0 "register_operand")
> +   (match_operand 0 "vector_memory_operand")
> +   (match_code "const_vector")))
> +
>  (define_predicate "bcst_mem_operand"
>(and (match_code "vec_duplicate")
> (and (match_test "TARGET_AVX512F")
> --- gcc/config/i386/sse.md.jj   2022-11-01 13:33:17.557857756 +0100
> +++ gcc/config/i386/sse.md  2022-11-07 11:43:45.703748212 +0100
> @@ -4311,7 +4311,7 @@ (define_expand "vec_cmp[(set (match_operand: 0 "register_operand")
> (match_operator: 1 ""
>   [(match_operand:VI_256 2 "register_operand")
> -  (match_operand:VI_256 3 "nonimmediate_operand")]))]
> +  (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
>"TARGET_AVX2"
>  {
>bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4323,7 +4323,7 @@ (define_expand "vec_cmp[(set (match_operand: 0 "register_operand")
> (match_operator: 1 ""
>   [(match_operand:VI124_128 2 "register_operand")
> -  (match_operand:VI124_128 3 "vector_operand")]))]
> +  (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
>"TARGET_SSE2"
>  {
>bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4335,7 +4335,7 @@ (define_expand "vec_cmpv2div2di"
>[(set (match_operand:V2DI 0 "register_operand")
> (match_operator:V2DI 1 ""
>   [(match_operand:V2DI 2 "register_operand")
> -  (match_operand:V2DI 3 "vector_operand")]))]
> +  (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
>"TARGET_SSE4_2"
>  {
>bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -

[PATCH] cdce: Fix up get_no_error_domain for new f{16,32,64,128} builtins [PR107547]

2022-11-08 Thread Jakub Jelinek via Gcc-patches
Hi!

I've missed that this function needs to handle all the builtins that
are handled in can_test_argument_range.
The following patch does that.  For many of the builtins (like acos, or
log) it is the same range regardless of the floating point type, but for
some (cosh, sinh, exp{,m1,2}) it is different for each format,
so I had to compute those ranges.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Note, it seems the existing ranges were in some cases (e.g. for exp2)
the smallest in absolute value which gives an infinite result, in others
the largest which still gives a finite result (but consistently so
for the IEEE single vs. double).  I've followed that for IEEE half and
quad cases too, just am not sure why it was like that.  I think
get_domain with true, false is open interval rather than closed
and the comments indicate that too, conservatively that is certainly
correct.

OT, with frange, perhaps we could DCE the calls unconditionally if
frange can prove we are in the domain range.

2022-11-08  Jakub Jelinek  

PR tree-optimization/107547
* tree-call-cdce.cc (get_no_error_domain): Handle CASE_FLT_FN_FLOATN_NX
of BUILT_IN_{ACOS,ASIN,ACOSH,ATANH,LOG,LOG2,LOG10,LOG1P}.  Handle
BUILT_IN_{COSH,SINH,EXP,EXPM1,EXP2}F{16,32,64,128}.

* gcc.dg/pr107547.c: New test.

--- gcc/tree-call-cdce.cc.jj2022-10-31 09:04:56.484075098 +0100
+++ gcc/tree-call-cdce.cc   2022-11-07 14:51:54.223803618 +0100
@@ -693,20 +693,31 @@ get_no_error_domain (enum built_in_funct
 {
 /* Trig functions: return [-1, +1]  */
 CASE_FLT_FN (BUILT_IN_ACOS):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOS):
 CASE_FLT_FN (BUILT_IN_ASIN):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_ASIN):
   return get_domain (-1, true, true,
  1, true, true);
 /* Hyperbolic functions.  */
 CASE_FLT_FN (BUILT_IN_ACOSH):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOSH):
   /* acosh: [1, +inf)  */
   return get_domain (1, true, true,
  1, false, false);
 CASE_FLT_FN (BUILT_IN_ATANH):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_ATANH):
   /* atanh: (-1, +1)  */
   return get_domain (-1, true, false,
  1, true, false);
+case BUILT_IN_COSHF16:
+case BUILT_IN_SINHF16:
+  /* coshf16: (-11, +11)  */
+  return get_domain (-11, true, false,
+11, true, false);
 case BUILT_IN_COSHF:
 case BUILT_IN_SINHF:
+case BUILT_IN_COSHF32:
+case BUILT_IN_SINHF32:
   /* coshf: (-89, +89)  */
   return get_domain (-89, true, false,
  89, true, false);
@@ -714,21 +725,39 @@ get_no_error_domain (enum built_in_funct
 case BUILT_IN_SINH:
 case BUILT_IN_COSHL:
 case BUILT_IN_SINHL:
+case BUILT_IN_COSHF64:
+case BUILT_IN_SINHF64:
   /* cosh: (-710, +710)  */
   return get_domain (-710, true, false,
  710, true, false);
+case BUILT_IN_COSHF128:
+case BUILT_IN_SINHF128:
+  /* coshf128: (-11357, +11357)  */
+  return get_domain (-11357, true, false,
+11357, true, false);
 /* Log functions: (0, +inf)  */
 CASE_FLT_FN (BUILT_IN_LOG):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG):
 CASE_FLT_FN (BUILT_IN_LOG2):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG2):
 CASE_FLT_FN (BUILT_IN_LOG10):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG10):
   return get_domain (0, true, false,
  0, false, false);
 CASE_FLT_FN (BUILT_IN_LOG1P):
+CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG1P):
   return get_domain (-1, true, false,
  0, false, false);
 /* Exp functions.  */
+case BUILT_IN_EXPF16:
+case BUILT_IN_EXPM1F16:
+  /* expf: (-inf, 11)  */
+  return get_domain (-1, false, false,
+11, true, false);
 case BUILT_IN_EXPF:
 case BUILT_IN_EXPM1F:
+case BUILT_IN_EXPF32:
+case BUILT_IN_EXPM1F32:
   /* expf: (-inf, 88)  */
   return get_domain (-1, false, false,
  88, true, false);
@@ -736,18 +765,35 @@ get_no_error_domain (enum built_in_funct
 case BUILT_IN_EXPM1:
 case BUILT_IN_EXPL:
 case BUILT_IN_EXPM1L:
+case BUILT_IN_EXPF64:
+case BUILT_IN_EXPM1F64:
   /* exp: (-inf, 709)  */
   return get_domain (-1, false, false,
  709, true, false);
+case BUILT_IN_EXPF128:
+case BUILT_IN_EXPM1F128:
+  /* expf128: (-inf, 11356)  */
+  return get_domain (-1, false, false,
+11356, true, false);
+case BUILT_IN_EXP2F16:
+  /* exp2f16: (-inf, 16)  */
+  return get_domain (-1, false, false,
+16, true, false);
 case BUILT_IN_EXP2F:
+case BUILT_IN_EXP2F32:
   /* exp2f: (-inf, 128)  */
   return get_domain (-1, false, false,
  128, true, false);
 case BUILT_IN_EXP2:
 case BUILT_

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 07, 2022 at 04:38:29PM +0100, Aldy Hernandez wrote:
> From d214bcdff2cb90ad1eb808d29bda6fb98d510b4c Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez 
> Date: Mon, 7 Nov 2022 14:18:57 +0100
> Subject: [PATCH] Provide normalized and denormal format version of
>  real_isdenormal.
> 
> Implement real_isdenormal_target() to be used within real.cc where the
> argument is known to be in denormal format.  Rewrite real_isdenormal()
> for use outside of real.cc where the argument is known to be
> normalized.
> 
> gcc/ChangeLog:
> 
>   * real.cc (real_isdenormal_target): New.
>   (encode_ieee_single): Use real_isdenormal_target.
>   (encode_ieee_double): Same.
>   (encode_ieee_extended): Same.
>   (encode_ieee_quad): Same.
>   (encode_ieee_half): Same.
>   (encode_arm_bfloat_half): Same.
>   * value-range.cc (frange::flush_denormals_to_zero): Same.
>   * real.h (real_isdenormal): Rewrite to look at mode.

I'd make real_isdenormal_target static inline bool
rather than inline bool, it is only defined in real.cc, so there is
no point exporting it.
Though, as you've added the mode argument, the real.cc inline
could very well also be called real_isdenormal too, it wouldn't be
a redeclaration or ODR violation. 

Jakub



Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 07, 2022 at 04:41:23PM +0100, Aldy Hernandez wrote:
> As suggested upthread, I have also adjusted update_nan_sign() to drop
> the NAN sign to VARYING if both operands are NAN.  As an optimization
> I keep the sign if both operands are NAN and have the same sign.

For NaNs this still relies on something IEEE754 doesn't guarantee:
as I cited, after a binary operation the sign bit of the NaN is
unspecified, whether there is one NaN operand or two.
It might be that all CPUs handle it the way you've implemented
(that for one NaN operand the sign of the NaN result will be the same
as that NaN operand and for two it will be the sign of one of the two
NaN operands, never something else), but I think we'd need to check
more than one implementation for that (I've only tried x86_64 and thus
SSE behavior in it), so one would need to test i387 long double behavior
too, ARM/AArch64, PowerPC, s390{,x}, RISCV, ...
The guarantee given by IEEE754 is only for those copy, negate, abs, copySign
operations, so copying values around, NEG_EXPR, ABS_EXPR, __builtin_fabs*,
__builtin_copysign*.

Otherwise LGTM (but would be nice to get into GCC13 not just
+, but also -, *, /, sqrt at least).

Jakub



[PATCH] testsuite: Fix up pr107541.c test

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 07, 2022 at 12:42:38PM +0100, Aldy Hernandez via Gcc-patches wrote:
>   * gcc.dg/tree-ssa/pr107541.c: New test.

The test fails when long is 32-bit rather than 64-bit (say x86_64 with
RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} tree-ssa.exp=pr107541.c'
).
I've tweaked it to use long long so it passes even on the 32-bit
targets, and added an early out for weirdo targets because I think
the test assumes the usual 1/2/4/8 byte sizes for char/short/int/long long.

Tested on x86_64-linux, ok for trunk?

2022-11-08  Jakub Jelinek  

PR tree-optimization/107541
* gcc.dg/tree-ssa/pr107541.c (c): Use long long type rather than long.
(main): Punt if sizeof short isn't 2, or int 4, or long long 8.

--- gcc/testsuite/gcc.dg/tree-ssa/pr107541.c.jj 2022-11-07 15:12:24.519022064 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr107541.c2022-11-08 12:45:12.926718243 
+0100
@@ -3,9 +3,11 @@
 
 unsigned char a = 1;
 char b, e;
-long c;
+long long c;
 short d;
 int main() {
+  if (sizeof (short) != 2 || sizeof (int) != 4 || sizeof (long long) != 8)
+return 0;
   a = ~(1 && a);
   c = ~((~a / 8 | -2) & 11007578330939886389LLU);
   e = -c;


Jakub



Re: [PATCH] c++: Allow module name to be a single letter on Windows

2022-11-08 Thread Nathan Sidwell via Gcc-patches

On 11/8/22 05:18, Torbjorn SVENSSON wrote:

Hi Nathan,

On 2022-11-08 00:03, Nathan Sidwell wrote:




Yes, something like the above, but I think you're missing "/bob" in the
DOS_BASED case?  Shouldn't that also be a pathname?


if (IS_DIR_SEPARATOR (ptr[ptr[0] == '.']) // ./FOO or /FOO
#if HAVE_DOS_BASED_FILE_SYSTEM
 // DOS-FS IS_ABSOLUTE_PATH thinks 'A:B' is absolute, but we need to 
consider
 // that as a module:partition.
 || (HAS_DRIVE_SPEC (ptr) && IS_DIR_SEPARATOR (ptr[2])) // A:/FOO
#endif
 || false)
    return 

Does (something like) that work?


I tested it and your solution appears to work.
Are you okay with me pushing that solution or do you want me to send a v2 with 
it first?


I think it needs a better introductory comment than the one I slapped in there. 
More explanation of the drive vs partition distinction.  Something along the 
lines of 'things that clearly start as pathnames are header-names, everything 
else is treated as a (possibly malformed) named module'.


Feel free to just go with it, or iterate here

nathan

--
Nathan Sidwell



Re: [PATCH] middle-end IFN_ASSUME support [PR106654]

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 10:19:50AM +0100, Pilar Latiesa via Gcc-patches wrote:
> On Mon, Oct 17, 2022 at 05:32:32AM +0200, Martin Uecker wrote:
> > Hm, that already seems to work with
> >
> > if (!std::isfinite(x))
> >   __builtin_unreachable();
> >
> > https://godbolt.org/z/hj3WrEhjb
> 
> Not anymore. Perhaps after making ranger the VRP default, because I get the
> mentioned outcome with --param=vrp1-mode=vrp

I've filed https://gcc.gnu.org/PR107569 for this.

Jakub



[PATCH][RFC] tree-optimization/99416 - loop distribution wrt vect data dependence

2022-11-08 Thread Richard Biener via Gcc-patches
The following adds additional benefit heuristics for loop distribution
for the case where the distributed loop can be vectorized fine but
when partitions are merged a data dependence prohibits vectorization.

The heuristic computes dependences inside of partitions to determine
whether they are good for vectorization or whether there's a dependence
that is bad and does so before committing to a cost-model based
merging for the result as well, prohibiting this.  It also allows
the special case of two partitions with bad merged dependence but
unmerged OK dependence to slip through the very conservative final
assessment of cases to split.

When this is applied it shows the pre-existing weakness of loop
distribution with regard to CSE and the inability to re-materialize
a register via a memory load from a store of the same value inside
another partition.  So this patch is mostly an RFC; I did experiment
somewhat with brute-forcing of the CSE issue but that didn't end up
very useful either.

Bootstrapped and tested on x86_64-unknown-linux-gnu, queued
for consideration after more loop distribution work.

PR tree-optimization/99416
* tree-loop-distribution.cc (enum partition_deps): New.
(partition::deps): Likewise.
(loop_distribution::classify_dependences): Likewise.
(loop_distribution::classify_and_merge_dependences): Likewise.
(loop_distribution::partition_merge_into): Update dependence
status.
(loop_distribution::finalize_partitions): Prevent merging
all non-builtin partitions if we'd merge a vectorizable and
a non-vectorizable partition.
(loop_distribution::distribute_loop): Prevent the memory
re-use cost model from merging partitions when that would
merge a non-vectorizable and a vectorizable partition.

* gcc.dg/vect/pr99416.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr99416.c |  22 +
 gcc/tree-loop-distribution.cc   | 123 ++--
 2 files changed, 140 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr99416.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr99416.c 
b/gcc/testsuite/gcc.dg/vect/pr99416.c
new file mode 100644
index 000..40a34c23c81
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr99416.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-ftree-loop-distribution --param 
vect-epilogues-nomask=0" } */
+
+typedef float real_t;
+
+#define iterations 10
+#define LEN_1D 32000
+real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D];
+void foo()
+{
+  for (int nl = 0; nl < iterations; nl++)
+/* To vectorize this loop we have to distribute it since
+   we cannot vectorize (B) but only (A).  */
+for (int i = 1; i < LEN_1D-1; i++)
+  {
+   a[i] = b[i - 1] + c[i] * d[i];  /* (A) */
+   b[i] = b[i + 1] - e[i] * d[i];  /* (B) */
+  }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } } */
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index ed3dd73e1a9..5bcfc99480d 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -251,6 +251,20 @@ struct builtin_info
   unsigned HOST_WIDE_INT dst_base_offset;
 };
 
+/* Data dependences within the partition.  */
+enum partition_deps {
+/* Dependences not computed. */
+PDEPS_UNKNOWN = 0,
+/* No dependences.  */
+PDEPS_NONE,
+/* Dependences are OK for vectorization with arbitrary VF.  */
+PDEPS_VECT_OK,
+/* There are dependences.  */
+PDEPS_SOME,
+/* There are dependences that inhibit vectorization with any VF > 1.  */
+PDEPS_VECT_BAD
+};
+
 /* Partition for loop distribution.  */
 struct partition
 {
@@ -261,6 +275,7 @@ struct partition
   location_t loc;
   enum partition_kind kind;
   enum partition_type type;
+  enum partition_deps deps;
   /* Data references in the partition.  */
   bitmap datarefs;
   /* Information of builtin parition.  */
@@ -602,6 +617,11 @@ class loop_distribution
   bool share_memory_accesses (struct graph *rdg,
  partition *partition1, partition *partition2);
 
+  partition_deps classify_dependences (struct graph *rdg, bitmap);
+  partition_deps classify_and_merge_dependences (struct graph *rdg,
+partition *partition1,
+partition *partition2);
+
   /* For each seed statement in STARTING_STMTS, this function builds
  partition for it by adding depended statements according to RDG.
  All partitions are recorded in PARTITIONS.  */
@@ -644,7 +664,7 @@ class loop_distribution
   /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
  ALIAS_DDRS contains ddrs which need runtime alias check.  */
   void finalize_partitions (class loop *loop, vec
-   *partitions, vec *alias_ddrs);
+

Re: [PATCH] cdce: Fix up get_no_error_domain for new f{16, 32, 64, 128} builtins [PR107547]

2022-11-08 Thread Richard Biener via Gcc-patches
On Tue, 8 Nov 2022, Jakub Jelinek wrote:

> Hi!
> 
> I've missed that this function needs to handle all the builtins that
> are handled in can_test_argument_range.
> The following patch does that.  For many of the builtins (like acos, or
> log) it is the same range regardless of the floating point type, but for
> some (cosh, sinh, exp{,m1,2}) it is different for each format,
> so I had to compute those ranges.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> Note, seems the existing ranges were in some cases (e.g. for exp2)
> the smallest in absolute value which results infinite result, in others
> the largest which still results in finite result (but consistently so
> for the IEEE single vs. double).  I've followed that for IEEE half and
> quad cases too, just am not sure why it was like that.  I think
> get_domain with true, false is open interval rather than closed
> and the comments indicate that too, conservatively that is certainly
> correct.
> 
> OT, with frange, perhaps we could DCE the calls unconditionally if
> frange can prove we are in the domain range.
> 
> 2022-11-08  Jakub Jelinek  
> 
>   PR tree-optimization/107547
>   * tree-call-cdce.cc (get_no_error_domain): Handle CASE_FLT_FN_FLOATN_NX
>   of BUILT_IN_{ACOS,ASIN,ACOSH,ATANH,LOG,LOG2,LOG10,LOG1P}.  Handle
>   BUILT_IN_{COSH,SINH,EXP,EXPM1,EXP2}F{16,32,64,128}.
> 
>   * gcc.dg/pr107547.c: New test.
> 
> --- gcc/tree-call-cdce.cc.jj  2022-10-31 09:04:56.484075098 +0100
> +++ gcc/tree-call-cdce.cc 2022-11-07 14:51:54.223803618 +0100
> @@ -693,20 +693,31 @@ get_no_error_domain (enum built_in_funct
>  {
>  /* Trig functions: return [-1, +1]  */
>  CASE_FLT_FN (BUILT_IN_ACOS):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOS):
>  CASE_FLT_FN (BUILT_IN_ASIN):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_ASIN):
>return get_domain (-1, true, true,
>   1, true, true);
>  /* Hyperbolic functions.  */
>  CASE_FLT_FN (BUILT_IN_ACOSH):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_ACOSH):
>/* acosh: [1, +inf)  */
>return get_domain (1, true, true,
>   1, false, false);
>  CASE_FLT_FN (BUILT_IN_ATANH):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_ATANH):
>/* atanh: (-1, +1)  */
>return get_domain (-1, true, false,
>   1, true, false);
> +case BUILT_IN_COSHF16:
> +case BUILT_IN_SINHF16:
> +  /* coshf16: (-11, +11)  */
> +  return get_domain (-11, true, false,
> +  11, true, false);
>  case BUILT_IN_COSHF:
>  case BUILT_IN_SINHF:
> +case BUILT_IN_COSHF32:
> +case BUILT_IN_SINHF32:
>/* coshf: (-89, +89)  */
>return get_domain (-89, true, false,
>   89, true, false);
> @@ -714,21 +725,39 @@ get_no_error_domain (enum built_in_funct
>  case BUILT_IN_SINH:
>  case BUILT_IN_COSHL:
>  case BUILT_IN_SINHL:
> +case BUILT_IN_COSHF64:
> +case BUILT_IN_SINHF64:
>/* cosh: (-710, +710)  */
>return get_domain (-710, true, false,
>   710, true, false);
> +case BUILT_IN_COSHF128:
> +case BUILT_IN_SINHF128:
> +  /* coshf128: (-11357, +11357)  */
> +  return get_domain (-11357, true, false,
> +  11357, true, false);
>  /* Log functions: (0, +inf)  */
>  CASE_FLT_FN (BUILT_IN_LOG):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG):
>  CASE_FLT_FN (BUILT_IN_LOG2):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG2):
>  CASE_FLT_FN (BUILT_IN_LOG10):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG10):
>return get_domain (0, true, false,
>   0, false, false);
>  CASE_FLT_FN (BUILT_IN_LOG1P):
> +CASE_FLT_FN_FLOATN_NX (BUILT_IN_LOG1P):
>return get_domain (-1, true, false,
>   0, false, false);
>  /* Exp functions.  */
> +case BUILT_IN_EXPF16:
> +case BUILT_IN_EXPM1F16:
> +  /* expf: (-inf, 11)  */
> +  return get_domain (-1, false, false,
> +  11, true, false);
>  case BUILT_IN_EXPF:
>  case BUILT_IN_EXPM1F:
> +case BUILT_IN_EXPF32:
> +case BUILT_IN_EXPM1F32:
>/* expf: (-inf, 88)  */
>return get_domain (-1, false, false,
>   88, true, false);
> @@ -736,18 +765,35 @@ get_no_error_domain (enum built_in_funct
>  case BUILT_IN_EXPM1:
>  case BUILT_IN_EXPL:
>  case BUILT_IN_EXPM1L:
> +case BUILT_IN_EXPF64:
> +case BUILT_IN_EXPM1F64:
>/* exp: (-inf, 709)  */
>return get_domain (-1, false, false,
>   709, true, false);
> +case BUILT_IN_EXPF128:
> +case BUILT_IN_EXPM1F128:
> +  /* expf128: (-inf, 11356)  */
> +  return get_domain (-1, false, false,
> +  11356, true, false);
> +case BUILT_IN_EXP2F16:
> +  /* exp2f16: (-inf, 16)  */
> +  return get_domain (-1, false

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 12:07 PM Jakub Jelinek  wrote:
>
> On Mon, Nov 07, 2022 at 04:38:29PM +0100, Aldy Hernandez wrote:
> > From d214bcdff2cb90ad1eb808d29bda6fb98d510b4c Mon Sep 17 00:00:00 2001
> > From: Aldy Hernandez 
> > Date: Mon, 7 Nov 2022 14:18:57 +0100
> > Subject: [PATCH] Provide normalized and denormal format version of
> >  real_isdenormal.
> >
> > Implement real_isdenormal_target() to be used within real.cc where the
> > argument is known to be in denormal format.  Rewrite real_isdenormal()
> > for use outside of real.cc where the argument is known to be
> > normalized.
> >
> > gcc/ChangeLog:
> >
> >   * real.cc (real_isdenormal_target): New.
> >   (encode_ieee_single): Use real_isdenormal_target.
> >   (encode_ieee_double): Same.
> >   (encode_ieee_extended): Same.
> >   (encode_ieee_quad): Same.
> >   (encode_ieee_half): Same.
> >   (encode_arm_bfloat_half): Same.
> >   * value-range.cc (frange::flush_denormals_to_zero): Same.
> >   * real.h (real_isdenormal): Rewrite to look at mode.
>
> I'd make real_isdenormal_target static inline bool
> rather than inline bool, it is only defined in real.cc, so there is
> no point exporting it.

Huh.  I thought inline alone would inhibit the exporting.  Thanks.

> Though, as you've added the mode argument, the real.cc inline
> could very well also be called real_isdenormal too, it wouldn't be
> a redeclaration or ODR violation.

Great, even better.

OK pending tests?
Aldy
From c3ca1d606bfb22bf4f8bc7ac0ce561bd6afc3368 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Mon, 7 Nov 2022 14:18:57 +0100
Subject: [PATCH] Provide normalized and denormal format version of
 real_isdenormal.

Implement a variant of real_isdenormal() to be used within real.cc
where the argument is known to be in denormal format.  Rewrite
real_isdenormal() for use outside of real.cc where the argument is
known to be normalized.

gcc/ChangeLog:

	* real.cc (real_isdenormal): New.
	* real.h (real_isdenormal): Add mode argument.  Rewrite for
	normalized values.
	* value-range.cc (frange::flush_denormals_to_zero): Pass mode to
	real_isdenormal.
---
 gcc/real.cc| 10 ++
 gcc/real.h |  7 ---
 gcc/value-range.cc |  5 +++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/gcc/real.cc b/gcc/real.cc
index aae7c335d59..028aad95ec4 100644
--- a/gcc/real.cc
+++ b/gcc/real.cc
@@ -111,6 +111,16 @@ static const REAL_VALUE_TYPE * real_digit (int);
 static void times_pten (REAL_VALUE_TYPE *, int);
 
 static void round_for_format (const struct real_format *, REAL_VALUE_TYPE *);
+
+/* Determine whether a floating-point value X is a denormal.  R is
+   expected to be in denormal form, so this function is only
+   meaningful after a call to round_for_format.  */
+
+static inline bool
+real_isdenormal (const REAL_VALUE_TYPE *r)
+{
+  return (r->sig[SIGSZ-1] & SIG_MSB) == 0;
+}
 
 /* Initialize R with a positive zero.  */
 
diff --git a/gcc/real.h b/gcc/real.h
index 306e9593866..b14bcdd3fde 100644
--- a/gcc/real.h
+++ b/gcc/real.h
@@ -286,11 +286,12 @@ extern bool real_isnan (const REAL_VALUE_TYPE *);
 /* Determine whether a floating-point value X is a signaling NaN.  */
 extern bool real_issignaling_nan (const REAL_VALUE_TYPE *);
 
-/* Determine whether a floating-point value X is a denormal.  */
+/* Determine whether floating-point value R is a denormal.  This
+   function is only valid for normalized values.  */
 inline bool
-real_isdenormal (const REAL_VALUE_TYPE *r)
+real_isdenormal (const REAL_VALUE_TYPE *r, machine_mode mode)
 {
-  return r->cl == rvc_normal && (r->sig[SIGSZ-1] & SIG_MSB) == 0;
+  return r->cl == rvc_normal && REAL_EXP (r) < REAL_MODE_FORMAT (mode)->emin;
 }
 
 /* Determine whether a floating-point value X is finite.  */
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index a855aaf626c..859c7fb4af9 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -266,15 +266,16 @@ frange::flush_denormals_to_zero ()
   if (undefined_p () || known_isnan ())
 return;
 
+  machine_mode mode = TYPE_MODE (type ());
   // Flush [x, -DENORMAL] to [x, -0.0].
-  if (real_isdenormal (&m_max) && real_isneg (&m_max))
+  if (real_isdenormal (&m_max, mode) && real_isneg (&m_max))
 {
   m_max = dconst0;
   if (HONOR_SIGNED_ZEROS (m_type))
 	m_max.sign = 1;
 }
   // Flush [+DENORMAL, x] to [+0.0, x].
-  if (real_isdenormal (&m_min) && !real_isneg (&m_min))
+  if (real_isdenormal (&m_min, mode) && !real_isneg (&m_min))
 m_min = dconst0;
 }
 
-- 
2.38.1



[PATCH][v2] tree-optimization/107389 - honor __builtin_assume_alignment at -O0

2022-11-08 Thread Richard Biener via Gcc-patches
The following makes sure to set alignment information on the LHS
of __builtin_assume_aligned calls even when not optimizing so
uses as arguments to builtin functions like memcpy or __atomic_load_n
can be reflected at RTL expansion time.

Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

PR tree-optimization/107389
* gimple-low.cc (lower_builtin_assume_aligned): New.
(lower_stmt): Call it.

* gcc.dg/pr107389.c: New testcase.
---
 gcc/gimple-low.cc   | 41 +
 gcc/testsuite/gcc.dg/pr107389.c | 13 +++
 2 files changed, 54 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr107389.c

diff --git a/gcc/gimple-low.cc b/gcc/gimple-low.cc
index 512aa9feada..f9bcb772163 100644
--- a/gcc/gimple-low.cc
+++ b/gcc/gimple-low.cc
@@ -84,6 +84,7 @@ static void lower_try_catch (gimple_stmt_iterator *, struct 
lower_data *);
 static void lower_gimple_return (gimple_stmt_iterator *, struct lower_data *);
 static void lower_builtin_setjmp (gimple_stmt_iterator *);
 static void lower_builtin_posix_memalign (gimple_stmt_iterator *);
+static void lower_builtin_assume_aligned (gimple_stmt_iterator *);
 
 
 /* Lower the body of current_function_decl from High GIMPLE into Low
@@ -768,6 +769,13 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data *data)
lower_builtin_posix_memalign (gsi);
return;
  }
+   else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_ASSUME_ALIGNED
+&& !optimize)
+ {
+   lower_builtin_assume_aligned (gsi);
+   data->cannot_fallthru = false;
+   return;
+ }
  }
 
if (decl && (flags_from_decl_or_type (decl) & ECF_NORETURN))
@@ -1310,6 +1318,39 @@ lower_builtin_posix_memalign (gimple_stmt_iterator *gsi)
   gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
   gsi_insert_after (gsi, gimple_build_label (noalign_label), GSI_NEW_STMT);
 }
+
+/* Lower calls to __builtin_assume_aligned when not optimizing.  */
+
+static void
+lower_builtin_assume_aligned (gimple_stmt_iterator *gsi)
+{
+  gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
+
+  tree lhs = gimple_call_lhs (call);
+  if (!lhs || !POINTER_TYPE_P (TREE_TYPE (lhs)))
+return;
+
+  tree align = gimple_call_arg (call, 1);
+  tree misalign = (gimple_call_num_args (call) > 2
+  ? gimple_call_arg (call, 2) : NULL_TREE);
+  if (!tree_fits_uhwi_p (align)
+  || (misalign && !tree_fits_uhwi_p (misalign)))
+return;
+
+  unsigned aligni = TREE_INT_CST_LOW (align);
+  unsigned misaligni = misalign ? TREE_INT_CST_LOW (misalign) : 0;
+  if (aligni <= 1
+  || (aligni & (aligni - 1)) != 0
+  || (misaligni & ~(aligni - 1)) != 0)
+return;
+
+  /* For lowering we simply transfer alignment information to the
+ result and leave the call otherwise unchanged, it will be elided
+ at RTL expansion time.  */
+  ptr_info_def *pi = get_ptr_info (lhs);
+  set_ptr_info_alignment (pi, aligni, misaligni);
+  gsi_next (gsi);
+}
 
 
 /* Record the variables in VARS into function FN.  */
diff --git a/gcc/testsuite/gcc.dg/pr107389.c b/gcc/testsuite/gcc.dg/pr107389.c
new file mode 100644
index 000..deb63380704
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr107389.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-optimized-alias" } */
+
+unsigned foo (void *p)
+{
+  unsigned i;
+  __builtin_memcpy (&i, __builtin_assume_aligned (p, 4), sizeof (unsigned));
+  return i;
+}
+
+/* Even when not optimizing we should have alignment info on the temporary
+   feeding the memcpy.  */
+/* { dg-final { scan-tree-dump "ALIGN = 4" "optimized" } } */
-- 
2.35.3


Re: [PATCH][v2] tree-optimization/107389 - honor __builtin_assume_alignment at -O0

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 01:53:48PM +0100, Richard Biener wrote:
> The following makes sure to set alignment information on the LHS
> of __builtin_assume_alignment calls even when not optimizing so
> uses as arguments to builtin functions like memcpy or __atomic_load_n
> can be reflected at RTL expansion time.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> 
> Thanks,
> Richard.
> 
>   PR tree-optimization/107389
>   * gimple-low.cc (lower_builtin_assume_aligned): New.
>   (lower_stmt): Call it.
> 
>   * gcc.dg/pr107389.c: New testcase.
> ---
>  gcc/gimple-low.cc   | 41 +
>  gcc/testsuite/gcc.dg/pr107389.c | 13 +++
>  2 files changed, 54 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr107389.c
> 
> diff --git a/gcc/gimple-low.cc b/gcc/gimple-low.cc
> index 512aa9feada..f9bcb772163 100644
> --- a/gcc/gimple-low.cc
> +++ b/gcc/gimple-low.cc
> @@ -84,6 +84,7 @@ static void lower_try_catch (gimple_stmt_iterator *, struct 
> lower_data *);
>  static void lower_gimple_return (gimple_stmt_iterator *, struct lower_data 
> *);
>  static void lower_builtin_setjmp (gimple_stmt_iterator *);
>  static void lower_builtin_posix_memalign (gimple_stmt_iterator *);
> +static void lower_builtin_assume_aligned (gimple_stmt_iterator *);
>  
>  
>  /* Lower the body of current_function_decl from High GIMPLE into Low
> @@ -768,6 +769,13 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data 
> *data)
>   lower_builtin_posix_memalign (gsi);
>   return;
> }
> + else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_ASSUME_ALIGNED
> +  && !optimize)
> +   {
> + lower_builtin_assume_aligned (gsi);
> + data->cannot_fallthru = false;
> + return;
> +   }
> }
>  
>   if (decl && (flags_from_decl_or_type (decl) & ECF_NORETURN))
> @@ -1310,6 +1318,39 @@ lower_builtin_posix_memalign (gimple_stmt_iterator 
> *gsi)
>gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
>gsi_insert_after (gsi, gimple_build_label (noalign_label), GSI_NEW_STMT);
>  }
> +
> +/* Lower calls to __builtin_assume_aligned when not optimizing.  */
> +
> +static void
> +lower_builtin_assume_aligned (gimple_stmt_iterator *gsi)
> +{
> +  gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
> +
> +  tree lhs = gimple_call_lhs (call);
> +  if (!lhs || !POINTER_TYPE_P (TREE_TYPE (lhs)))
> +return;

I think nothing checks that TREE_CODE (lhs) == SSA_NAME,
that isn't guaranteed before ssa pass.
Otherwise LGTM.

Jakub



Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 12:20 PM Jakub Jelinek  wrote:
>
> On Mon, Nov 07, 2022 at 04:41:23PM +0100, Aldy Hernandez wrote:
> > As suggested upthread, I have also adjusted update_nan_sign() to drop
> > the NAN sign to VARYING if both operands are NAN.  As an optimization
> > I keep the sign if both operands are NAN and have the same sign.
>
> For NaNs this still relies on something IEEE754 doesn't guarantee,
> as I cited, after a binary operation the sign bit of the NaN is
> unspecified, whether there is one NaN operand or two.
> It might be that all CPUs handle it the way you've implemented
> (that for one NaN operand the sign of NaN result will be the same
> as that NaN operand and for two it will be the sign of one of the two
> NaNs operands, never something else), but I think we'd need to check
> more than one implementation for that (I've only tried x86_64 and thus
> SSE behavior in it), so one would need to test i387 long double behavior
> too, ARM/AArch64, PowerPC, s390{,x}, RISCV, ...
> The guarantee given by IEEE754 is only for those copy, negate, abs, copySign
> operations, so copying values around, NEG_EXPR, ABS_EXPR, __builtin_fabs*,
> __builtin_copysign*.

Ughh, that's unfortunate.  OK, I've added a big note.

>
> Otherwise LGTM (but would be nice to get into GCC13 not just
> +, but also -, *, /, sqrt at least).

Minus is trivial as we can implement it with a negate and plus.  I
have a patch queued up for that.  The rest require a bit more thought,
though perhaps with what we have so far can serve as a base.  I'll
look into it.

Attached is the patch I'm retesting.

Thanks for your patience, and copious help here.
Aldy
From 32e9063bbd5a48bf7f7b16077ebc0c1e7bf3c33d Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Thu, 13 Oct 2022 08:14:16 +0200
Subject: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

This is the range-op entry for floating point PLUS_EXPR.  It's the
most intricate range entry we have so far, because we need to keep
track of rounding and target FP formats.  This will be the last FP
entry I commit, mostly to avoid disturbing the tree any further, and
also because what we have so far is enough for a solid VRP.

So far we track NANs and signs correctly.  We also handle relationals
(symbolics and numeric), both ordered and unordered, ABS_EXPR and
NEGATE_EXPR which are used to fold __builtin_isinf, and __builtin_sign
(__builtin_copysign is coming up).  All in all, I think this provides
more than enough for basic VRP on floats, as well as providing a basis
to flesh out the rest if there's interest.

My goal with this entry is to provide a template for additional binary
operators, as they tend to follow a similar pattern: handle NANs, do
the arithmetic while keeping track of rounding, and adjust for NAN.  I
may abstract the general parts as we do for irange's fold_range and
wi_fold.

	PR tree-optimization/24021

gcc/ChangeLog:

	* range-op-float.cc (update_nan_sign): New.
	(propagate_nans): New.
	(frange_nextafter): New.
	(frange_arithmetic): New.
	(class foperator_plus): New.
	(floating_op_table::floating_op_table): Add PLUS_EXPR entry.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/vrp-float-plus.c: New test.
---
 gcc/range-op-float.cc | 165 ++
 .../gcc.dg/tree-ssa/vrp-float-plus.c  |  21 +++
 2 files changed, 186 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-plus.c

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index a1f372997bf..1a6913b8b98 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -192,6 +192,118 @@ frelop_early_resolve (irange &r, tree type,
 	  && relop_early_resolve (r, type, op1, op2, rel, my_rel));
 }
 
+// If R contains a NAN of unknown sign, update the NAN's signbit
+// depending on two operands.
+
+inline void
+update_nan_sign (frange &r, const frange &op1, const frange &op2)
+{
+  if (!r.maybe_isnan ())
+return;
+
+  bool op1_nan = op1.maybe_isnan ();
+  bool op2_nan = op2.maybe_isnan ();
+  bool sign1, sign2;
+
+  gcc_checking_assert (!r.nan_signbit_p (sign1));
+  if (op1_nan && op2_nan)
+{
+  // If boths signs agree, we could use that sign, but IEEE754
+  // does not guarantee this for a binary operator.  The x86_64
+  // architure does keep the common known sign, but further tests
+  // are needed to see if other architectures do the same (i387
+  // long double, ARM/aarch64, PowerPC, s390,{,x}, RSICV, etc).
+  // In the meantime, keep sign VARYING.
+  ;
+}
+  else if (op1_nan)
+{
+  if (op1.nan_signbit_p (sign1))
+	r.update_nan (sign1);
+}
+  else if (op2_nan)
+{
+  if (op2.nan_signbit_p (sign2))
+	r.update_nan (sign2);
+}
+}
+
+// If either operand is a NAN, set R to the combination of both NANs
+// signwise and return TRUE.
+
+inline bool
+propagate_nans (frange &r, const frange &op1, const frange &op2)
+{
+  if (op1.known_isnan () || op2.known_isnan ())
+{
+  r.set_na

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 01:47:58PM +0100, Aldy Hernandez wrote:
> On Tue, Nov 8, 2022 at 12:07 PM Jakub Jelinek  wrote:
> >
> > On Mon, Nov 07, 2022 at 04:38:29PM +0100, Aldy Hernandez wrote:
> > > From d214bcdff2cb90ad1eb808d29bda6fb98d510b4c Mon Sep 17 00:00:00 2001
> > > From: Aldy Hernandez 
> > > Date: Mon, 7 Nov 2022 14:18:57 +0100
> > > Subject: [PATCH] Provide normalized and denormal format version of
> > >  real_isdenormal.
> > >
> > > Implement real_isdenormal_target() to be used within real.cc where the
> > > argument is known to be in denormal format.  Rewrite real_isdenormal()
> > > for use outside of real.cc where the argument is known to be
> > > normalized.
> > >
> > > gcc/ChangeLog:
> > >
> > >   * real.cc (real_isdenormal_target): New.
> > >   (encode_ieee_single): Use real_isdenormal_target.
> > >   (encode_ieee_double): Same.
> > >   (encode_ieee_extended): Same.
> > >   (encode_ieee_quad): Same.
> > >   (encode_ieee_half): Same.
> > >   (encode_arm_bfloat_half): Same.
> > >   * value-range.cc (frange::flush_denormals_to_zero): Same.
> > >   * real.h (real_isdenormal): Rewrite to look at mode.
> >
> > I'd make real_isdenormal_target static inline bool
> > rather than inline bool, it is only defined in real.cc, so there is
> > no point exporting it.
> 
> Huh.  I thought inline alone would inhibit the exporting.  Thanks.

That is what happens with C99 inline (unless there is extern for the decl),
but C++ inline is different.  It isn't guaranteed to be exported, but
with -fkeep-inline-functions or if you say take address of the inline
in a way that can't be optimized back into a call to the inline (or even
just call it with -O0), it is exported.
> 
> > Though, as you've added the mode argument, the real.cc inline
> > could very well also be called real_isdenormal too, it wouldn't be
> > a redeclaration or ODR violation.
> 
> Great, even better.
> 
> OK pending tests?
> Aldy

> From c3ca1d606bfb22bf4f8bc7ac0ce561bd6afc3368 Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez 
> Date: Mon, 7 Nov 2022 14:18:57 +0100
> Subject: [PATCH] Provide normalized and denormal format version of
>  real_isdenormal.
> 
> Implement a variant of real_isdenormal() to be used within real.cc
> where the argument is known to be in denormal format.  Rewrite
> real_isdenormal() for use outside of real.cc where the argument is
> known to be normalized.
> 
> gcc/ChangeLog:
> 
>   * real.cc (real_isdenormal): New.
>   * real.h (real_isdenormal): Add mode argument.  Rewrite for
>   normalized values.
>   * value-range.cc (frange::flush_denormals_to_zero): Pass mode to
>   real_isdenormal.
> ---
>  gcc/real.cc| 10 ++
>  gcc/real.h |  7 ---
>  gcc/value-range.cc |  5 +++--
>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/real.cc b/gcc/real.cc
> index aae7c335d59..028aad95ec4 100644
> --- a/gcc/real.cc
> +++ b/gcc/real.cc
> @@ -111,6 +111,16 @@ static const REAL_VALUE_TYPE * real_digit (int);
>  static void times_pten (REAL_VALUE_TYPE *, int);
>  
>  static void round_for_format (const struct real_format *, REAL_VALUE_TYPE *);
> +
> +/* Determine whether a floating-point value X is a denormal.  R is
> +   expected to be in denormal form, so this function is only
> +   meaningful after a call to round_for_format.  */
> +
> +static inline bool
> +real_isdenormal (const REAL_VALUE_TYPE *r)
> +{
> +  return (r->sig[SIGSZ-1] & SIG_MSB) == 0;

I would probably keep the r->cl == rvc_normal in here too.
I know the code in real.cc didn't do it before, but what
r->sig is for the rvc_zero/rvc_inf is unclear.  It is true
that get_zero/get_canonical_?nan/get_inf clear the whole sig,
but not really sure if we guarantee that everywhere.
The real.cc uses were like:
  bool denormal = ...;
at the start of the function and then
  switch (...)
{
...
case rvc_normal:
  if (denormal)
...
}
so another even better possibility would be to use your simple
real.cc (real_isdenormal) and drop all the denormal variables, so:
- bool denormal = ...;
  switch (...)
{
...
case rvc_normal:
- if (denormal)
+ if (real_isdenormal (r))
...
}

Otherwise LGTM.

Jakub



Re: [PATCH v2 16/19] modula2 front end: bootstrap and documentation tools

2022-11-08 Thread Gaius Mulley via Gcc-patches
Martin Liška  writes:

> 1) I would prefer using ' instead of ":
>
> $ flake8 ./gcc/m2/tools-src/tidydates.py
> ...
> ./gcc/m2/tools-src/tidydates.py:124:30: Q000 Double quotes found but single 
> quotes preferred
> ./gcc/m2/tools-src/tidydates.py:127:27: Q000 Double quotes found but single 
> quotes preferred
> ./gcc/m2/tools-src/tidydates.py:132:27: Q000 Double quotes found but single 
> quotes preferred
> ./gcc/m2/tools-src/tidydates.py:133:33: Q000 Double quotes found but single 
> quotes preferred
> ./gcc/m2/tools-src/tidydates.py:138:26: Q000 Double quotes found but single 
> quotes preferred
> ./gcc/m2/tools-src/tidydates.py:143:28: Q000 Double quotes found but
> single quotes preferred

ah yes will switch the quotes character.

> 2) Python-names would be nicer:
>
> def writeTemplate(fo, magic, start, end, dates, contribution, summary,
> lic):
>
> def write_template(...)

agreed, will change

> 3) def hasExt(name, ext) - please use Path from pathlib
>
> 4) while (str.find(line, "(*") != -1):
>
> '(*' in line
> ? Similarly elsewhere.
>
> 5) str.find(line, ...)
>
> Use rather directly: line.find(...)
>
> 6) please use flake8:
> https://gcc.gnu.org/codingconventions.html#python

sure will do all above - I used flake8 but maybe the plugins weren't
enabled.  I'll try flake8 on tumbleweed.

> Thanks,
> Martin
>
> P.S. I'm going to merge Sphinx branch this Wednesday, so then we should port 
> your
> conversion scripts to emit .rst instead of .texi.

should be good - I'll complete the rst output in the scripts,

regards,
Gaius


Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 02:06:58PM +0100, Aldy Hernandez wrote:
> +  gcc_checking_assert (!r.nan_signbit_p (sign1));
> +  if (op1_nan && op2_nan)
> +{
> +  // If boths signs agree, we could use that sign, but IEEE754
> +  // does not guarantee this for a binary operator.  The x86_64
> +  // architure does keep the common known sign, but further tests
> +  // are needed to see if other architectures do the same (i387
> +  // long double, ARM/aarch64, PowerPC, s390,{,x}, RSICV, etc).

s/RSICV/RISCV/

> +  // In the meantime, keep sign VARYING.
> +  ;
> +}
> +  else if (op1_nan)
> +{
> +  if (op1.nan_signbit_p (sign1))
> + r.update_nan (sign1);
> +}
> +  else if (op2_nan)
> +{
> +  if (op2.nan_signbit_p (sign2))
> + r.update_nan (sign2);
> +}

Well, these cases also aren't guaranteed for binary operator.
I think a conforming implementation can say copy the NaN operand
to result and toggle the sign.  Or, if the operand would be a sNaN,
it must turn it into a qNaN (don't remember right now if there are
requirements on what it can do with the mantissa which needs to change
for the sNaN -> qNaN difference at least, but whether it can just
generate a canonical qNaN or needs to preserve at least some bits),
but could e.g. clear or toggle the sign of the NaN as well.
Whether there are any such implementations or not is a question.
For the single qNaN operand case, it would surprise me if anybody
bothered to tweak the sign bit in any way, just copying the input
seems simpler to me, but for the sNaN -> qNaN conversion it wouldn't
surprise me that much.

Jakub



Re: [PATCH][v2] tree-optimization/107389 - honor __builtin_assume_alignment at -O0

2022-11-08 Thread Richard Biener via Gcc-patches
On Tue, 8 Nov 2022, Jakub Jelinek wrote:

> On Tue, Nov 08, 2022 at 01:53:48PM +0100, Richard Biener wrote:
> > The following makes sure to set alignment information on the LHS
> > of __builtin_assume_alignment calls even when not optimizing so
> > uses as arguments to builtin functions like memcpy or __atomic_load_n
> > can be reflected at RTL expansion time.
> > 
> > Bootstrap and regtest running on x86_64-unknown-linux-gnu, OK?
> > 
> > Thanks,
> > Richard.
> > 
> > PR tree-optimization/107389
> > * gimple-low.cc (lower_builtin_assume_aligned): New.
> > (lower_stmt): Call it.
> > 
> > * gcc.dg/pr107389.c: New testcase.
> > ---
> >  gcc/gimple-low.cc   | 41 +
> >  gcc/testsuite/gcc.dg/pr107389.c | 13 +++
> >  2 files changed, 54 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr107389.c
> > 
> > diff --git a/gcc/gimple-low.cc b/gcc/gimple-low.cc
> > index 512aa9feada..f9bcb772163 100644
> > --- a/gcc/gimple-low.cc
> > +++ b/gcc/gimple-low.cc
> > @@ -84,6 +84,7 @@ static void lower_try_catch (gimple_stmt_iterator *, 
> > struct lower_data *);
> >  static void lower_gimple_return (gimple_stmt_iterator *, struct lower_data 
> > *);
> >  static void lower_builtin_setjmp (gimple_stmt_iterator *);
> >  static void lower_builtin_posix_memalign (gimple_stmt_iterator *);
> > +static void lower_builtin_assume_aligned (gimple_stmt_iterator *);
> >  
> >  
> >  /* Lower the body of current_function_decl from High GIMPLE into Low
> > @@ -768,6 +769,13 @@ lower_stmt (gimple_stmt_iterator *gsi, struct 
> > lower_data *data)
> > lower_builtin_posix_memalign (gsi);
> > return;
> >   }
> > +   else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_ASSUME_ALIGNED
> > +&& !optimize)
> > + {
> > +   lower_builtin_assume_aligned (gsi);
> > +   data->cannot_fallthru = false;
> > +   return;
> > + }
> >   }
> >  
> > if (decl && (flags_from_decl_or_type (decl) & ECF_NORETURN))
> > @@ -1310,6 +1318,39 @@ lower_builtin_posix_memalign (gimple_stmt_iterator 
> > *gsi)
> >gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
> >gsi_insert_after (gsi, gimple_build_label (noalign_label), GSI_NEW_STMT);
> >  }
> > +
> > +/* Lower calls to __builtin_assume_aligned when not optimizing.  */
> > +
> > +static void
> > +lower_builtin_assume_aligned (gimple_stmt_iterator *gsi)
> > +{
> > +  gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
> > +
> > +  tree lhs = gimple_call_lhs (call);
> > +  if (!lhs || !POINTER_TYPE_P (TREE_TYPE (lhs)))
> > +return;
> 
> I think nothing checks that TREE_CODE (lhs) == SSA_NAME,
> that isn't guaranteed before ssa pass.

Yep, noticed in the testsuite and fixed.

Richard.


Re: [PATCH] invoke: RISC-V's -march doesn't take ISA strings

2022-11-08 Thread Christoph Müllner
On Mon, Nov 7, 2022 at 8:01 PM Palmer Dabbelt  wrote:

> The docs say we take ISA strings, but that's never really been the case:
> at a bare minimum we've required lower case strings, but there's
> generally been some subtle differences as well in things like version
> handling and such.  We talked about removing the lower case requirement
> in the last GNU toolchain meeting and we've always called other
> differences just bugs.  We don't have profile support yet, but based on
> the discussions on the RISC-V lists it looks like we're going to have
> some differences there as well.


> So let's just stop pretending these are ISA strings.  That's been a
> headache for years now, if we're meant to just be ISA-string-like here
> then we don't have to worry about all these long-tail ISA string parsing
> issues.
>

You are right, we should first properly specify the -march string,
before we talk about the implementation details of the parser.

I tried to collect all the recent change requests and undocumented
properties of the -march string and worked on a first draft specification.
As the -march flag should share a common behavior across different
compilers and tools, I've made a PR to the RISC-V toolchain-conventions
repo:
  https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/26

Do you mind if we continue the discussion there?



>
> Link: https://lists.riscv.org/g/sig-toolchains/message/486
>
> gcc/ChangeLog
>
> doc/invoke.texi (RISC-V): -march doesn't take ISA strings.
>
> ---
>
> This is now woefully under-documented, as we can't even fall back on the
> "it's just an ISA string" excuse any more.  I'm happy to go document
> that, but figured I'd just send this along now so we can have the
> discussion.
> ---
>  gcc/doc/invoke.texi | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 94a2e20cfc1..780b0364c52 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -28617,11 +28617,11 @@ Produce code conforming to version 20191213.
>  The default is @option{-misa-spec=20191213} unless GCC has been configured
>  with @option{--with-isa-spec=} specifying a different default version.
>
> -@item -march=@var{ISA-string}
> +@item -march=@var{target-string}
>  @opindex march
> -Generate code for given RISC-V ISA (e.g.@: @samp{rv64im}).  ISA strings must be
> -lower-case.  Examples include @samp{rv64i}, @samp{rv32g}, @samp{rv32e}, and
> -@samp{rv32imaf}.
> +Generate code for given target (e.g.@: @samp{rv64im}).  Target strings are
> +similar to ISA strings, but must be lower-case.  Examples include @samp{rv64i},
> +@samp{rv32g}, @samp{rv32e}, and @samp{rv32imaf}.
>
>  When @option{-march=} is not specified, use the setting from @option{-mcpu}.
>
> --
> 2.38.1
>
>


Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 2:25 PM Jakub Jelinek  wrote:
>
> On Tue, Nov 08, 2022 at 02:06:58PM +0100, Aldy Hernandez wrote:
> > +  gcc_checking_assert (!r.nan_signbit_p (sign1));
> > +  if (op1_nan && op2_nan)
> > +{
> > +  // If boths signs agree, we could use that sign, but IEEE754
> > +  // does not guarantee this for a binary operator.  The x86_64
> > +  // architure does keep the common known sign, but further tests
> > +  // are needed to see if other architectures do the same (i387
> > +  // long double, ARM/aarch64, PowerPC, s390,{,x}, RSICV, etc).
>
> s/RSICV/RISCV/
>
> > +  // In the meantime, keep sign VARYING.
> > +  ;
> > +}
> > +  else if (op1_nan)
> > +{
> > +  if (op1.nan_signbit_p (sign1))
> > + r.update_nan (sign1);
> > +}
> > +  else if (op2_nan)
> > +{
> > +  if (op2.nan_signbit_p (sign2))
> > + r.update_nan (sign2);
> > +}
>
> Well, these cases also aren't guaranteed for binary operator.
> I think a conforming implementation can say copy the NaN operand
> to result and toggle the sign.  Or, if the operand would be a sNaN,
> it must turn it into a qNaN (don't remember right now if there are
> requirements on what it can do with the mantissa which needs to change
> for the sNaN -> qNaN difference at least, but whether it can just
> generate a canonical qNaN or needs to preserve at least some bits),
> but could e.g. clear or toggle the sign of the NaN as well.
> Whether there are any such implementations or not is a question.
> For the single qNaN operand case, it would surprise me if anybody
> bothered to tweak the sign bit in any way, just copying the input
> seems simpler to me, but for the sNaN -> qNaN conversion it wouldn't
> surprise me that much.

Well, perhaps we should just nuke update_nan_sign() altogether, and
always keep the sign varying?

inline bool
propagate_nans (frange &r, const frange &op1, const frange &op2)
{
  if (op1.known_isnan () || op2.known_isnan ())
{
  r.set_nan (op1.type ());
  return true;
}
  return false;
}

I'm fine either way.  The less code the better :).

Aldy



Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 02:47:35PM +0100, Aldy Hernandez wrote:
> Well, perhaps we should just nuke update_nan_sign() altogether, and
> always keep the sign varying?
> 
> inline bool
> propagate_nans (frange &r, const frange &op1, const frange &op2)
> {
>   if (op1.known_isnan () || op2.known_isnan ())
> {
>   r.set_nan (op1.type ());
>   return true;
> }
>   return false;
> }
> 
> I'm fine either way.  The less code the better :).

Yes, but you had 2 callers, so something also needs to be done
in foperator_plus::fold_range.

Jakub



Announcement: Porting the Docs to Sphinx - tomorrow

2022-11-08 Thread Martin Liška
Hi.

Tomorrow in the morning (UTC time), I'm going to migrate the documentation
to Sphinx. The final version of the branch can be seen here:

$ git fetch origin refs/users/marxin/heads/sphinx-final
$ git co FETCH_HEAD 

URL: https://splichal.eu/gccsphinx-final/

TL;DR;

After the migration, people should be able to build (and install) GCC even
if they lack Sphinx (the same happens now if makeinfo is missing). However, please
install Sphinx >= 5.3.0 (for manual and info pages, only the *core* package is
necessary) [1].

Steps following the migration:

1) update of web HTML (and PDF documentation) pages:
   I prepared a script [2] and tested that our server has everything we need.
2) gcc_release --enable-generated-files-in-srcdir: here I would like
   to ask Joseph for cooperation
3) URL for diagnostics (used for warning) - will utilize [3]
4) package source tarballs - https://gcc.gnu.org/onlinedocs/ (listed here)
5) updating links from gcc.gnu.org that point to documentation
6) removal of the further Texinfo leftovers
...

Cheers,
Martin

[1] 
https://splichal.eu/scripts/sphinx/gccint/_build/html/source-tree-structure-and-build-system/the-gcc-subdirectory/building-documentation.html#sphinx-install
[2] ./maintainer-scripts/update_web_docs_git.py
[3] https://pypi.org/project/sphinx-redirect-by-id/


Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 2:15 PM Jakub Jelinek  wrote:
>
> On Tue, Nov 08, 2022 at 01:47:58PM +0100, Aldy Hernandez wrote:
> > On Tue, Nov 8, 2022 at 12:07 PM Jakub Jelinek  wrote:
> > >
> > > On Mon, Nov 07, 2022 at 04:38:29PM +0100, Aldy Hernandez wrote:
> > > > From d214bcdff2cb90ad1eb808d29bda6fb98d510b4c Mon Sep 17 00:00:00 2001
> > > > From: Aldy Hernandez 
> > > > Date: Mon, 7 Nov 2022 14:18:57 +0100
> > > > Subject: [PATCH] Provide normalized and denormal format version of
> > > >  real_isdenormal.
> > > >
> > > > Implement real_isdenormal_target() to be used within real.cc where the
> > > > argument is known to be in denormal format.  Rewrite real_isdenormal()
> > > > for use outside of real.cc where the argument is known to be
> > > > normalized.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >   * real.cc (real_isdenormal_target): New.
> > > >   (encode_ieee_single): Use real_isdenormal_target.
> > > >   (encode_ieee_double): Same.
> > > >   (encode_ieee_extended): Same.
> > > >   (encode_ieee_quad): Same.
> > > >   (encode_ieee_half): Same.
> > > >   (encode_arm_bfloat_half): Same.
> > > >   * value-range.cc (frange::flush_denormals_to_zero): Same.
> > > >   * real.h (real_isdenormal): Rewrite to look at mode.
> > >
> > > I'd make real_isdenormal_target static inline bool
> > > rather than inline bool, it is only defined in real.cc, so there is
> > > no point exporting it.
> >
> > Huh.  I thought inline alone would inhibit the exporting.  Thanks.
>
> That is what happens with C99 inline (unless there is extern for the decl),
> but C++ inline is different.  It isn't guaranteed to be exported, but
> with -fkeep-inline-functions or if you say take address of the inline
> in a way that can't be optimized back into a call to the inline (or even
> just call it with -O0), it is exported.
> >
> > > Though, as you've added the mode argument, the real.cc inline
> > > could very well also be called real_isdenormal too, it wouldn't be
> > > a redeclaration or ODR violation.
> >
> > Great, even better.
> >
> > OK pending tests?
> > Aldy
>
> > From c3ca1d606bfb22bf4f8bc7ac0ce561bd6afc3368 Mon Sep 17 00:00:00 2001
> > From: Aldy Hernandez 
> > Date: Mon, 7 Nov 2022 14:18:57 +0100
> > Subject: [PATCH] Provide normalized and denormal format version of
> >  real_isdenormal.
> >
> > Implement a variant of real_isdenormal() to be used within real.cc
> > where the argument is known to be in denormal format.  Rewrite
> > real_isdenormal() for use outside of real.cc where the argument is
> > known to be normalized.
> >
> > gcc/ChangeLog:
> >
> >   * real.cc (real_isdenormal): New.
> >   * real.h (real_isdenormal): Add mode argument.  Rewrite for
> >   normalized values.
> >   * value-range.cc (frange::flush_denormals_to_zero): Pass mode to
> >   real_isdenormal.
> > ---
> >  gcc/real.cc| 10 ++
> >  gcc/real.h |  7 ---
> >  gcc/value-range.cc |  5 +++--
> >  3 files changed, 17 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/real.cc b/gcc/real.cc
> > index aae7c335d59..028aad95ec4 100644
> > --- a/gcc/real.cc
> > +++ b/gcc/real.cc
> > @@ -111,6 +111,16 @@ static const REAL_VALUE_TYPE * real_digit (int);
> >  static void times_pten (REAL_VALUE_TYPE *, int);
> >
> >  static void round_for_format (const struct real_format *, REAL_VALUE_TYPE 
> > *);
> > +
> > +/* Determine whether a floating-point value X is a denormal.  R is
> > +   expected to be in denormal form, so this function is only
> > +   meaningful after a call to round_for_format.  */
> > +
> > +static inline bool
> > +real_isdenormal (const REAL_VALUE_TYPE *r)
> > +{
> > +  return (r->sig[SIGSZ-1] & SIG_MSB) == 0;
>
> I would probably keep the r->cl == rvc_normal in here too.
> I know the code in real.cc didn't do it before, but what
> r->sig is for the rvc_zero/rvc_inf is unclear.  It is true
> that get_zero/get_canonical_?nan/get_inf clear the whole sig,
> but not really sure if we guarantee that everywhere.
> The real.cc uses were like:
>   bool denormal = ...;
> at the start of the function and then
>   switch (...)
> {
> ...
> case rvc_normal:
>   if (denormal)
> ...
> }
> so another even better possibility would be to use your simple
> real.cc (real_isdenormal) and drop all the denormal variables, so:
> - bool denormal = ...;
>   switch (...)
> {
> ...
> case rvc_normal:
> - if (denormal)
> + if (real_isdenormal (r))
> ...
> }

Sure.

Attached patch in testing.

Aldy
From d02ce8eaf16d2fc6db6472268fd962e09c2fd81e Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Mon, 7 Nov 2022 14:18:57 +0100
Subject: [PATCH] Provide normalized and denormal format version of
 real_isdenormal.

Implement a variant of real_isdenormal() to be used within real.cc
where the argument is known to be in denormal format.  Rewrite
real_isdenormal() for use outside of real.cc where the argument is
known to be normalized.

gcc/ChangeLog:

	* 

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 03:02:40PM +0100, Aldy Hernandez wrote:
> From d02ce8eaf16d2fc6db6472268fd962e09c2fd81e Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez 
> Date: Mon, 7 Nov 2022 14:18:57 +0100
> Subject: [PATCH] Provide normalized and denormal format version of
>  real_isdenormal.
> 
> Implement a variant of real_isdenormal() to be used within real.cc
> where the argument is known to be in denormal format.  Rewrite
> real_isdenormal() for use outside of real.cc where the argument is
> known to be normalized.
> 
> gcc/ChangeLog:
> 
>   * real.cc (real_isdenormal): New.
>   (encode_ieee_single): Call real_isdenormal.
>   (encode_ieee_double): Same.
>   (encode_ieee_extended): Same.
>   (encode_ieee_quad): Same.
>   (encode_ieee_half): Same.
>   (encode_arm_bfloat_half): Same.
>   * real.h (real_isdenormal): Add mode argument.  Rewrite for
>   normalized values.
>   * value-range.cc (frange::flush_denormals_to_zero): Pass mode to
>   real_isdenormal.

LGTM, thanks.

Jakub



Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 2:50 PM Jakub Jelinek  wrote:
>
> On Tue, Nov 08, 2022 at 02:47:35PM +0100, Aldy Hernandez wrote:
> > Well, perhaps we should just nuke update_nan_sign() altogether, and
> > always keep the sign varying?
> >
> > inline bool
> > propagate_nans (frange &r, const frange &op1, const frange &op2)
> > {
> >   if (op1.known_isnan () || op2.known_isnan ())
> > {
> >   r.set_nan (op1.type ());
> >   return true;
> > }
> >   return false;
> > }
> >
> > I'm fine either way.  The less code the better :).
>
> Yes, but you had 2 callers, so something needs to be done also if
> in foperator_plus::fold_range.

We can also remove the update_nan_sign() in the other call because the
r.set() before it sets a default NAN (with a varying sign).

Attached patch in testing.

Aldy
From 8da4fc39cc73f0ae785463bd5f371223fa59027e Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Thu, 13 Oct 2022 08:14:16 +0200
Subject: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

This is the range-op entry for floating point PLUS_EXPR.  It's the
most intricate range entry we have so far, because we need to keep
track of rounding and target FP formats.  This will be the last FP
entry I commit, mostly to avoid disturbing the tree any further, and
also because what we have so far is enough for a solid VRP.

So far we track NANs and signs correctly.  We also handle relationals
(symbolics and numeric), both ordered and unordered, ABS_EXPR and
NEGATE_EXPR which are used to fold __builtin_isinf, and __builtin_sign
(__builtin_copysign is coming up).  All in all, I think this provides
more than enough for basic VRP on floats, as well as provide a basis
to flesh out the rest if there's interest.

My goal with this entry is to provide a template for additional binary
operators, as they tend to follow a similar pattern: handle NANs, do
the arithmetic while keeping track of rounding, and adjust for NAN.  I
may abstract the general parts as we do for irange's fold_range and
wi_fold.

	PR tree-optimization/24021

gcc/ChangeLog:

	* range-op-float.cc (propagate_nans): New.
	(frange_nextafter): New.
	(frange_arithmetic): New.
	(class foperator_plus): New.
	(floating_op_table::floating_op_table): Add PLUS_EXPR entry.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/vrp-float-plus.c: New test.
---
 gcc/range-op-float.cc | 128 ++
 .../gcc.dg/tree-ssa/vrp-float-plus.c  |  21 +++
 2 files changed, 149 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-plus.c

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index a1f372997bf..502b67aaa66 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -192,6 +192,81 @@ frelop_early_resolve (irange &r, tree type,
 	  && relop_early_resolve (r, type, op1, op2, rel, my_rel));
 }
 
+// If either operand is a NAN, set R to the combination of both NANs
+// signwise and return TRUE.
+
+inline bool
+propagate_nans (frange &r, const frange &op1, const frange &op2)
+{
+  if (op1.known_isnan () || op2.known_isnan ())
+{
+  r.set_nan (op1.type ());
+  return true;
+}
+  return false;
+}
+
+// Set VALUE to its next real value, or INF if the operation overflows.
+
+inline void
+frange_nextafter (enum machine_mode mode,
+		  REAL_VALUE_TYPE &value,
+		  const REAL_VALUE_TYPE &inf)
+{
+  const real_format *fmt = REAL_MODE_FORMAT (mode);
+  REAL_VALUE_TYPE tmp;
+  real_nextafter (&tmp, fmt, &value, &inf);
+  value = tmp;
+}
+
+// Like real_arithmetic, but round the result to INF if the operation
+// produced inexact results.
+//
+// ?? There is still one problematic case, i387.  With
+// -fexcess-precision=standard we perform most SF/DFmode arithmetic in
+// XFmode (long_double_type_node), so that case is OK.  But without
+// -mfpmath=sse, all the SF/DFmode computations are in XFmode
+// precision (64-bit mantissa) and only occasionally rounded to
+// SF/DFmode (when storing into memory from the 387 stack).  Maybe
+// this is ok as well though it is just occasionally more precise. ??
+
+static void
+frange_arithmetic (enum tree_code code, tree type,
+		   REAL_VALUE_TYPE &result,
+		   const REAL_VALUE_TYPE &op1,
+		   const REAL_VALUE_TYPE &op2,
+		   const REAL_VALUE_TYPE &inf)
+{
+  REAL_VALUE_TYPE value;
+  enum machine_mode mode = TYPE_MODE (type);
+  bool mode_composite = MODE_COMPOSITE_P (mode);
+
+  bool inexact = real_arithmetic (&value, code, &op1, &op2);
+  real_convert (&result, mode, &value);
+
+  // Be extra careful if there may be discrepancies between the
+  // compile and runtime results.
+  if ((mode_composite || (real_isneg (&inf) ? real_less (&result, &value)
+			  : !real_less (&value, &result)))
+  && (inexact || !real_identical (&result, &value)))
+{
+  if (mode_composite)
+	{
+	  if (real_isdenormal (&result, mode)
+	  || real_iszero (&result))
+	{
+	  // IBM extended denormals only have DFmode precision.
+	  REAL_VALUE_TYPE tmp;
+	

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 08, 2022 at 03:06:53PM +0100, Aldy Hernandez wrote:
> +// If either operand is a NAN, set R to the combination of both NANs
> +// signwise and return TRUE.

This comment doesn't describe what it does now.
If either operand is a NAN, set R to NAN with unspecified sign bit and return
TRUE.
?

Other than this LGTM.

Jakub



Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 8, 2022 at 3:11 PM Jakub Jelinek  wrote:
>
> On Tue, Nov 08, 2022 at 03:06:53PM +0100, Aldy Hernandez wrote:
> > +// If either operand is a NAN, set R to the combination of both NANs
> > +// signwise and return TRUE.
>
> This comment doesn't describe what it does now.
> If either operand is a NAN, set R to NAN with unspecified sign bit and return
> TRUE.
> ?

OMG, I suck!

// If either operand is a NAN, set R to NAN and return TRUE.

Tests on-going :).

Aldy



Re: [PATCH] Use bit-CCP in range-ops.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
Pushed.

I'd still love to hear feedback though ;-).

Aldy

On Sun, Nov 6, 2022 at 5:14 PM Aldy Hernandez  wrote:
>
> After Jakub and Richi's suggestion of using the same representation
> for tracking known bits as we do in CCP, I took a peek at the code and
> realized there's a plethora of bit-tracking code there that we could
> be sharing with range-ops.  For example, the multiplication
> optimizations are way better than what I had cobbled together.  For
> that matter, our maybe nonzero tracking as a whole has a lot of room
> for improvement.  Being the lazy ass that I am, I think we should just
> use one code base (CCP's).
>
> This patch provides a thin wrapper for converting the irange maybe
> nonzero bits to what CCP requires, and uses that to call into
> bit_value_binop().  I have so far converted the MULT_EXPR range-op
> entry to use it, as the DIV_EXPR entry we have gets a case CCP doesn't
> get so I'd like to contribute the enhancement to CCP before converting
> over.
>
> I'd like to use this approach with the dozen or so tree_code's that
> are handled in CCP, thus saving us from having to implement any of
> them :).
>
> Early next season I'd like to change irange's internal representation
> to a pair of value / mask, and start tracking all known bits.  This
> ties in nicely with our plan for tracking known set bits.
>
> Perhaps if the stars align, we could merge the bit twiddling in CCP
> into range-ops and have a central repository for it.  That is, once we
> make the switch to wide-ints, and assuming there are no performance
> issues.  Note that range-ops is our lowest level abstraction.
> i.e. it's just the math, there's no GORI or ranger, or even the
> concept of a symbolic or SSA.
>
> I'd love to hear comments and ideas, and if no one objects push this.
> Please let me know if I missed anything.
>
> Tested on x86-64 Linux.
>
> gcc/ChangeLog:
>
> * range-op.cc (irange_to_masked_value): New.
> (update_known_bitmask): New.
> (operator_mult::fold_range): Call update_known_bitmask.
> ---
>  gcc/range-op.cc | 63 +++--
>  1 file changed, 50 insertions(+), 13 deletions(-)
>
> diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> index 25c004d8287..6d9914d8d12 100644
> --- a/gcc/range-op.cc
> +++ b/gcc/range-op.cc
> @@ -46,6 +46,54 @@ along with GCC; see the file COPYING3.  If not see
>  #include "wide-int.h"
>  #include "value-relation.h"
>  #include "range-op.h"
> +#include "tree-ssa-ccp.h"
> +
> +// Convert irange bitmasks into a VALUE MASK pair suitable for calling CCP.
> +
> +static void
> +irange_to_masked_value (const irange &r, widest_int &value, widest_int &mask)
> +{
> +  if (r.singleton_p ())
> +{
> +  mask = 0;
> +  value = widest_int::from (r.lower_bound (), TYPE_SIGN (r.type ()));
> +}
> +  else
> +{
> +  mask = widest_int::from (r.get_nonzero_bits (), TYPE_SIGN (r.type ()));
> +  value = 0;
> +}
> +}
> +
> +// Update the known bitmasks in R when applying the operation CODE to
> +// LH and RH.
> +
> +static void
> +update_known_bitmask (irange &r, tree_code code,
> + const irange &lh, const irange &rh)
> +{
> +  if (r.undefined_p ())
> +return;
> +
> +  widest_int value, mask, lh_mask, rh_mask, lh_value, rh_value;
> +  tree type = r.type ();
> +  signop sign = TYPE_SIGN (type);
> +  int prec = TYPE_PRECISION (type);
> +  signop lh_sign = TYPE_SIGN (lh.type ());
> +  signop rh_sign = TYPE_SIGN (rh.type ());
> +  int lh_prec = TYPE_PRECISION (lh.type ());
> +  int rh_prec = TYPE_PRECISION (rh.type ());
> +
> +  irange_to_masked_value (lh, lh_value, lh_mask);
> +  irange_to_masked_value (rh, rh_value, rh_mask);
> +  bit_value_binop (code, sign, prec, &value, &mask,
> +  lh_sign, lh_prec, lh_value, lh_mask,
> +  rh_sign, rh_prec, rh_value, rh_mask);
> +
> +  int_range<2> tmp (type);
> +  tmp.set_nonzero_bits (value | mask);
> +  r.intersect (tmp);
> +}
>
>  // Return the upper limit for a type.
>
> @@ -1774,21 +1822,10 @@ operator_mult::fold_range (irange &r, tree type,
>if (!cross_product_operator::fold_range (r, type, lh, rh, trio))
>  return false;
>
> -  if (lh.undefined_p ())
> +  if (lh.undefined_p () || rh.undefined_p ())
>  return true;
>
> -  tree t;
> -  if (rh.singleton_p (&t))
> -{
> -  wide_int w = wi::to_wide (t);
> -  int shift = wi::exact_log2 (w);
> -  if (shift != -1)
> -   {
> - wide_int nz = lh.get_nonzero_bits ();
> - nz = wi::lshift (nz, shift);
> - r.set_nonzero_bits (nz);
> -   }
> -}
> +  update_known_bitmask (r, MULT_EXPR, lh, rh);
>return true;
>  }
>
> --
> 2.38.1
>



Re: [PATCH] diagnostics: Allow FEs to keep customizations for middle end [PR101551, PR106274]

2022-11-08 Thread Richard Biener via Gcc-patches
On Thu, Nov 3, 2022 at 9:07 PM Lewis Hyatt  wrote:
>
> On Fri, Oct 28, 2022 at 10:28:21AM +0200, Richard Biener wrote:
> > Yes, the idea was also to free up memory but then that part never
> > really materialized - the idea was to always run free-lang-data, not
> > just when later outputting LTO bytecode.  The reason is probably
> > mainly the diagnostic regressions you observe.
> >
> > Maybe a better strategy than your patch would be to work towards
> > that goal but reduce the number of "freeings", instead adjusting the
> > LTO streamer to properly ignore frontend specific bits where clearing
> > conflicts with the intent to preserve accurate diagnostics throughout
> > the compilation.
> >
> > If you see bits that when not freed would fix some of the observed
> > issues we can see to replicate the freeing in the LTO output machinery.
> >
> > Richard.
>
> Thanks again for the suggestions. I took a look and it seems pretty doable to
> just stop resetting all the diagnostics hooks in free-lang-data. Once that's
> done, the only problematic part that I have been able to identify is here in
> ipa-free-lang-data.c around line 674:
>
> 
>   /* We need to keep field decls associated with their trees. Otherwise tree
>  merging may merge some fields and keep others disjoint which in turn will
>  not do well with TREE_CHAIN pointers linking them.
>
>  Also do not drop containing types for virtual methods and tables because
>  these are needed by devirtualization.
>  C++ destructors are special because C++ frontends sometimes produces
>  virtual destructor as an alias of non-virtual destructor.  In
>      devirtualization code we always walk through aliases and we need
>  context to be preserved too.  See PR89335  */
>   if (TREE_CODE (decl) != FIELD_DECL
>   && ((TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
>   || (!DECL_VIRTUAL_P (decl)
>   && (TREE_CODE (decl) != FUNCTION_DECL
>   || !DECL_CXX_DESTRUCTOR_P (decl)
> DECL_CONTEXT (decl) = fld_decl_context (DECL_CONTEXT (decl));
> 
>
> The C++ implementations of the decl_printable_name langhook and the diagnostic
> starter callback do not work as-is when the DECL_CONTEXT for class member
> functions disappears.  So I did have a patch that changes the C++
> implementations to work in this case, but attached here is a new one along the
> lines of what you suggested, rather changing the above part of free-lang-data
> so it doesn't activate as often. The patch is pretty complete (other than
> missing a commit message) and bootstrap + regtest all languages looks good
> with no regressions. I tried the same with BUILD_CONFIG=bootstrap-lto as well,
> and that also looked good when it eventually finished. I added testcases for
> several frontends to verify that the diagnostics still work with -flto. I am
> not sure what are the implications for LTO itself, of changing this part of
> the pass, so I would have to ask you to weigh in on that aspect please. 
> Thanks!

First of all sorry for the delay and thanks for trying.  The effect on LTO is an
increase in the amount of streamed IL since we follow the DECL_CONTEXT
edge when streaming the tree graph.  So my solution for this would be to
reflect the case you remove in free-lang-data in both
lto-streamer-out.cc:DFS::DFS_write_tree_body where we do

  if (TREE_CODE (expr) != TRANSLATION_UNIT_DECL
  && ! DECL_CONTEXT (expr))
DFS_follow_tree_edge ((*all_translation_units)[0]);
  else
DFS_follow_tree_edge (DECL_CONTEXT (expr));

and in tree-streamer-out.cc:write_ts_decl_minimal_tree_pointers which
does

  if (TREE_CODE (expr) != TRANSLATION_UNIT_DECL
  && ! DECL_CONTEXT (expr))
stream_write_tree_ref (ob, (*all_translation_units)[0]);
  else
stream_write_tree_ref (ob, DECL_CONTEXT (expr));

that possibly boils down to "just" doing

   tree ctx = DECL_CONTEXT (..);
   if (TREE_CODE (..) == VAR_DECL || TREE_CODE (..) == FUNCTION_DECL)
 ctx = fld_decl_context (ctx);

and using 'ctx' for DECL_CONTEXT in those two places (and exporting the
fld_decl_context function).

As said the idea for this is that we want to avoid streaming type trees when
not necessary.  When doing an LTO bootstrap with your patch you should
see (slightly) larger object files.

Richard.

>
> -Lewis


[PATCH] CCP: handle division by a power of 2 as a right shift.

2022-11-08 Thread Aldy Hernandez via Gcc-patches
We have some code in range-ops that sets better maybe nonzero bits for
TRUNC_DIV_EXPR by a power of 2 than CCP does, by just shifting the
mask.  I'd like to offload this functionality into the CCP mask
tracking code, which already does the right thing for right shifts.

The testcase for this change is gcc.dg/tree-ssa/vrp123.c and
gcc.dg/tree-ssa/pr107541.c.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* range-op.cc (operator_div::fold_range): Call
update_known_bitmask.
* tree-ssa-ccp.cc (bit_value_binop): Handle divisions by powers of
2 as a right shift.
---
 gcc/range-op.cc | 18 +-
 gcc/tree-ssa-ccp.cc | 12 
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 846931ddcae..8ff5d5b4c78 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1995,23 +1995,7 @@ operator_div::fold_range (irange &r, tree type,
   if (!cross_product_operator::fold_range (r, type, lh, rh, trio))
 return false;
 
-  if (lh.undefined_p ())
-return true;
-
-  tree t;
-  if (code == TRUNC_DIV_EXPR
-  && rh.singleton_p (&t)
-  && !wi::neg_p (lh.lower_bound ()))
-{
-  wide_int wi = wi::to_wide (t);
-  int shift = wi::exact_log2 (wi);
-  if (shift != -1)
-   {
- wide_int nz = lh.get_nonzero_bits ();
- nz = wi::rshift (nz, shift, TYPE_SIGN (type));
- r.set_nonzero_bits (nz);
-   }
-}
+  update_known_bitmask (r, code, lh, rh);
   return true;
 }
 
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 3a4b6bc1118..2bcd90646f6 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -1934,6 +1934,18 @@ bit_value_binop (enum tree_code code, signop sgn, int 
width,
   {
widest_int r1max = r1val | r1mask;
widest_int r2max = r2val | r2mask;
+   if (r2mask == 0 && !wi::neg_p (r1max))
+ {
+   widest_int shift = wi::exact_log2 (r2val);
+   if (shift != -1)
+ {
+   // Handle division by a power of 2 as an rshift.
+   bit_value_binop (RSHIFT_EXPR, sgn, width, val, mask,
+r1type_sgn, r1type_precision, r1val, r1mask,
+r2type_sgn, r2type_precision, shift, r2mask);
+   return;
+ }
+ }
if (sgn == UNSIGNED
|| (!wi::neg_p (r1max) && !wi::neg_p (r2max)))
  {
-- 
2.38.1



[PATCH] amdgcn: Add builtins for vectorized native versions of abs, floorf and floor

2022-11-08 Thread Kwok Cheung Yeung

Hello

This patch adds three extra builtins for the vectorized forms of the 
abs, floorf and floor math functions, which are implemented by native 
GCN instructions. I have also added a test to check that they generate 
the expected assembler instructions.


Okay for trunk?

Thanks

Kwok
From 37f49b204d501327d0867b3e8a3f01b9445fb9bd Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 8 Nov 2022 11:59:58 +
Subject: [PATCH] amdgcn: Add builtins for vectorized native versions of abs,
 floorf and floor

2022-11-08  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn-builtins.def (FABSV, FLOORVF, FLOORV): New builtins.
* config/gcn/gcn.cc (gcn_expand_builtin_1): Expand GCN_BUILTIN_FABSV,
GCN_BUILTIN_FLOORVF and GCN_BUILTIN_FLOORV.

gcc/testsuite/
* gcc.target/gcn/math-builtins-1.c: New test.
---
 gcc/config/gcn/gcn-builtins.def   | 15 +
 gcc/config/gcn/gcn.cc | 33 +++
 .../gcc.target/gcn/math-builtins-1.c  | 33 +++
 3 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/math-builtins-1.c

diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
index 27691909925..c50777bd3b0 100644
--- a/gcc/config/gcn/gcn-builtins.def
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -64,6 +64,21 @@ DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */,
 _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
 gcn_expand_builtin_1)
 
+DEF_BUILTIN (FABSV, 3 /*CODE_FOR_fabsv */,
+"fabsv", B_INSN,
+_A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLOORVF, 3 /*CODE_FOR_floorvf */,
+"floorvf", B_INSN,
+_A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLOORV, 3 /*CODE_FOR_floorv */,
+"floorv", B_INSN,
+_A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
 DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */,
 "ldexpvf", B_INSN,
 _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI),
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 1996115a686..9c5e3419748 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4329,6 +4329,39 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ ,
emit_insn (gen_absv64sf2 (target, arg));
return target;
   }
+case GCN_BUILTIN_FABSV:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64DFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_absv64df2 (target, arg));
+   return target;
+  }
+case GCN_BUILTIN_FLOORVF:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64SFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_floorv64sf2 (target, arg));
+   return target;
+  }
+case GCN_BUILTIN_FLOORV:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64DFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_floorv64df2 (target, arg));
+   return target;
+  }
 case GCN_BUILTIN_LDEXPVF:
   {
if (ignore)
diff --git a/gcc/testsuite/gcc.target/gcn/math-builtins-1.c 
b/gcc/testsuite/gcc.target/gcn/math-builtins-1.c
new file mode 100644
index 000..e1aadfb40d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/math-builtins-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+typedef float v64sf __attribute__ ((vector_size (256)));
+typedef double v64df __attribute__ ((vector_size (512)));
+typedef int v64si __attribute__ ((vector_size (256)));
+typedef long v64di __attribute__ ((vector_size (512)));
+
+v64sf f (v64sf _x, v64si _y)
+{
+  v64sf x = _x;
+  v64si y = _y;
+  x = __builtin_gcn_fabsvf (x); /* { dg-final { scan-assembler 
"v_add_f32\\s+v\[0-9\]+, 0, |v\[0-9\]+|" } } */
+  x = __builtin_gcn_floorvf (x); /* { dg-final { scan-assembler 
"v_floor_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  x = __builtin_gcn_frexpvf_mant (x); /* { dg-final { scan-assembler 
"v_frexp_mant_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  y = __builtin_gcn_frexpvf_exp (x); /* { dg-final { scan-assembler 
"v_frexp_exp_i32_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  x = __builtin_gcn_ldexpvf (x, y); /* { dg-final { scan-assembler 
"v_ldexp_f32\\s+v\[0-9\]+, v\[0-9\]+, v\[0-9\]+" } }*/
+
+  return x;
+}
+
+v64df g (v64df _x, v64si _y)
+{
+  v64df x = _x;
+  v64si y = _y;
+  x = __builtin_g

Re: [PATCH] Optimize VEC_PERM_EXPR with same permutation index and operation [PR98167]

2022-11-08 Thread Richard Biener via Gcc-patches
On Fri, Nov 4, 2022 at 7:44 AM Prathamesh Kulkarni via Gcc-patches
 wrote:
>
> On Fri, 4 Nov 2022 at 05:36, Hongyu Wang via Gcc-patches
>  wrote:
> >
> > Hi,
> >
> > This is a follow-up patch for PR98167
> >
> > The sequence
> >  c1 = VEC_PERM_EXPR (a, a, mask)
> >  c2 = VEC_PERM_EXPR (b, b, mask)
> >  c3 = c1 op c2
> > can be optimized to
> >  c = a op b
> >  c3 = VEC_PERM_EXPR (c, c, mask)
> > for all integer vector operation, and float operation with
> > full permutation.
> >
> > Bootstrapped & regrtested on x86_64-pc-linux-gnu.
> >
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > PR target/98167
> > * match.pd: New perm + vector op patterns for int and fp vector.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/98167
> > * gcc.target/i386/pr98167.c: New test.
> > ---
> >  gcc/match.pd| 49 +
> >  gcc/testsuite/gcc.target/i386/pr98167.c | 44 ++
> >  2 files changed, 93 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr98167.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 194ba8f5188..b85ad34f609 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -8189,3 +8189,52 @@ and,
> >   (bit_and (negate @0) integer_onep@1)
> >   (if (!TYPE_OVERFLOW_SANITIZED (type))
> >(bit_and @0 @1)))
> > +
> > +/* Optimize
> > +   c1 = VEC_PERM_EXPR (a, a, mask)
> > +   c2 = VEC_PERM_EXPR (b, b, mask)
> > +   c3 = c1 op c2
> > +   -->
> > +   c = a op b
> > +   c3 = VEC_PERM_EXPR (c, c, mask)
> > +   For all integer non-div operations.  */
> > +(for op (plus minus mult bit_and bit_ior bit_xor
> > +lshift rshift)
> > + (simplify
> > +  (op (vec_perm @0 @0 VECTOR_CST@2) (vec_perm @1 @1 VECTOR_CST@2))
> > +(if (VECTOR_INTEGER_TYPE_P (type))
> > + (vec_perm (op @0 @1) (op @0 @1) @2
> Just wondering, why should mask be CST here ?
> I guess the transform should work as long as mask is same for both
> vectors even if it's
> not constant ?

Yes, please change accordingly (and maybe push separately).

> > +
> > +/* Similar for float arithmetic when permutation constant covers
> > +   all vector elements.  */
> > +(for op (plus minus mult)
> > + (simplify
> > +  (op (vec_perm @0 @0 VECTOR_CST@2) (vec_perm @1 @1 VECTOR_CST@2))
> > +(if (VECTOR_FLOAT_TYPE_P (type))
> > + (with
> > +  {
> > +   tree perm_cst = @2;
> > +   vec_perm_builder builder;
> > +   bool full_perm_p = false;
> > +   if (tree_to_vec_perm_builder (&builder, perm_cst))
> > + {
> > +   /* Create a vec_perm_indices for the integer vector.  */
> > +   int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> If this transform is meant only for VLS vectors, I guess you should
> bail out if TYPE_VECTOR_SUBPARTS is not constant,
> otherwise it will crash for VLA vectors.

I suppose it's difficult to create a VLA permute that covers all elements
and that is not trivial though.  But indeed add ().is_constant to the
VECTOR_FLOAT_TYPE_P guard.

>
> Thanks,
> Prathamesh
> > +   vec_perm_indices sel (builder, 1, nelts);
> > +
> > +   /* Check if perm indices covers all vector elements.  */
> > +   int count = 0, i, j;
> > +   for (i = 0; i < nelts; i++)
> > + for (j = 0; j < nelts; j++)

Meh, that's quadratic!  I suggest to check .encoding ().encoded_full_vector_p ()
(as said I can't think of a non-full encoding that isn't trivial
but covers all elements) and then simply .qsort () the vector_builder
(it derives
from vec<>) so the scan is O(n log n).

Maybe Richard has a better idea here though.

Otherwise looks OK, though with these kind of (* (op ..) (op ..)) patterns it's
always that they explode the match decision tree, we'd ideally have a way to
match those with (op ..) (op ..) first to be able to share more of the matching
code.  That said, match.pd is a less than ideal place for these (but mostly
because of the way we code generate *-match.cc)

Richard.

> > +   {
> > + if (sel[j].to_constant () == i)
> > +   {
> > + count++;
> > + break;
> > +   }
> > +   }
> > +   full_perm_p = count == nelts;
> > + }
> > +   }
> > +   (if (full_perm_p)
> > +   (vec_perm (op @0 @1) (op @0 @1) @2))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr98167.c 
> > b/gcc/testsuite/gcc.target/i386/pr98167.c
> > new file mode 100644
> > index 000..40e0ac11332
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr98167.c
> > @@ -0,0 +1,44 @@
> > +/* PR target/98167 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx2" } */
> > +
> > +/* { dg-final { scan-assembler-times "vpshufd\t" 8 } } */
> > +/* { dg-final { scan-assembler-times "vpermilps\t" 3 } } */
> > +
> > +#define VEC_PERM_4 \
> > +  2, 3, 1, 0
> > +#define VEC_PERM_8 \
> > +  4, 5, 6, 7, 3, 2,

[RFC PATCH] c++: Minimal handling of carries_dependency attribute

2022-11-08 Thread Jakub Jelinek via Gcc-patches
Hi!

A comment in D2552R1:
"The only questionable (but still conforming) case we found was
[[carries_dependency(some_argument)]] on GCC, where the emitted diagnostic said 
that the
carries_dependency attribute is not supported, but did not specifically call 
out the syntax error
in the argument clause."
made me try the following patch, where we'll error at least
for arguments to the attribute and for some uses of the attribute
appertaining to something not mentioned in the standard warn
with different diagnostics (or should that be an error?; clang++
does that, but I think we never do for any attribute, standard or not).
The diagnostics on toplevel attribute declaration is still an
attribute ignored warning and on empty statement different wording.

The paper additionally mentions
struct X { [[nodiscard]]; }; // no diagnostic on GCC
and 2 cases of missing diagnostics on [[fallthrough]] (guess I should
file a PR about those; one problem is that do { ... } while (0); there
is replaced during genericization just by ... and another that
[[fallthrough]] there is followed by a label, but not user/case/default
label, but an artificial one created from while loop genericization.

Thoughts on this?

2022-11-08  Jakub Jelinek  

* tree.cc (handle_carries_dependency_attribute): New function.
(std_attribute_table): Add carries_dependency attribute.
* parser.cc (cp_parser_check_std_attribute): Add carries_dependency
attribute.

* g++.dg/cpp0x/attr-carries_dependency1.C: New test.

--- gcc/cp/tree.cc.jj   2022-11-07 10:30:42.758629740 +0100
+++ gcc/cp/tree.cc  2022-11-08 14:45:08.853864684 +0100
@@ -4923,6 +4923,32 @@ structural_type_p (tree t, bool explain)
   return true;
 }
 
+/* Partially handle the C++11 [[carries_dependency]] attribute.
+   Just emit different diagnostics when it is used on something the
+   spec doesn't allow vs. where it allows and we just choose to ignore
+   it.  */
+
+static tree
+handle_carries_dependency_attribute (tree *node, tree name,
+tree ARG_UNUSED (args),
+int ARG_UNUSED (flags),
+bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL
+  && TREE_CODE (*node) != PARM_DECL)
+{
+  warning (OPT_Wattributes, "%qE attribute can only be applied to "
+  "functions or parameters", name);
+  *no_add_attrs = true;
+}
+  else
+{
+  warning (OPT_Wattributes, "%qE attribute ignored", name);
+  *no_add_attrs = true;
+}
+  return NULL_TREE;
+}
+
 /* Handle the C++17 [[nodiscard]] attribute, which is similar to the GNU
warn_unused_result attribute.  */
 
@@ -5036,6 +5062,8 @@ const struct attribute_spec std_attribut
 handle_likeliness_attribute, attr_cold_hot_exclusions },
   { "noreturn", 0, 0, true, false, false, false,
 handle_noreturn_attribute, attr_noreturn_exclusions },
+  { "carries_dependency", 0, 0, true, false, false, false,
+handle_carries_dependency_attribute, NULL },
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
 };
 
--- gcc/cp/parser.cc.jj 2022-11-04 18:11:41.523945997 +0100
+++ gcc/cp/parser.cc2022-11-08 13:41:35.075135139 +0100
@@ -29239,8 +29239,7 @@ cp_parser_std_attribute (cp_parser *pars
 
 /* Warn if the attribute ATTRIBUTE appears more than once in the
attribute-list ATTRIBUTES.  This used to be enforced for certain
-   attributes, but the restriction was removed in P2156.  Note that
-   carries_dependency ([dcl.attr.depend]) isn't implemented yet in GCC.
+   attributes, but the restriction was removed in P2156.
LOC is the location of ATTRIBUTE.  Returns true if ATTRIBUTE was not
found in ATTRIBUTES.  */
 
@@ -29249,7 +29248,7 @@ cp_parser_check_std_attribute (location_
 {
   static auto alist = { "noreturn", "deprecated", "nodiscard", "maybe_unused",
"likely", "unlikely", "fallthrough",
-   "no_unique_address" };
+   "no_unique_address", "carries_dependency" };
   if (attributes)
 for (const auto &a : alist)
   if (is_attribute_p (a, get_attribute_name (attribute))
--- gcc/testsuite/g++.dg/cpp0x/attr-carries_dependency1.C.jj2022-11-08 
15:17:43.168238390 +0100
+++ gcc/testsuite/g++.dg/cpp0x/attr-carries_dependency1.C   2022-11-08 
15:16:39.695104787 +0100
@@ -0,0 +1,17 @@
+// { dg-do compile { target c++11 } }
+
+[[carries_dependency]] int *f1 (); // { dg-warning "attribute 
ignored" }
+int f2 (int *x [[carries_dependency]]);// { dg-warning 
"attribute ignored" }
+[[carries_dependency]] int f3 ();  // { dg-warning "attribute 
ignored" }
+int f4 (int x [[carries_dependency]]); // { dg-warning "attribute 
ignored" }
+[[carries_dependency(1)]] int f5 ();   // { dg-error 
"'carries_dependency' attribute does not take any arguments" }
+[[carries_dependency]] int v;  // { dg-warnin

Re: [PATCH] amdgcn: Add builtins for vectorized native versions of abs, floorf and floor

2022-11-08 Thread Andrew Stubbs

On 08/11/2022 14:35, Kwok Cheung Yeung wrote:

Hello

This patch adds three extra builtins for the vectorized forms of the 
abs, floorf and floor math functions, which are implemented by native 
GCN instructions. I have also added a test to check that they generate 
the expected assembler instructions.


Okay for trunk?


OK.

Andrew


[PATCH 2/2] Add a new warning option -Wstrict-flex-arrays.

2022-11-08 Thread Qing Zhao via Gcc-patches
'-Wstrict-flex-arrays'
 Warn about improper usages of flexible array members according to
 the LEVEL of the 'strict_flex_array (LEVEL)' attribute attached to
 the trailing array field of a structure if it's available,
 otherwise according to the LEVEL of the option
 '-fstrict-flex-arrays=LEVEL'.

 This option is effective only when LEVEL is bigger than 0.
 Otherwise, it will be ignored with a warning.

 when LEVEL=1, warnings will be issued for a trailing array
 reference of a structure that has 2 or more elements if the
 trailing array is referenced as a flexible array member.

 when LEVEL=2, in addition to LEVEL=1, additional warnings will be
 issued for a trailing one-element array reference of a structure if
 the array is referenced as a flexible array member.

 when LEVEL=3, in addition to LEVEL=2, additional warnings will be
 issued for a trailing zero-length array reference of a structure if
 the array is referenced as a flexible array member.

At the same time, keep -Warray-bounds=[1|2] warnings unchanged from
 -fstrict-flex-arrays.

gcc/ChangeLog:

* attribs.cc (strict_flex_array_level_of): New function.
* attribs.h (strict_flex_array_level_of): Prototype for new function.
* doc/invoke.texi: Document -Wstrict-flex-arrays option. Update
-fstrict-flex-arrays[=n] options.
* gimple-array-bounds.cc (array_bounds_checker::check_array_ref):
Issue warnings for -Wstrict-flex-arrays.
(get_up_bounds_for_array_ref): New function.
(check_out_of_bounds_and_warn): New function.
* opts.cc (finish_options): Issue warnings for unsupported combination
of -Warray-bounds and -fstrict-flex-arrays, -Wstrict-flex-arrays and
-fstrict-flex-arrays.
* tree-vrp.cc (execute_vrp): Enable the pass when
warn_strict_flex_array is true.
(execute_ranger_vrp): Likewise.
* tree.cc (array_ref_flexible_size_p): Add one new argument.
(component_ref_sam_type): New function.
(component_ref_size): Add one new argument,
* tree.h (array_ref_flexible_size_p): Update prototype.
(enum struct special_array_member): Add two new enum values.
(component_ref_sam_type): New prototype.
(component_ref_size): Update prototype.

gcc/c-family/ChangeLog:

* c.opt (Wstrict-flex-arrays): New option.

gcc/c/ChangeLog:

* c-decl.cc (is_flexible_array_member_p): Call new function
strict_flex_array_level_of.

gcc/testsuite/ChangeLog:

* c-c++-common/Wstrict-flex-arrays.c: New test.
* c-c++-common/Wstrict-flex-arrays_2.c: New test.
* gcc.dg/Wstrict-flex-arrays-2.c: New test.
* gcc.dg/Wstrict-flex-arrays-3.c: New test.
* gcc.dg/Wstrict-flex-arrays-4.c: New test.
* gcc.dg/Wstrict-flex-arrays-5.c: New test.
* gcc.dg/Wstrict-flex-arrays-6.c: New test.
* gcc.dg/Wstrict-flex-arrays-7.c: New test.
* gcc.dg/Wstrict-flex-arrays-8.c: New test.
* gcc.dg/Wstrict-flex-arrays-9.c: New test.
* gcc.dg/Wstrict-flex-arrays.c: New test.
---
 gcc/attribs.cc|  30 ++
 gcc/attribs.h |   2 +
 gcc/c-family/c.opt|   5 +
 gcc/c/c-decl.cc   |  22 +-
 gcc/doc/invoke.texi   |  33 ++-
 gcc/gimple-array-bounds.cc| 264 +-
 gcc/opts.cc   |  15 +
 .../c-c++-common/Wstrict-flex-arrays.c|   9 +
 .../c-c++-common/Wstrict-flex-arrays_2.c  |   9 +
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-2.c  |  46 +++
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-3.c  |  46 +++
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-4.c  |  49 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-5.c  |  48 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-6.c  |  48 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-7.c  |  50 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-8.c  |  49 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-9.c  |  49 
 gcc/testsuite/gcc.dg/Wstrict-flex-arrays.c|  46 +++
 gcc/tree-vrp.cc   |   6 +-
 gcc/tree.cc   | 165 ---
 gcc/tree.h|  15 +-
 21 files changed, 870 insertions(+), 136 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wstrict-flex-arrays.c
 create mode 100644 gcc/testsuite/c-c++-common/Wstrict-flex-arrays_2.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-2.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-3.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-4.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-5.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-6.c
 create mode 100644 gcc/testsuite/gcc.dg/Wstrict-flex-arrays-7.c
 create mode 100644 gcc/testsuite/gcc.dg/Ws

[PATCH 0/2] Add a new warning option -Wstrict-flex-array

2022-11-08 Thread Qing Zhao via Gcc-patches


This patch series includes two changes:
  1. Change the name of array_at_struct_end_p to array_ref_flexible_size_p.
  2. Add a new warning option -Wstrict-flex-arrays and at the same time
keep -Warray-bounds unchanged from -fstrict-flex-arrays.

The new warning -Wstrict-flex-arrays is implemented at the same place as 
-Warray-bounds. Since we need to keep the old behaviors of
-Warray-bounds=[1|2], we refactor the routine
"array_bounds_checker::check_array_ref" to make it work for both
-Warray-bounds and -Wstrict-flex-arrays.  

If -Warray-bounds, -Wstrict-flex-arrays, and -fstrict-flex-arrays are present
at the same time:
  A. -Warray-bounds will not be controlled by -fstrict-flex-arrays;
  B. -Wstrict-flex-arrays will be controlled by -fstrict-flex-arrays;
  C. both -Warray-bounds and -Wstrict-flex-arrays will report warnings
 according to their own rules.

bootstrapped and regression tested on both x86 and aarch64. no issue.
Okay for commit?

thanks.

Qing




Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2022-11-08 Thread Richard Earnshaw via Gcc-patches




On 26/10/2022 09:49, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:



-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when
popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



  From what I can tell from similar functions this is correct, but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, then it is the current CFA register.
If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence will be detected by an assertion in the dwarf emission
machinery.

Thanks,  the patch is ok
Kyrill



Best Regards

Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer?


Hi Richard,

not sure I understand, isn't any pop updating SP by definition?


Yes, but the SP must already be the CFA before this instruction, since 
SP must be the base of the pop. So the reg note changing the CFA to SP 
can't be right.  I'm thinking there must be some earlier restore of SP 
that's missing a frame-related note.


R.



BR

   Andrea


Re: [PATCH] [PHIOPT] Add A ? B + CST : B match and simplify optimizations

2022-11-08 Thread Richard Biener via Gcc-patches
On Sat, Nov 5, 2022 at 10:03 AM Zhongyunde via Gcc-patches
 wrote:
>
>
> > -Original Message-
> > From: Andrew Pinski [mailto:pins...@gcc.gnu.org]
> > Sent: Saturday, November 5, 2022 2:34 PM
> > To: Zhongyunde 
> > Cc: hongtao@intel.com; gcc-patches@gcc.gnu.org; Zhangwen(Esan)
> > ; Weiwei (weiwei, Compiler)
> > ; zhong_1985...@163.com
> > Subject: Re: [PATCH] [PHIOPT] Add A ? B + CST : B match and simplify
> > optimizations
> >
> > On Fri, Nov 4, 2022 at 11:17 PM Zhongyunde 
> > wrote:
> > >
> > > hi,
> > >   This patch is try to fix the issue
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107190,
> > > would you like to give me some suggestion, thanks.
> >
> > This seems like a "simplified" version of
> > https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584411.html
> > which just handles power of 2 constants where we know the cond will be
> > removed.
> > We could do even more "simplified" of 1 if needed really.
> > What is the IR before PHI-OPT? Is it just + 1?
>
> Thanks for your attention. It is + 4294967296 before PHI-OPT  (See detail 
> https://gcc.godbolt.org/z/6zEc6ja1z)
> So we should keep matching the power of 2 constants ?
>
> > Also your pattern can be simplified to use integer_pow2p in the match part
> > instead of INTEGER_CST.
> >
> Apply your comment, thanks

How does the patch fix the mentioned bug?  match.pd patterns should make things
"simpler" but x + (a << C') isn't simpler than a ? x + C : x.  It
looks you are targeting
PHI-OPT here, so maybe instead extend value_replacement to handle this case,
it does look similar to the case with neutral/absorbing element there?

Richard.

>
> > Thanks,
> > Andrew
>
>


Re: [PATCH] CCP: handle division by a power of 2 as a right shift.

2022-11-08 Thread Richard Biener via Gcc-patches
On Tue, Nov 8, 2022 at 3:25 PM Aldy Hernandez  wrote:
>
> We have some code in range-ops that sets better maybe nonzero bits for
> TRUNC_DIV_EXPR by a power of 2 than CCP does, by just shifting the
> mask.  I'd like to offload this functionality into the CCP mask
> tracking code, which already does the right thing for right shifts.
>
> The testcase for this change is gcc.dg/tree-ssa/vrp123.c and
> gcc.dg/tree-ssa/pr107541.c.
>
> Tested on x86-64 Linux.
>
> OK?

LGTM

> gcc/ChangeLog:
>
> * range-op.cc (operator_div::fold_range): Call
> update_known_bitmask.
> * tree-ssa-ccp.cc (bit_value_binop): Handle divisions by powers of
> 2 as a right shift.
> ---
>  gcc/range-op.cc | 18 +-
>  gcc/tree-ssa-ccp.cc | 12 
>  2 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> index 846931ddcae..8ff5d5b4c78 100644
> --- a/gcc/range-op.cc
> +++ b/gcc/range-op.cc
> @@ -1995,23 +1995,7 @@ operator_div::fold_range (irange &r, tree type,
>if (!cross_product_operator::fold_range (r, type, lh, rh, trio))
>  return false;
>
> -  if (lh.undefined_p ())
> -return true;
> -
> -  tree t;
> -  if (code == TRUNC_DIV_EXPR
> -  && rh.singleton_p (&t)
> -  && !wi::neg_p (lh.lower_bound ()))
> -{
> -  wide_int wi = wi::to_wide (t);
> -  int shift = wi::exact_log2 (wi);
> -  if (shift != -1)
> -   {
> - wide_int nz = lh.get_nonzero_bits ();
> - nz = wi::rshift (nz, shift, TYPE_SIGN (type));
> - r.set_nonzero_bits (nz);
> -   }
> -}
> +  update_known_bitmask (r, code, lh, rh);
>return true;
>  }
>
> diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
> index 3a4b6bc1118..2bcd90646f6 100644
> --- a/gcc/tree-ssa-ccp.cc
> +++ b/gcc/tree-ssa-ccp.cc
> @@ -1934,6 +1934,18 @@ bit_value_binop (enum tree_code code, signop sgn, int 
> width,
>{
> widest_int r1max = r1val | r1mask;
> widest_int r2max = r2val | r2mask;
> +   if (r2mask == 0 && !wi::neg_p (r1max))
> + {
> +   widest_int shift = wi::exact_log2 (r2val);
> +   if (shift != -1)
> + {
> +   // Handle division by a power of 2 as an rshift.
> +   bit_value_binop (RSHIFT_EXPR, sgn, width, val, mask,
> +r1type_sgn, r1type_precision, r1val, r1mask,
> +r2type_sgn, r2type_precision, shift, r2mask);
> +   return;
> + }
> + }
> if (sgn == UNSIGNED
> || (!wi::neg_p (r1max) && !wi::neg_p (r2max)))
>   {
> --
> 2.38.1
>


[COMMITTED] amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin

2022-11-08 Thread Kwok Cheung Yeung

Hello

This patch fixes a bug in the expansion of GCN_BUILTIN_LDEXPV. As this 
is a double-precision operation, the first argument should be expanded 
as a V64DF expression (instead of V64SF).


Committed to trunk as obvious.

Kwok

From cb0a2b1f28cf0c231bf38fcd02c40689739df7bb Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 8 Nov 2022 14:38:23 +
Subject: [PATCH] amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin

2022-11-08  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.cc (gcn_expand_builtin_1): Expand first argument
of GCN_BUILTIN_LDEXPV to V64DFmode.
---
 gcc/config/gcn/gcn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9c5e3419748..5e6f3b8b74b 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4383,7 +4383,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ ,
  return target;
rtx arg1 = force_reg (V64DFmode,
  expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
-  V64SFmode,
+  V64DFmode,
   EXPAND_NORMAL));
rtx arg2 = force_reg (V64SImode,
  expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
-- 
2.25.1



Re: [OG12] [committed] amdgcn: Enable SIMD vectorization of math library functions

2022-11-08 Thread Kwok Cheung Yeung

Hello

These additional patches were pushed onto the devel/omp/gcc-12 branch to 
fix various issues with the SIMD math library:


ecf1603b7ad amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin
6c40e3f5daa amdgcn: Various fixes for SIMD math library
8e6c5b18e10 amdgcn: Fixed intermittent failure in vectorized version of rint

Kwok


[PATCH 1/2] Change the name of array_at_struct_end_p to array_ref_flexible_size_p

2022-11-08 Thread Qing Zhao via Gcc-patches
The name of the utility routine "array_at_struct_end_p" is misleading
and should be changed to a new name that more accurately reflects its
real meaning.

The routine "array_at_struct_end_p" is used to check whether an array
reference is to an array whose actual size might be larger than its
upper bound implies, which includes 3 different cases:

   A. a ref to a flexible array member at the end of a structure;
   B. a ref to an array with a different type against the original decl;
   C. a ref to an array that was passed as a parameter;

The old name only reflects the above case A, and is therefore very confusing
when reading the corresponding gcc source code.

In this patch, a new name "array_ref_flexible_size_p" is used to replace
the old name.

All the references to the routine "array_at_struct_end_p" were replaced
with this new name, and the corresponding comments were updated to make
them clean and consistent.

gcc/ChangeLog:

* gimple-array-bounds.cc (trailing_array): Replace
array_at_struct_end_p with new name and update comments.
* gimple-fold.cc (get_range_strlen_tree): Likewise.
* gimple-ssa-warn-restrict.cc (builtin_memref::builtin_memref):
Likewise.
* graphite-sese-to-poly.cc (bounds_are_valid): Likewise.
* tree-if-conv.cc (idx_within_array_bound): Likewise.
* tree-object-size.cc (addr_object_size): Likewise.
* tree-ssa-alias.cc (component_ref_to_zero_sized_trailing_array_p):
Likewise.
(stmt_kills_ref_p): Likewise.
* tree-ssa-loop-niter.cc (idx_infer_loop_bounds): Likewise.
* tree-ssa-strlen.cc (maybe_set_strlen_range): Likewise.
* tree.cc (array_at_struct_end_p): Rename to ...
(array_ref_flexible_size_p): ... this.
(component_ref_size): Replace array_at_struct_end_p with new name.
* tree.h (array_at_struct_end_p): Rename to ...
(array_ref_flexible_size_p): ... this.
---
 gcc/gimple-array-bounds.cc  |  4 ++--
 gcc/gimple-fold.cc  |  6 ++
 gcc/gimple-ssa-warn-restrict.cc |  5 +++--
 gcc/graphite-sese-to-poly.cc|  4 ++--
 gcc/tree-if-conv.cc |  7 +++
 gcc/tree-object-size.cc |  2 +-
 gcc/tree-ssa-alias.cc   |  8 
 gcc/tree-ssa-loop-niter.cc  | 15 +++
 gcc/tree-ssa-strlen.cc  |  2 +-
 gcc/tree.cc | 11 ++-
 gcc/tree.h  |  8 
 11 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/gcc/gimple-array-bounds.cc b/gcc/gimple-array-bounds.cc
index e190b93aa85..fbf448e045d 100644
--- a/gcc/gimple-array-bounds.cc
+++ b/gcc/gimple-array-bounds.cc
@@ -129,7 +129,7 @@ get_ref_size (tree arg, tree *pref)
 }
 
 /* Return true if REF is (likely) an ARRAY_REF to a trailing array member
-   of a struct.  It refines array_at_struct_end_p by detecting a pointer
+   of a struct.  It refines array_ref_flexible_size_p by detecting a pointer
to an array and an array parameter declared using the [N] syntax (as
opposed to a pointer) and returning false.  Set *PREF to the decl or
expression REF refers to.  */
@@ -167,7 +167,7 @@ trailing_array (tree arg, tree *pref)
return false;
 }
 
-  return array_at_struct_end_p (arg);
+  return array_ref_flexible_size_p (arg);
 }
 
 /* Checks one ARRAY_REF in REF, located at LOCUS. Ignores flexible
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 9055cd8982d..cafd331ca98 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -1690,13 +1690,11 @@ get_range_strlen_tree (tree arg, bitmap visited, 
strlen_range_kind rkind,
  /* Handle a MEM_REF into a DECL accessing an array of integers,
 being conservative about references to extern structures with
 flexible array members that can be initialized to arbitrary
-numbers of elements as an extension (static structs are okay).
-FIXME: Make this less conservative -- see
-component_ref_size in tree.cc.  */
+numbers of elements as an extension (static structs are okay).  */
  tree ref = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
  if ((TREE_CODE (ref) == PARM_DECL || VAR_P (ref))
  && (decl_binds_to_current_def_p (ref)
- || !array_at_struct_end_p (arg)))
+ || !array_ref_flexible_size_p (arg)))
{
  /* Fail if the offset is out of bounds.  Such accesses
 should be diagnosed at some point.  */
diff --git a/gcc/gimple-ssa-warn-restrict.cc b/gcc/gimple-ssa-warn-restrict.cc
index b7ed15c8902..832456ba6fc 100644
--- a/gcc/gimple-ssa-warn-restrict.cc
+++ b/gcc/gimple-ssa-warn-restrict.cc
@@ -296,8 +296,9 @@ builtin_memref::builtin_memref (pointer_query &ptrqry, 
gimple *stmt, tree expr,
   tree basetype = TREE_TYPE (base);
   if (TREE_CODE (basetype) == ARRAY_TYPE)
 {
-  if (ref && array_at_struct_end_p (ref))
-   ;   /* Use the maximum possibl

Re:Re: [PATCH] [PHIOPT] Add A ? B + CST : B match and simplify optimizations

2022-11-08 Thread 钟云德 via Gcc-patches
At 2022-11-08 22:58:34, "Richard Biener"  wrote:

>On Sat, Nov 5, 2022 at 10:03 AM Zhongyunde via Gcc-patches
> wrote:
>>
>>
>> > -Original Message-
>> > From: Andrew Pinski [mailto:pins...@gcc.gnu.org]
>> > Sent: Saturday, November 5, 2022 2:34 PM
>> > To: Zhongyunde 
>> > Cc: hongtao@intel.com; gcc-patches@gcc.gnu.org; Zhangwen(Esan)
>> > ; Weiwei (weiwei, Compiler)
>> > ; zhong_1985...@163.com
>> > Subject: Re: [PATCH] [PHIOPT] Add A ? B + CST : B match and simplify
>> > optimizations
>> >
>> > On Fri, Nov 4, 2022 at 11:17 PM Zhongyunde 
>> > wrote:
>> > >
>> > > hi,
>> > >   This patch is try to fix the issue
>> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107190,
>> > > would you like to give me some suggestion, thanks.
>> >
>> > This seems like a "simplified" version of
>> > https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584411.html
>> > which just handles power of 2 constants where we know the cond will be
>> > removed.
>> > We could do even more "simplified" of 1 if needed really.
>> > What is the IR before PHI-OPT? Is it just + 1?
>>
>> Thanks for your attention. It is + 4294967296 before PHI-OPT  (See detail 
>> https://gcc.godbolt.org/z/6zEc6ja1z)
>> So we should keep matching the power of 2 constants ?
>>
>> > Also your pattern can be simplified to use integer_pow2p in the match part
>> > instead of INTEGER_CST.
>> >
>> Apply your comment, thanks
>
>How does the patch fix the mentioned bug?  match.pd patterns should make things
>"simpler" but x + (a << C') isn't simpler than a ? x + C : x.  It
>looks you are targeting
>PHI-OPT here, so maybe instead extend value_replacement to handle this case,
>it does look similar to the case with neutral/absorbing element there?
>

>Richard.


Thanks. This patch tries to fix the 1st issue mentioned in 107090 – [aarch64]
sequence logic should be combined with mul and umulh (gnu.org)
Sure, I'll take a look at the function value_replacement.
I have also noticed that the function two_value_replacement is very close to
the patch I want to achieve, and it may be easy to extend.
It seems this can be expressed equally in match.pd (called by
match_simplify_replacement), so how do we
choose where it would be better to implement it?
```
/* Do the replacement of conditional if it can be done.  */
if (!early_p
    && !diamond_p
    && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
  cfgchanged = true;
else if (!diamond_p
         && match_simplify_replacement (bb, bb1, e1, e2, phi,
                                        arg0, arg1, early_p))
  cfgchanged = true;
```
>> > Thanks,
>> > Andrew
>>
>>

[PATCH] Use toplevel configure for GMP and MPFR for gdb

2022-11-08 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This patch uses the toplevel configure parts for GMP/MPFR for
gdb. The only thing is that gdb now requires MPFR for building.
Before it was a recommended but not required library.
Also this allows building of GMP and MPFR with the toplevel
directory just like how it is done for GCC.
We now error out in the toplevel configure if the version
of GMP or MPFR is wrong.

OK? Built gdb 3 ways:
With GMP and MPFR in the toplevel (static library used at that point for both)
With only MPFR in the toplevel (GMP distro library used and MPFR built from 
source)
With neither GMP nor MPFR in the toplevel (distro libraries used)

Thanks,
Andrew Pinski

ChangeLog:
* Makefile.def: Add configure-gdb dependencies
on all-gmp and all-mpfr.
* configure.ac: Split out MPC checking from MPFR.
Require GMP and MPFR if the gdb directory exists.
* Makefile.in: Regenerate.
* configure: Regenerate.

gdb/ChangeLog:
* configure.ac: Remove AC_LIB_HAVE_LINKFLAGS
for gmp and mpfr.
Use GMPLIBS and GMPINC which is provided by the
toplevel configure.
* Makefile.in (LIBGMP, LIBMPFR): Remove.
(GMPLIBS, GMPINC): Add definition.
(INTERNAL_CFLAGS_BASE): Add GMPINC.
(CLIBS): Exchange LIBMPFR and LIBGMP
for GMPLIBS.
* target-float.c: Make the code conditional on
HAVE_LIBMPFR unconditional.
* top.c: Remove code checking HAVE_LIBMPFR.
* configure: Regenerate.
* config.in: Regenerate.
---
 Makefile.def   |2 +
 Makefile.in|2 +
 configure  |   81 +++-
 configure.ac   |   45 +-
 gdb/Makefile.in|   12 +-
 gdb/config.in  |6 -
 gdb/configure  | 1036 ++--
 gdb/configure.ac   |   31 +-
 gdb/target-float.c |8 -
 gdb/top.c  |8 -
 10 files changed, 147 insertions(+), 1084 deletions(-)

diff --git a/Makefile.def b/Makefile.def
index acdcd625ed6..d5976e61d98 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -418,6 +418,8 @@ dependencies = { module=configure-isl; on=all-gmp; };
 dependencies = { module=all-intl; on=all-libiconv; };
 
 // Host modules specific to gdb.
+dependencies = { module=configure-gdb; on=all-gmp; };
+dependencies = { module=configure-gdb; on=all-mpfr; };
 dependencies = { module=configure-gdb; on=all-intl; };
 dependencies = { module=configure-gdb; on=configure-sim; };
 dependencies = { module=configure-gdb; on=all-bfd; };
diff --git a/Makefile.in b/Makefile.in
index cb39e4790d6..d0666c75b00 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -63748,6 +63748,8 @@ configure-libcc1: maybe-configure-gcc
 all-libcc1: maybe-all-gcc
 all-c++tools: maybe-all-gcc
 all-utils: maybe-all-libiberty
+configure-gdb: maybe-all-gmp
+configure-gdb: maybe-all-mpfr
 configure-gdb: maybe-all-intl
 configure-gdb: maybe-all-bfd
 configure-gdb: maybe-all-libiconv
diff --git a/configure b/configure
index 7bcb894d1fe..9ee7a1a3abe 100755
--- a/configure
+++ b/configure
@@ -769,6 +769,7 @@ infodir
 docdir
 oldincludedir
 includedir
+runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -941,6 +942,7 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE}'
@@ -1193,6 +1195,15 @@ do
   | -silent | --silent | --silen | --sile | --sil)
 silent=yes ;;
 
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+runstatedir=$ac_optarg ;;
+
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
 ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1330,7 +1341,7 @@ fi
 for ac_var in  exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-   libdir localedir mandir
+   libdir localedir mandir runstatedir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1490,6 +1501,7 @@ Fine tuning of the installation directories:
   --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIR            object code libraries [EPREFIX/lib]
   --includedir=DIR        C header files [PREFIX/include]
   --oldincludedir=DIR C header files f

Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

2022-11-08 Thread Surya Kumari Jangala via Gcc-patches
Hi Richard,

On 21/09/22 1:03 pm, Richard Biener wrote:
> On Tue, Sep 20, 2022 at 9:18 AM Surya Kumari Jangala via Gcc-patches
>  wrote:
>>
>> Hi Jeff, Richard,
>> Thank you for reviewing the patch!
>> I have committed the patch to the gcc repo.
>> Can I backport this patch to prior versions of gcc, as this is an easy patch 
>> to backport and the issue exists in prior versions too?
> 
> It doesn't seem to be a regression so I'd err on the safe side here.

Can you please clarify, should this patch be backported? It is not very clear 
what "safe side" means here.

Thanks!
Surya

> 
> Richard.
> 
>> Regards,
>> Surya
>>
>>
>> On 31/08/22 9:09 pm, Jeff Law via Gcc-patches wrote:
>>>
>>>
>>> On 8/23/2022 5:49 AM, Surya Kumari Jangala via Gcc-patches wrote:
 sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

 In schedule_region(), a basic block that does not contain any real insns
 is not scheduled and the dfa state at the entry of the bb is not copied
 to the fallthru basic block. However a DEBUG insn is treated as a real
 insn, and if a bb contains non-real insns and a DEBUG insn, its dfa
 state is copied to the fallthru bb. This was resulting in
 -fcompare-debug failure as the incoming dfa state of the fallthru block
 is different with -g. We should always copy the dfa state of a bb to
 its fallthru bb even if the bb does not contain real insns.

 2022-08-22  Surya Kumari Jangala  

 gcc/
 PR rtl-optimization/105586
 * sched-rgn.cc (schedule_region): Always copy dfa state to
 fallthru block.

 gcc/testsuite/
 PR rtl-optimization/105586
 * gcc.target/powerpc/pr105586.c: New test.
>>> Interesting.  We may have stumbled over this bug internally a little 
>>> while ago -- not from a compare-debug standpoint, but from a "why isn't the 
>>> processor state copied to the fallthru block" point of view.   I had it on 
>>> my to investigate list, but hadn't gotten around to it yet.
>>>
>>> I think there were requests for ChangeLog updates and a function comment 
>>> for save_state_for_fallthru_edge.  OK with those updates.
>>>
>>> jeff
>>>


Re: [PATCH] Use toplevel configure for GMP and MPFR for gdb

2022-11-08 Thread Andreas Schwab via Gcc-patches
On Nov 08 2022, apinski--- via Gcc-patches wrote:

> diff --git a/configure b/configure
> index 7bcb894d1fe..9ee7a1a3abe 100755
> --- a/configure
> +++ b/configure
> @@ -769,6 +769,7 @@ infodir
>  docdir
>  oldincludedir
>  includedir
> +runstatedir
>  localstatedir
>  sharedstatedir
>  sysconfdir

Please avoid using a patched autoconf command.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] Use toplevel configure for GMP and MPFR for gdb

2022-11-08 Thread Andrew Pinski via Gcc-patches
On Tue, Nov 8, 2022 at 8:46 AM Andreas Schwab via Gdb-patches
 wrote:
>
> On Nov 08 2022, apinski--- via Gcc-patches wrote:
>
> > diff --git a/configure b/configure
> > index 7bcb894d1fe..9ee7a1a3abe 100755
> > --- a/configure
> > +++ b/configure
> > @@ -769,6 +769,7 @@ infodir
> >  docdir
> >  oldincludedir
> >  includedir
> > +runstatedir
> >  localstatedir
> >  sharedstatedir
> >  sysconfdir
>
> Please avoid using a patched autoconf command.

Sorry about that. I have regenerated it with a plain autoconf-2.69
and will make sure to use that going forward. When the patch is
approved I will use that for the pushed version, but I don't see a
reason to resubmit the patch otherwise.

Thanks,
Andrew Pinski

>
> --
> Andreas Schwab, SUSE Labs, sch...@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."


Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

2022-11-08 Thread Richard Biener via Gcc-patches
On Tue, Nov 8, 2022 at 5:37 PM Surya Kumari Jangala
 wrote:
>
> Hi Richard,
>
> On 21/09/22 1:03 pm, Richard Biener wrote:
> > On Tue, Sep 20, 2022 at 9:18 AM Surya Kumari Jangala via Gcc-patches
> >  wrote:
> >>
> >> Hi Jeff, Richard,
> >> Thank you for reviewing the patch!
> >> I have committed the patch to the gcc repo.
> >> Can I backport this patch to prior versions of gcc, as this is an easy 
> >> patch to backport and the issue exists in prior versions too?
> >
> > > It doesn't seem to be a regression so I'd err on the safe side here.
>
> Can you please clarify, should this patch be backported? It is not very clear 
> what "safe side" means here.

Not backporting is the safe side.

Richard.

> Thanks!
> Surya
>
> >
> > Richard.
> >
> >> Regards,
> >> Surya
> >>
> >>
> >> On 31/08/22 9:09 pm, Jeff Law via Gcc-patches wrote:
> >>>
> >>>
> >>> On 8/23/2022 5:49 AM, Surya Kumari Jangala via Gcc-patches wrote:
>  sched1: Fix -fcompare-debug issue in schedule_region [PR105586]
> 
>  In schedule_region(), a basic block that does not contain any real insns
>  is not scheduled and the dfa state at the entry of the bb is not copied
>  to the fallthru basic block. However a DEBUG insn is treated as a real
>  insn, and if a bb contains non-real insns and a DEBUG insn, its dfa
>  state is copied to the fallthru bb. This was resulting in
>  -fcompare-debug failure as the incoming dfa state of the fallthru block
>  is different with -g. We should always copy the dfa state of a bb to
>  its fallthru bb even if the bb does not contain real insns.
> 
>  2022-08-22  Surya Kumari Jangala  
> 
>  gcc/
>  PR rtl-optimization/105586
>  * sched-rgn.cc (schedule_region): Always copy dfa state to
>  fallthru block.
> 
>  gcc/testsuite/
>  PR rtl-optimization/105586
>  * gcc.target/powerpc/pr105586.c: New test.
> >>> Interesting.  We may have stumbled over this bug internally a little 
> >>> while ago -- not from a compare-debug standpoint, but from a "why isn't 
> >>> the processor state copied to the fallthru block" point of view.   I had 
> >>> it on my to investigate list, but hadn't gotten around to it yet.
> >>>
> >>> I think there were requests for ChangeLog updates and a function comment 
> >>> for save_state_for_fallthru_edge.  OK with those updates.
> >>>
> >>> jeff
> >>>


RE: [PATCH 1/4]middle-end Support not decomposing specific divisions during vectorization.

2022-11-08 Thread Tamar Christina via Gcc-patches
Ping.

> -----Original Message-----
> From: Tamar Christina
> Sent: Monday, October 31, 2022 11:35 AM
> To: Richard Biener 
> Cc: gcc-patches@gcc.gnu.org; nd ; jeffreya...@gmail.com
> Subject: RE: [PATCH 1/4]middle-end Support not decomposing specific
> divisions during vectorization.
> 
> >
> > The type of the expression should be available via the mode and the
> > signedness, no?  So maybe to avoid having both RTX and TREE on the
> > target hook pass it a wide_int instead for the divisor?
> >
> 
> Done.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * expmed.h (expand_divmod): Pass tree operands down in addition
> to RTX.
>   * expmed.cc (expand_divmod): Likewise.
>   * explow.cc (round_push, align_dynamic_address): Likewise.
>   * expr.cc (force_operand, expand_expr_divmod): Likewise.
>   * optabs.cc (expand_doubleword_mod,
> expand_doubleword_divmod):
>   Likewise.
>   * target.h: Include tree-core.
>   * target.def (can_special_div_by_const): New.
>   * targhooks.cc (default_can_special_div_by_const): New.
>   * targhooks.h (default_can_special_div_by_const): New.
>   * tree-vect-generic.cc (expand_vector_operation): Use it.
>   * doc/tm.texi.in: Document it.
>   * doc/tm.texi: Regenerate.
>   * tree-vect-patterns.cc (vect_recog_divmod_pattern): Check for
> support.
>   * tree-vect-stmts.cc (vectorizable_operation): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/vect/vect-div-bitmask-1.c: New test.
>   * gcc.dg/vect/vect-div-bitmask-2.c: New test.
>   * gcc.dg/vect/vect-div-bitmask-3.c: New test.
>   * gcc.dg/vect/vect-div-bitmask.h: New file.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index
> 92bda1a7e14a3c9ea63e151e4a49a818bf4d1bdb..a29f5c39be3f0927f8ef6e094
> c7a712c0604fb77 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -6112,6 +6112,22 @@ instruction pattern.  There is no need for the hook
> to handle these two  implementation approaches itself.
>  @end deftypefn
> 
> +@deftypefn {Target Hook} bool
> TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST
> +(enum @var{tree_code}, tree @var{vectype}, wide_int @var{constant}, rtx
> +*@var{output}, rtx @var{in0}, rtx @var{in1}) This hook is used to test
> +whether the target has a special method of division of vectors of type
> +@var{vectype} using the value @var{constant}, and producing a vector of
> type @var{vectype}.  The division will then not be decomposed by the
> vectorizer and kept as a div.
> +
> +When the hook is being used to test whether the target supports a
> +special divide, @var{in0}, @var{in1}, and @var{output} are all null.
> +When the hook is being used to emit a division, @var{in0} and @var{in1}
> +are the source vectors of type @var{vectype} and @var{output} is the
> +destination vector of type @var{vectype}.
> +
> +Return true if the operation is possible, emitting instructions for it
> +if rtxes are provided and updating @var{output}.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} tree
> TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned
> @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in})  This hook
> should return the decl of a function that implements the  vectorized variant
> of the function with the @code{combined_fn} code diff --git
> a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index
> 112462310b134705d860153294287cfd7d4af81d..d5a745a02acdf051ea1da1b04
> 076d058c24ce093 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -4164,6 +4164,8 @@ address;  but often a machine-dependent strategy
> can generate better code.
> 
>  @hook TARGET_VECTORIZE_VEC_PERM_CONST
> 
> +@hook TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST
> +
>  @hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
> 
>  @hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
> diff --git a/gcc/explow.cc b/gcc/explow.cc index
> ddb4d6ae3600542f8d2bb5617cdd3933a9fae6c0..568e0eb1a158c696458ae678f
> 5e346bf34ba0036 100644
> --- a/gcc/explow.cc
> +++ b/gcc/explow.cc
> @@ -1037,7 +1037,7 @@ round_push (rtx size)
>   TRUNC_DIV_EXPR.  */
>size = expand_binop (Pmode, add_optab, size, alignm1_rtx,
>  NULL_RTX, 1, OPTAB_LIB_WIDEN);
> -  size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx,
> +  size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, size,
> + align_rtx,
>   NULL_RTX, 1);
>size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1);
> 
> @@ -1203,7 +1203,7 @@ align_dynamic_address (rtx target, unsigned
> required_align)
>gen_int_mode (required_align / BITS_PER_UNIT - 1,
>  Pmode),
>NULL_RTX, 1, OPTAB_LIB_WIDEN);
> -  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
> +  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL,
> target,
>

Re: [PATCH] libstdc++: Refactor implementation of operator+ for std::string

2022-11-08 Thread Jonathan Wakely via Gcc-patches
On Thu, 20 Oct 2022 at 01:06, Will Hawkins wrote:
>
> Sorry for the delay. Tested on x86-64 Linux.
>
> -->8--
>
> After consultation with Jonathan, it seemed like a good idea to create a
> single function that performed one-allocation string concatenation that
> could be used by various different versions of operator+. This patch adds
> such a function and calls it from the relevant implementations.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/basic_string.h:
> Add common function that performs single-allocation string
> concatenation. (__str_cat)
> Use __str_cat to perform optimized operator+, where relevant.
> * include/bits/basic_string.tcc::
> Remove single-allocation implementation of operator+.
>
> Signed-off-by: Will Hawkins 

I've pushed this patch to trunk now. I changed the commit message
significantly though:

   libstdc++: Refactor implementation of operator+ for std::string

   Until now operator+(char*, string) and operator+(string, char*) had
   different performance characteristics. The former required a single
   memory allocation and the latter required two. This patch makes the
   performance equal.

   After consultation with Jonathan, it seemed like a good idea to create a
   single function that performed one-allocation string concatenation that
   could be used by various different versions of operator+. This patch adds
   such a function and calls it from the relevant implementations.

   Co-authored-by: Jonathan Wakely 

   libstdc++-v3/ChangeLog:

   * include/bits/basic_string.h (__str_cat): Add common function
   that performs single-allocation string concatenation.
   (operator+): Use __str_cat.
   * include/bits/basic_string.tcc (operator+): Move to .h and
   define inline using __str_cat.

   Signed-off-by: Will Hawkins 

Specifically, I restored part of your earlier commit's message, which
gives the necessary context for the commit. Just starting with "After
consultation with Jonathan, ..." doesn't say anything about the patch
itself and is not very helpful without the earlier context from the
mailing list.

I added myself as Co-author, since the new patch was largely based on
a patch I sent in a private email.

And I changed the changelog part to better meet the format of GNU ChangeLogs.
https://www.gnu.org/prep/standards/html_node/Style-of-Change-Logs.html

The change is on trunk now (and I didn't see any libgomp test failures
this time).






> ---
>  libstdc++-v3/include/bits/basic_string.h   | 66 --
>  libstdc++-v3/include/bits/basic_string.tcc | 41 --
>  2 files changed, 49 insertions(+), 58 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/basic_string.h 
> b/libstdc++-v3/include/bits/basic_string.h
> index cd244191df4..9c2b57f5a1d 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -3485,6 +3485,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>  _GLIBCXX_END_NAMESPACE_CXX11
>  #endif
>
> +  template
> +_GLIBCXX20_CONSTEXPR
> +inline _Str
> +__str_concat(typename _Str::value_type const* __lhs,
> +typename _Str::size_type __lhs_len,
> +typename _Str::value_type const* __rhs,
> +typename _Str::size_type __rhs_len,
> +typename _Str::allocator_type const& __a)
> +{
> +  typedef typename _Str::allocator_type allocator_type;
> +  typedef __gnu_cxx::__alloc_traits _Alloc_traits;
> +  _Str __str(_Alloc_traits::_S_select_on_copy(__a));
> +  __str.reserve(__lhs_len + __rhs_len);
> +  __str.append(__lhs, __lhs_len);
> +  __str.append(__rhs, __rhs_len);
> +  return __str;
> +}
> +
>// operator+
>/**
> *  @brief  Concatenate two strings.
> @@ -3494,13 +3512,14 @@ _GLIBCXX_END_NAMESPACE_CXX11
> */
>template
>  _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> -basic_string<_CharT, _Traits, _Alloc>
> +inline basic_string<_CharT, _Traits, _Alloc>
>  operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
>   const basic_string<_CharT, _Traits, _Alloc>& __rhs)
>  {
> -  basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> -  __str.append(__rhs);
> -  return __str;
> +  typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +  return std::__str_concat<_Str>(__lhs.c_str(), __lhs.size(),
> +__rhs.c_str(), __rhs.size(),
> +__lhs.get_allocator());
>  }
>
>/**
> @@ -3511,9 +3530,16 @@ _GLIBCXX_END_NAMESPACE_CXX11
> */
>template
>  _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> -basic_string<_CharT,_Traits,_Alloc>
> +inline basic_string<_CharT,_Traits,_Alloc>
>  operator+(const _CharT* __lhs,
> - const basic_string<_CharT,_Traits,_Alloc>& __rhs);
> + const basic_string<_CharT,_Traits,_Alloc>& __rhs)
> +{
> +  __gl

Re: [PATCH] [PR24021] Implement PLUS_EXPR range-op entry for floats.

2022-11-08 Thread Andrew Waterman
On Tue, Nov 8, 2022 at 3:20 AM Jakub Jelinek via Gcc-patches
 wrote:
>
> On Mon, Nov 07, 2022 at 04:41:23PM +0100, Aldy Hernandez wrote:
> > As suggested upthread, I have also adjusted update_nan_sign() to drop
> > the NAN sign to VARYING if both operands are NAN.  As an optimization
> > I keep the sign if both operands are NAN and have the same sign.
>
> For NaNs this still relies on something IEEE754 doesn't guarantee,
> as I cited, after a binary operation the sign bit of the NaN is
> unspecified, whether there is one NaN operand or two.
> It might be that all CPUs handle it the way you've implemented
> (that for one NaN operand the sign of NaN result will be the same
> as that NaN operand and for two it will be the sign of one of the two
> NaNs operands, never something else), but I think we'd need to check
> more than one implementation for that (I've only tried x86_64 and thus
> SSE behavior in it), so one would need to test i387 long double behavior
> too, ARM/AArch64, PowerPC, s390{,x}, RISCV, ...
> The guarantee given by IEEE754 is only for those copy, negate, abs, copySign
> operations, so copying values around, NEG_EXPR, ABS_EXPR, __builtin_fabs*,
> __builtin_copysign*.

FWIW, RISC-V canonicalizes NaNs by clearing the sign bit; the signs of
the input NaNs do not factor in.

>
> Otherwise LGTM (but would be nice to get into GCC13 not just
> +, but also -, *, /, sqrt at least).
>
> Jakub
>


Re: [PATCH v2] libstdc++: basic_filebuf: don't flush more often than necessary.

2022-11-08 Thread Jonathan Wakely via Gcc-patches
On Mon, 7 Nov 2022 at 17:00, Jonathan Wakely wrote:
>
> On Thu, 6 Oct 2022 at 20:03, Charles-Francois Natali via Libstdc++
>  wrote:
> >
> > `basic_filebuf::xsputn` would bypass the buffer when passed a chunk of
> > size 1024 and above, seemingly as an optimisation.
> >
> > This can have a significant performance impact if the overhead of a
> > `write` syscall is non-negligible, e.g. on a slow disk, on network
> > filesystems, or simply during IO contention because instead of flushing
> > every `BUFSIZ` (by default), we can flush every 1024 char.
> > The impact is even greater with custom larger buffers, e.g. for network
> > filesystems, because the code could issue `write` for example 1000X more
> > often than necessary with respect to the buffer size.
> > It also introduces a significant discontinuity in performance when
> > writing chunks of size 1024 and above.
> >
> > See this reproducer which writes down a fixed number of chunks to a file
> > open with `O_SYNC` - to replicate high-latency `write` - for varying
> > size of chunks:
> >
> > ```
> > $ cat test_fstream_flush.cpp
> >
> > int
> > main(int argc, char* argv[])
> > {
> >   assert(argc == 3);
> >
> >   const auto* path = argv[1];
> >   const auto chunk_size = std::stoul(argv[2]);
> >
> >   const auto fd =
> > open(path, O_CREAT | O_TRUNC | O_WRONLY | O_SYNC | O_CLOEXEC, 0666);
> >   assert(fd >= 0);
> >
> >   auto filebuf = __gnu_cxx::stdio_filebuf(fd, std::ios_base::out);
> >   auto stream = std::ostream(&filebuf);
> >
> >   const auto chunk = std::vector(chunk_size);
> >
> >   for (auto i = 0; i < 1'000; ++i) {
> > stream.write(chunk.data(), chunk.size());
> >   }
> >
> >   return 0;
> > }
> > ```
> >
> > ```
> > $ g++ -o /tmp/test_fstream_flush test_fstream_flush.cpp -std=c++17
> > $ for i in $(seq 1021 1025); do echo -e "\n$i"; time 
> > /tmp/test_fstream_flush /tmp/foo $i; done
> >
> > 1021
> >
> > real    0m0.997s
> > user    0m0.000s
> > sys     0m0.038s
> >
> > 1022
> >
> > real    0m0.939s
> > user    0m0.005s
> > sys     0m0.032s
> >
> > 1023
> >
> > real    0m0.954s
> > user    0m0.005s
> > sys     0m0.034s
> >
> > 1024
> >
> > real    0m7.102s
> > user    0m0.040s
> > sys     0m0.192s
> >
> > 1025
> >
> > real    0m7.204s
> > user    0m0.025s
> > sys     0m0.209s
> > ```
> >
> > See the huge drop in performance at the 1024-boundary.
>
> I've finally found time to properly look at this, sorry for the delay.
>
> I thought I was unable to reproduce these numbers, then I realised I'd
> already installed a build with the patch, so was measuring the patched
> performance for both my "before" and "after" tests. Oops!
>
> My concern is that the patch doesn't only affect files on remote
> filesystems. I assume the original 1024-byte chunking behaviour is
> there for a reason, because for large writes the performance might be
> better if we just write directly instead of buffering and then writing
> again. Assuming we have a fast disk, writing straight to disk avoids
> copying in and out of the buffer. But if we have a slow disk, it's
> better to buffer and reduce the total number of disk writes. I'm
> concerned that the patch optimizes the slow disk case potentially at a
> cost for the fast disk case.
>
> I wonder whether it would make sense to check whether the buffer size
> has been manually changed, i.e. epptr() - pbase() != _M_buf_size. If
> the buffer has been explicitly set by the user, then we should assume
> they really want it to be used and so don't bypass it for writes >=
> 1024.
>
> In the absence of a better idea, I think I'm going to commit the patch
> as-is. I don't see it causing any measurable slowdown for very large
> writes on fast disks, and it's certainly a huge improvement for slow
> disks.

The patch has been pushed to trunk now, thanks for the contribution.

I removed the testcase and results from the commit message as they
don't need to be in the git log. I added a link to your email into
bugzilla though, so we can still find it easily.



[committed] libstdc++: Add always_inline to most allocator functions

2022-11-08 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This reduces the abstraction penalty for allocator support in
unoptimized code. Constructing and using allocators in containers calls
many one-line (or completely empty) inline functions. Those can all be
inlined to reduce code size and function call overhead for -O0.

libstdc++-v3/ChangeLog:

* include/bits/alloc_traits.h (allocator_traits): Add
always_inline attribute to all member functions.
(__do_alloc_on_copy, __alloc_on_copy, __do_alloc_on_move)
(__alloc_on_move, __do_alloc_on_swap, __alloc_on_swap)
(_Destroy(FwdIter, FwdIter, allocator<T>&)): Add
always_inline attribute.
* include/bits/allocator.h (allocator): Add always_inline
attribute to all member functions and equality operators.
* include/bits/new_allocator.h (__new_allocator): Likewise.
* include/ext/alloc_traits.h (__gnu_cxx::__alloc_traits):
Likewise.
---
 libstdc++-v3/include/bits/alloc_traits.h  | 40 ++-
 libstdc++-v3/include/bits/allocator.h | 13 ++--
 libstdc++-v3/include/bits/new_allocator.h | 13 ++--
 libstdc++-v3/include/ext/alloc_traits.h   | 21 ++--
 4 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/libstdc++-v3/include/bits/alloc_traits.h 
b/libstdc++-v3/include/bits/alloc_traits.h
index 8479bfd612f..203988ab933 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -463,7 +463,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*
*  Calls @c a.allocate(n)
   */
-  _GLIBCXX_NODISCARD static _GLIBCXX20_CONSTEXPR pointer
+  [[__nodiscard__,__gnu__::__always_inline__]]
+  static _GLIBCXX20_CONSTEXPR pointer
   allocate(allocator_type& __a, size_type __n)
   { return __a.allocate(__n); }
 
@@ -477,7 +478,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*
*  Returns  a.allocate(n, hint) 
   */
-  _GLIBCXX_NODISCARD static _GLIBCXX20_CONSTEXPR pointer
+  [[__nodiscard__,__gnu__::__always_inline__]]
+  static _GLIBCXX20_CONSTEXPR pointer
   allocate(allocator_type& __a, size_type __n, const_void_pointer __hint)
   {
 #if __cplusplus <= 201703L
@@ -495,6 +497,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*
*  Calls  a.deallocate(p, n) 
   */
+  [[__gnu__::__always_inline__]]
   static _GLIBCXX20_CONSTEXPR void
   deallocate(allocator_type& __a, pointer __p, size_type __n)
   { __a.deallocate(__p, __n); }
@@ -511,6 +514,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  `std::construct_at(__p, std::forward<_Args>(__args)...)` instead.
   */
   template
+   [[__gnu__::__always_inline__]]
static _GLIBCXX20_CONSTEXPR void
construct(allocator_type& __a __attribute__((__unused__)), _Up* __p,
  _Args&&... __args)
@@ -531,6 +535,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  Calls @c __a.destroy(__p).
   */
   template
+   [[__gnu__::__always_inline__]]
static _GLIBCXX20_CONSTEXPR void
destroy(allocator_type& __a __attribute__((__unused__)), _Up* __p)
noexcept(is_nothrow_destructible<_Up>::value)
@@ -547,6 +552,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  @param  __a  An allocator.
*  @return @c __a.max_size()
   */
+  [[__gnu__::__always_inline__]]
   static _GLIBCXX20_CONSTEXPR size_type
   max_size(const allocator_type& __a __attribute__((__unused__))) noexcept
   {
@@ -562,6 +568,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  @param  __rhs  An allocator.
*  @return @c __rhs
   */
+  [[__gnu__::__always_inline__]]
   static _GLIBCXX20_CONSTEXPR allocator_type
   select_on_container_copy_construction(const allocator_type& __rhs)
   { return __rhs; }
@@ -633,6 +640,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  `std::construct_at(__p, std::forward<_Args>(__args)...)` instead.
   */
   template
+   [[__gnu__::__always_inline__]]
static _GLIBCXX20_CONSTEXPR void
construct(allocator_type&, _Up* __p, _Args&&... __args)
noexcept(std::is_nothrow_constructible<_Up, _Args...>::value)
@@ -646,6 +654,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  Invokes the destructor for `*__p`.
   */
   template
+   [[__gnu__::__always_inline__]]
static _GLIBCXX20_CONSTEXPR void
destroy(allocator_type&, _Up* __p)
noexcept(is_nothrow_destructible<_Up>::value)
@@ -660,6 +669,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  @param  __rhs  An allocator.
*  @return `__rhs`
   */
+  [[__gnu__::__always_inline__]]
   static _GLIBCXX20_CONSTEXPR allocator_type
   select_on_container_copy_construction(const allocator_type& __rhs)
   { return __rhs; }
@@ -669,22 +679,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// @cond undocumented
 #if __cplusplus < 201703L
   template
+[[__gnu__::__always_inline__]]
 inline

[committed] libstdc++: Fix -Wsystem-headers warnings

2022-11-08 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

Fix some problems noticed with -Wsystem-headers.

libstdc++-v3/ChangeLog:

* include/bits/stl_tempbuf.h (_Temporary_buffer): Disable
warnings about get_temporary_buffer being deprecated.
* include/ext/functional (mem_fun1, mem_fun1_ref): Disable
warnings about mem_fun1_t, const_mem_fun1_t, mem_fun1_ref_t and
const_mem_fun1_ref_t being deprecated.
* include/std/array (__array_traits): Remove artificial
attributes which give warnings about being ignored.
* include/std/spanstream (basic_spanbuf::setbuf): Add assertion
and adjust to avoid narrowing warning.
* libsupc++/exception_ptr.h [!__cpp_rtti && !__cpp_exceptions]
(make_exception_ptr): Add missing inline specifier.
---
 libstdc++-v3/include/bits/stl_tempbuf.h | 3 +++
 libstdc++-v3/include/ext/functional | 4 ++--
 libstdc++-v3/include/std/array  | 4 ++--
 libstdc++-v3/include/std/spanstream | 3 ++-
 libstdc++-v3/libsupc++/exception_ptr.h  | 2 +-
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h 
b/libstdc++-v3/include/bits/stl_tempbuf.h
index b13aa3b0fcc..f3d4dd73073 100644
--- a/libstdc++-v3/include/bits/stl_tempbuf.h
+++ b/libstdc++-v3/include/bits/stl_tempbuf.h
@@ -257,6 +257,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  __ucr(__first, __last, __seed);
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
   template
 _Temporary_buffer<_ForwardIterator, _Tp>::
 _Temporary_buffer(_ForwardIterator __seed, size_type __original_len)
@@ -281,6 +283,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
 }
+#pragma GCC diagnostic pop
 
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
diff --git a/libstdc++-v3/include/ext/functional 
b/libstdc++-v3/include/ext/functional
index 9cf864d9290..a947ee6384d 100644
--- a/libstdc++-v3/include/ext/functional
+++ b/libstdc++-v3/include/ext/functional
@@ -396,8 +396,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { _M_initialize(161803398u); }
   };
 
-#pragma GCC diagnostic pop
-
   // Mem_fun adaptor helper functions mem_fun1 and mem_fun1_ref,
   // provided for backward compatibility, they are no longer part of
   // the C++ standard.
@@ -422,6 +420,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 mem_fun1_ref(_Ret (_Tp::*__f)(_Arg) const)
 { return std::const_mem_fun1_ref_t<_Ret, _Tp, _Arg>(__f); }
 
+#pragma GCC diagnostic pop
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 7ba92d0e90d..e26390e6f80 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -64,11 +64,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  struct _Type
  {
// Indexing is undefined.
-   __attribute__((__always_inline__,__artificial__,__noreturn__))
+   __attribute__((__always_inline__,__noreturn__))
_Tp& operator[](size_t) const noexcept { __builtin_trap(); }
 
// Conversion to a pointer produces a null pointer.
-   __attribute__((__always_inline__,__artificial__))
+   __attribute__((__always_inline__))
operator _Tp*() const noexcept { return nullptr; }
  };
 
diff --git a/libstdc++-v3/include/std/spanstream 
b/libstdc++-v3/include/std/spanstream
index 6abf013d41b..483996b274f 100644
--- a/libstdc++-v3/include/std/spanstream
+++ b/libstdc++-v3/include/std/spanstream
@@ -136,7 +136,8 @@ template
 basic_streambuf<_CharT, _Traits>*
 setbuf(_CharT* __s, streamsize __n) override
 {
-  span({__s, __n});
+  __glibcxx_assert(__n >= 0);
+  this->span(std::span<_CharT>(__s, __n));
   return this;
 }
 
diff --git a/libstdc++-v3/libsupc++/exception_ptr.h 
b/libstdc++-v3/libsupc++/exception_ptr.h
index fd9ceec88d4..b0118102123 100644
--- a/libstdc++-v3/libsupc++/exception_ptr.h
+++ b/libstdc++-v3/libsupc++/exception_ptr.h
@@ -280,7 +280,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
   // instead of a working one compiled with RTTI and/or exceptions enabled.
   template
 __attribute__ ((__always_inline__))
-exception_ptr
+inline exception_ptr
 make_exception_ptr(_Ex) _GLIBCXX_USE_NOEXCEPT
 { return exception_ptr(); }
 #endif
-- 
2.38.1



[committed] libstdc++: Fix -Wsystem-headers warnings in tests

2022-11-08 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* testsuite/18_support/new_nothrow.cc: Add missing noexcept
to operator delete replacements.
* testsuite/20_util/any/cons/92156.cc: Disable
-Winit-list-lifetime warnings from instantiating invalid
specialization of manager function.
* testsuite/20_util/any/modifiers/92156.cc: Likewise.
* testsuite/20_util/default_delete/void_neg.cc: Prune additional
diagnostics.
* testsuite/20_util/headers/memory/synopsis.cc: Add missing
noexcept.
* testsuite/20_util/shared_ptr/cons/void_neg.cc: Prune
additional diagnostic.
* testsuite/20_util/unique_ptr/creation/for_overwrite.cc: Add
missing noexcept to operator delete replacements.
* testsuite/21_strings/basic_string/cons/char/103919.cc:
Likewise.
* testsuite/23_containers/map/modifiers/emplace/92300.cc:
Likewise.
* testsuite/23_containers/map/modifiers/insert/92300.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/range_access_c++11.cc:
Add missing noexcept to synopsis declarations.
* testsuite/24_iterators/headers/iterator/range_access_c++14.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/range_access_c++17.cc:
Likewise.
---
 libstdc++-v3/testsuite/18_support/new_nothrow.cc   | 14 ++
 libstdc++-v3/testsuite/20_util/any/cons/92156.cc   |  1 +
 .../testsuite/20_util/any/modifiers/92156.cc   |  1 +
 .../testsuite/20_util/default_delete/void_neg.cc   |  3 +++
 .../testsuite/20_util/headers/memory/synopsis.cc   |  2 +-
 .../testsuite/20_util/shared_ptr/cons/void_neg.cc  |  2 ++
 .../20_util/unique_ptr/creation/for_overwrite.cc   |  4 ++--
 .../21_strings/basic_string/cons/char/103919.cc|  4 ++--
 .../23_containers/map/modifiers/emplace/92300.cc   |  4 ++--
 .../23_containers/map/modifiers/insert/92300.cc|  4 ++--
 .../headers/iterator/range_access_c++11.cc |  4 ++--
 .../headers/iterator/range_access_c++14.cc | 12 ++--
 .../headers/iterator/range_access_c++17.cc | 12 ++--
 13 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/libstdc++-v3/testsuite/18_support/new_nothrow.cc 
b/libstdc++-v3/testsuite/18_support/new_nothrow.cc
index d5e7eb58782..37806122bd0 100644
--- a/libstdc++-v3/testsuite/18_support/new_nothrow.cc
+++ b/libstdc++-v3/testsuite/18_support/new_nothrow.cc
@@ -64,7 +64,13 @@ void* operator new (size_t n)
 }
 }
 
-void operator delete (void *p)
+#if __cplusplus >= 201103L
+#define NOEXCEPT noexcept
+#else
+#define NOEXCEPT
+#endif
+
+void operator delete (void *p) NOEXCEPT
 {
 ++delete_called;
 if (p)
@@ -77,18 +83,18 @@ void* operator new[] (size_t n)
 return operator new(n);
 }
 
-void operator delete[] (void *p)
+void operator delete[] (void *p) NOEXCEPT
 {
 ++delete_vec_called;
 operator delete(p);
 }
 
 #if __cplusplus >= 201402L
-void operator delete (void *p, std::size_t)
+void operator delete (void *p, std::size_t) noexcept
 {
   ::operator delete(p);
 }
-void operator delete[] (void *p, std::size_t)
+void operator delete[] (void *p, std::size_t) noexcept
 {
   ::operator delete[](p);
 }
diff --git a/libstdc++-v3/testsuite/20_util/any/cons/92156.cc 
b/libstdc++-v3/testsuite/20_util/any/cons/92156.cc
index 71e9dd94090..0e768df9a00 100644
--- a/libstdc++-v3/testsuite/20_util/any/cons/92156.cc
+++ b/libstdc++-v3/testsuite/20_util/any/cons/92156.cc
@@ -1,4 +1,5 @@
 // { dg-do run { target c++17 } }
+// { dg-options "-Wno-init-list-lifetime" }
 
 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
 //
diff --git a/libstdc++-v3/testsuite/20_util/any/modifiers/92156.cc 
b/libstdc++-v3/testsuite/20_util/any/modifiers/92156.cc
index d8f9893667b..b98d0e8e92a 100644
--- a/libstdc++-v3/testsuite/20_util/any/modifiers/92156.cc
+++ b/libstdc++-v3/testsuite/20_util/any/modifiers/92156.cc
@@ -1,4 +1,5 @@
 // { dg-do run { target c++17 } }
+// { dg-options "-Wno-init-list-lifetime" }
 
 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
 //
diff --git a/libstdc++-v3/testsuite/20_util/default_delete/void_neg.cc 
b/libstdc++-v3/testsuite/20_util/default_delete/void_neg.cc
index f6aefc0a7ff..04042c2d745 100644
--- a/libstdc++-v3/testsuite/20_util/default_delete/void_neg.cc
+++ b/libstdc++-v3/testsuite/20_util/default_delete/void_neg.cc
@@ -27,3 +27,6 @@ void test01()
   d(nullptr);   // { dg-error "here" }
   // { dg-error "delete pointer to incomplete type" "" { target *-*-* } 0 }
 }
+
+// { dg-prune-output "invalid application of 'sizeof' to a void type" }
+// { dg-prune-output "deleting 'void*' is undefined" }
diff --git a/libstdc++-v3/testsuite/20_util/headers/memory/synopsis.cc 
b/libstdc++-v3/testsuite/20_util/headers/memory/synopsis.cc
index 15437c72ee0..b14c4278cd3 100644
--- a/libstdc++-v3/testsuite/20_util/headers/memory/synopsis.cc
+++ b/libstdc++-v3/testsuite/

[PATCH 1/3] Compute a table of DWARF register sizes at compile time

2022-11-08 Thread Florian Weimer via Gcc-patches
The sizes are compile-time constants.  Create a vector with them,
so that they can be inspected at compile time.

* gcc/dwarf2cfi.cc (init_return_column_size): Remove.
(init_one_dwarf_reg_size): Adjust.
(generate_dwarf_reg_sizes): New function.  Extracted
from expand_builtin_init_dwarf_reg_sizes.
(expand_builtin_init_dwarf_reg_sizes): Call
generate_dwarf_reg_sizes.
* gcc/target.def (init_dwarf_reg_sizes_extra): Adjust
hook signature.
* gcc/config/msp430/msp430.cc
(msp430_init_dwarf_reg_sizes_extra): Adjust.
* gcc/config/rs6000/rs6000.cc (rs6000_init_dwarf_reg_sizes_extra):
Likewise.
* gcc/doc/tm.texi: Update.
---
 gcc/config/msp430/msp430.cc | 11 +
 gcc/config/rs6000/rs6000.cc | 14 +-
 gcc/doc/tm.texi |  7 +--
 gcc/dwarf2cfi.cc| 93 ++---
 gcc/target.def  |  8 ++--
 5 files changed, 58 insertions(+), 75 deletions(-)

diff --git a/gcc/config/msp430/msp430.cc b/gcc/config/msp430/msp430.cc
index 6c15780a2b6..dbea8d7f50f 100644
--- a/gcc/config/msp430/msp430.cc
+++ b/gcc/config/msp430/msp430.cc
@@ -3202,11 +3202,9 @@ msp430_expand_eh_return (rtx eh_handler)
 #undef  TARGET_INIT_DWARF_REG_SIZES_EXTRA
 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA msp430_init_dwarf_reg_sizes_extra
 void
-msp430_init_dwarf_reg_sizes_extra (tree address)
+msp430_init_dwarf_reg_sizes_extra (poly_uint16 *sizes)
 {
   int i;
-  rtx addr = expand_normal (address);
-  rtx mem = gen_rtx_MEM (BLKmode, addr);
 
   /* This needs to match msp430_unwind_word_mode (above).  */
   if (!msp430x)
@@ -3218,12 +3216,7 @@ msp430_init_dwarf_reg_sizes_extra (tree address)
   unsigned int rnum = DWARF2_FRAME_REG_OUT (dnum, 1);
 
   if (rnum < DWARF_FRAME_REGISTERS)
-   {
- HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (QImode);
-
- emit_move_insn (adjust_address (mem, QImode, offset),
- gen_int_mode (4, QImode));
-   }
+   sizes[rnum] = 4;
 }
 }
 
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index a85d7630b41..fddb6a8a0f7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -23783,27 +23783,17 @@ rs6000_initial_elimination_offset (int from, int to)
 /* Fill in sizes of registers used by unwinder.  */
 
 static void
-rs6000_init_dwarf_reg_sizes_extra (tree address)
+rs6000_init_dwarf_reg_sizes_extra (poly_uint16 *sizes)
 {
   if (TARGET_MACHO && ! TARGET_ALTIVEC)
 {
   int i;
-  machine_mode mode = TYPE_MODE (char_type_node);
-  rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-  rtx mem = gen_rtx_MEM (BLKmode, addr);
-  rtx value = gen_int_mode (16, mode);
 
   /* On Darwin, libgcc may be built to run on both G3 and G4/5.
 The unwinder still needs to know the size of Altivec registers.  */
 
   for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
-   {
- int column = DWARF_REG_TO_UNWIND_COLUMN
-   (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
- HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
-
- emit_move_insn (adjust_address (mem, mode, offset), value);
-   }
+   sizes[i] = 16;
 }
 }
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8572313b308..09a3ab3e55c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -9824,13 +9824,14 @@ used to return a smaller mode than the raw mode to 
prevent call
 clobbered parts of a register altering the frame register size
 @end deftypefn
 
-@deftypefn {Target Hook} void TARGET_INIT_DWARF_REG_SIZES_EXTRA (tree 
@var{address})
+@deftypefn {Target Hook} void TARGET_INIT_DWARF_REG_SIZES_EXTRA (poly_uint16 
*@var{sizes})
 If some registers are represented in Dwarf-2 unwind information in
 multiple pieces, define this hook to fill in information about the
 sizes of those pieces in the table used by the unwinder at runtime.
-It will be called by @code{expand_builtin_init_dwarf_reg_sizes} after
+It will be called by @code{generate_dwarf_reg_sizes} after
 filling in a single size corresponding to each hard register;
-@var{address} is the address of the table.
+@var{sizes} is the address of the table.  It will contain
+@code{DWARF_FRAME_REGISTERS} elements when this hook is called.
 @end deftypefn
 
 @deftypefn {Target Hook} bool TARGET_ASM_TTYPE (rtx @var{sym})
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index bef3165e691..b29173b2156 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -36,7 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "except.h"/* expand_builtin_dwarf_sp_column */
 #include "profile-count.h" /* For expr.h */
-#include "expr.h"  /* init_return_column_size */
+#include "expr.h"  /* expand_normal, emit_move_insn */
 #include "output.h"/* asm_out_file */
 #include "debug.h" /* dwarf2out_do_frame, dw

[PATCH 0/3] Further libgcc unwinder improvements

2022-11-08 Thread Florian Weimer via Gcc-patches
This series makes some further unwinder improvements.  Unfortunately,
not many targets define __LIBGCC_DWARF_REG_SIZES_CONSTANT__; x86-64
does, and it makes uw_install_context_1 quite a bit faster because GCC
no longer has to emit generic memcpy code for it.  In general, it may be
worthwhile to replace this code with target-specific implementations.

Tested on powerpc64le-linux-gnu, x86_64-linux-gnu; I didn't see any test
result differences.  Built GCC for msp430-elf, too.

The revision for the patch I posted earlier (using SWAR techniques for
get_cie_encoding) is not ready yet and probably won't make GCC 13.  It
requires some header cleanups first.

Thanks,
Florian

Florian Weimer (3):
  Compute a table of DWARF register sizes at compile time
  Define __LIBGCC_DWARF_REG_SIZES_CONSTANT__ if DWARF register size is
constant
  libgcc: Specialize execute_cfa_program in DWARF unwinder for
alignments

 gcc/c-family/c-cppbuiltin.cc|   8 +
 gcc/config/msp430/msp430.cc |  11 +-
 gcc/config/rs6000/rs6000.cc |  14 +-
 gcc/debug.h |   2 +
 gcc/doc/tm.texi |   7 +-
 gcc/dwarf2cfi.cc| 116 +-
 gcc/target.def  |   8 +-
 libgcc/unwind-dw2-execute_cfa.h | 322 
 libgcc/unwind-dw2.c | 360 ++--
 9 files changed, 472 insertions(+), 376 deletions(-)
 create mode 100644 libgcc/unwind-dw2-execute_cfa.h


base-commit: 5d060d8b0477ff4911f41c816281daaa24b41a13
-- 
2.38.1



[PATCH 3/3] libgcc: Specialize execute_cfa_program in DWARF unwinder for alignments

2022-11-08 Thread Florian Weimer via Gcc-patches
The parameters fs->data_align and fs->code_align always have fixed
values for a particular target in GCC-generated code.  Specialize
execute_cfa_program for these values, to avoid multiplications.

gcc/

* c-family/c-cppbuiltin.cc (c_cpp_builtins): Define
__LIBGCC_DWARF_CIE_DATA_ALIGNMENT__.

libgcc/

* unwind-dw2-execute_cfa.h: New file.  Extracted from
the execute_cfa_program function in unwind-dw2.c.
* unwind-dw2.c (execute_cfa_program_generic): New function.
(execute_cfa_program_specialized): Likewise.
(execute_cfa_program): Call execute_cfa_program_specialized
or execute_cfa_program_generic, as appropriate.
---
 gcc/c-family/c-cppbuiltin.cc|   2 +
 libgcc/unwind-dw2-execute_cfa.h | 322 
 libgcc/unwind-dw2.c | 319 +++
 3 files changed, 354 insertions(+), 289 deletions(-)
 create mode 100644 libgcc/unwind-dw2-execute_cfa.h

diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index ab98bf3b059..c8c327b3b2e 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1521,6 +1521,8 @@ c_cpp_builtins (cpp_reader *pfile)
  builtin_define_with_int_value ("__LIBGCC_DWARF_REG_SIZES_CONSTANT__",
 value);
   }
+  builtin_define_with_int_value ("__LIBGCC_DWARF_CIE_DATA_ALIGNMENT__",
+DWARF_CIE_DATA_ALIGNMENT);
 #ifdef EH_RETURN_STACKADJ_RTX
   cpp_define (pfile, "__LIBGCC_EH_RETURN_STACKADJ_RTX__");
 #endif
diff --git a/libgcc/unwind-dw2-execute_cfa.h b/libgcc/unwind-dw2-execute_cfa.h
new file mode 100644
index 000..dd97b786668
--- /dev/null
+++ b/libgcc/unwind-dw2-execute_cfa.h
@@ -0,0 +1,322 @@
+/* DWARF2 exception handling CFA execution engine.
+   Copyright (C) 1997-2022 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file is included from unwind-dw2.c to specialize the code for certain
+   values of DATA_ALIGN and CODE_ALIGN.  These macros must be defined prior to
+   including this file.  */
+
+{
+  struct frame_state_reg_info *unused_rs = NULL;
+
+  /* Don't allow remember/restore between CIE and FDE programs.  */
+  fs->regs.prev = NULL;
+
+  /* The comparison with the return address uses < rather than <= because
+ we are only interested in the effects of code before the call; for a
+ noreturn function, the return address may point to unrelated code with
+ a different stack configuration that we are not interested in.  We
+ assume that the call itself is unwind info-neutral; if not, or if
+ there are delay instructions that adjust the stack, these must be
+ reflected at the point immediately before the call insn.
+ In signal frames, return address is after last completed instruction,
+ so we add 1 to return address to make the comparison <=.  */
+  while (insn_ptr < insn_end
+&& fs->pc < context->ra + _Unwind_IsSignalFrame (context))
+{
+  unsigned char insn = *insn_ptr++;
+  _uleb128_t reg, utmp;
+  _sleb128_t offset, stmp;
+
+  if ((insn & 0xc0) == DW_CFA_advance_loc)
+   fs->pc += (insn & 0x3f) * CODE_ALIGN;
+  else if ((insn & 0xc0) == DW_CFA_offset)
+   {
+ reg = insn & 0x3f;
+ insn_ptr = read_uleb128 (insn_ptr, &utmp);
+ offset = (_Unwind_Sword) utmp * DATA_ALIGN;
+ reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
+ if (UNWIND_COLUMN_IN_RANGE (reg))
+   {
+ fs->regs.how[reg] = REG_SAVED_OFFSET;
+ fs->regs.reg[reg].loc.offset = offset;
+   }
+   }
+  else if ((insn & 0xc0) == DW_CFA_restore)
+   {
+ reg = insn & 0x3f;
+ reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
+ if (UNWIND_COLUMN_IN_RANGE (reg))
+   fs->regs.how[reg] = REG_UNSAVED;
+   }
+  else switch (insn)
+   {
+   case DW_CFA_set_loc:
+ {
+   _Unwind_Ptr pc;
+
+   insn_ptr = read_encoded_value (context, 

[PATCH 2/3] Define __LIBGCC_DWARF_REG_SIZES_CONSTANT__ if DWARF register size is constant

2022-11-08 Thread Florian Weimer via Gcc-patches
And use that to speed up the libgcc unwinder.

* gcc/debug.h (dwarf_reg_sizes_constant): Declare.
* gcc/dwarf2cfi.cc (dwarf_reg_sizes_constant): New function.
* gcc/c-family/c-cppbuiltin.cc
(__LIBGCC_DWARF_REG_SIZES_CONSTANT__): Define if constant is
known.

libgcc/

* unwind-dw2.c (dwarf_reg_size): New function.
(_Unwind_GetGR, _Unwind_SetGR, _Unwind_SetGRPtr)
(_Unwind_SetSpColumn, uw_install_context_1): Use it.
(uw_init_context_1): Do not initialize dwarf_reg_size_table
if not in use.
---
 gcc/c-family/c-cppbuiltin.cc |  6 ++
 gcc/debug.h  |  2 ++
 gcc/dwarf2cfi.cc | 23 
 libgcc/unwind-dw2.c  | 41 +---
 4 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index cdb658f6ac9..ab98bf3b059 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1515,6 +1515,12 @@ c_cpp_builtins (cpp_reader *pfile)
 #endif
   builtin_define_with_int_value ("__LIBGCC_DWARF_FRAME_REGISTERS__",
 DWARF_FRAME_REGISTERS);
+  {
+   int value = dwarf_reg_sizes_constant ();
+   if (value > 0)
+ builtin_define_with_int_value ("__LIBGCC_DWARF_REG_SIZES_CONSTANT__",
+value);
+  }
 #ifdef EH_RETURN_STACKADJ_RTX
   cpp_define (pfile, "__LIBGCC_EH_RETURN_STACKADJ_RTX__");
 #endif
diff --git a/gcc/debug.h b/gcc/debug.h
index fe85115d5f3..6bcc8da1f76 100644
--- a/gcc/debug.h
+++ b/gcc/debug.h
@@ -245,6 +245,8 @@ extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
 
 /* Dwarf2 frame information.  */
 
+extern int dwarf_reg_sizes_constant ();
+
 extern void dwarf2out_begin_prologue (unsigned int, unsigned int,
  const char *);
 extern void dwarf2out_vms_end_prologue (unsigned int, const char *);
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index b29173b2156..d45d20478b4 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -334,6 +334,29 @@ generate_dwarf_reg_sizes (poly_uint16 *sizes)
 targetm.init_dwarf_reg_sizes_extra (sizes);
 }
 
+/* Return 0 if the DWARF register sizes are not constant, otherwise
+   return the size constant.  */
+
+int
+dwarf_reg_sizes_constant ()
+{
+  poly_uint16 *sizes = XALLOCAVEC (poly_uint16, DWARF_FRAME_REGISTERS);
+  generate_dwarf_reg_sizes (sizes);
+
+  int result;
+  for (unsigned int i = 0; i < DWARF_FRAME_REGISTERS; i++)
+{
+  unsigned short value;
+  if (!sizes[i].is_constant (&value))
+   return 0;
+  if (i == 0)
+   result = value;
+  else if (result != value)
+   return 0;
+}
+  return result;
+}
+
 /* Generate code to initialize the dwarf register size table located
at the provided ADDRESS.  */
 
diff --git a/libgcc/unwind-dw2.c b/libgcc/unwind-dw2.c
index eaceace2029..c370121bb29 100644
--- a/libgcc/unwind-dw2.c
+++ b/libgcc/unwind-dw2.c
@@ -148,9 +148,25 @@ struct _Unwind_Context
   char by_value[__LIBGCC_DWARF_FRAME_REGISTERS__+1];
 };
 
+#ifdef __LIBGCC_DWARF_REG_SIZES_CONSTANT__
+static inline unsigned char
+dwarf_reg_size (int index __attribute__ ((__unused__)))
+{
+  return __LIBGCC_DWARF_REG_SIZES_CONSTANT__;
+}
+#else
 /* Byte size of every register managed by these routines.  */
 static unsigned char dwarf_reg_size_table[__LIBGCC_DWARF_FRAME_REGISTERS__+1];
 
+
+static inline unsigned char
+dwarf_reg_size (unsigned index)
+{
+  gcc_assert (index < sizeof (dwarf_reg_size_table));
+  return dwarf_reg_size_table[index];
+}
+#endif
+
 
 /* Read unaligned data from the instruction buffer.  */
 
@@ -232,8 +248,7 @@ _Unwind_GetGR (struct _Unwind_Context *context, int regno)
 #endif
 
   index = DWARF_REG_TO_UNWIND_COLUMN (regno);
-  gcc_assert (index < (int) sizeof(dwarf_reg_size_table));
-  size = dwarf_reg_size_table[index];
+  size = dwarf_reg_size (index);
   val = context->reg[index];
 
   if (_Unwind_IsExtendedContext (context) && context->by_value[index])
@@ -280,8 +295,7 @@ _Unwind_SetGR (struct _Unwind_Context *context, int index, 
_Unwind_Word val)
   void *ptr;
 
   index = DWARF_REG_TO_UNWIND_COLUMN (index);
-  gcc_assert (index < (int) sizeof(dwarf_reg_size_table));
-  size = dwarf_reg_size_table[index];
+  size = dwarf_reg_size (index);
 
   if (_Unwind_IsExtendedContext (context) && context->by_value[index])
 {
@@ -329,9 +343,8 @@ _Unwind_SetGRValue (struct _Unwind_Context *context, int 
index,
_Unwind_Word val)
 {
   index = DWARF_REG_TO_UNWIND_COLUMN (index);
-  gcc_assert (index < (int) sizeof(dwarf_reg_size_table));
   /* Return column size may be smaller than _Unwind_Context_Reg_Val.  */
-  gcc_assert (dwarf_reg_size_table[index] <= sizeof (_Unwind_Context_Reg_Val));
+  gcc_assert (dwarf_reg_size (index) <= sizeof (_Unwind_Context_Reg_Val));
 
   context->by_value[index] = 1;
   context->reg[index] 

  1   2   >