[PATCH] Revert parts of ADDR_EXPR/CONSTRUCTOR treatment change in match.pd

2022-12-12 Thread Richard Biener via Gcc-patches
This reverts the part that substitutes from the definition of an
SSA name to the capture, thus ADDR_EXPR@0 eventually yielding
&y_1->a[i_2] instead of _3.  That's because I didn't think of
how to deal with substituting @0 in the result pattern.  So
the following re-instantiates the SSA def CONSTRUCTOR handling
and in the ADDR_EXPR helpers used by match.pd handles SSA names
defined to ADDR_EXPRs transparently.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* genmatch.cc (dt_simplify::gen): Revert last change.
* match.pd: Revert simplification of CONSTUCTOR leaf handling.
(&x cmp SSA_NAME): Handle ADDR_EXPR in SSA defs.
* fold-const.cc (split_address_to_core_and_offset): Handle
ADDR_EXPRs in SSA defs.
(address_compare): Likewise.
---
 gcc/fold-const.cc |  9 +
 gcc/genmatch.cc   | 20 
 gcc/match.pd  | 22 +++---
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index b72cc0a1d51..d81289ec9ae 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -16317,6 +16317,11 @@ split_address_to_core_and_offset (tree exp,
   poly_int64 bitsize;
   location_t loc = EXPR_LOCATION (exp);
 
+  if (TREE_CODE (exp) == SSA_NAME)
+if (gassign *def = dyn_cast  (SSA_NAME_DEF_STMT (exp)))
+  if (gimple_assign_rhs_code (def) == ADDR_EXPR)
+   exp = gimple_assign_rhs1 (def);
+
   if (TREE_CODE (exp) == ADDR_EXPR)
 {
   core = get_inner_reference (TREE_OPERAND (exp, 0), &bitsize, pbitpos,
@@ -16601,6 +16606,10 @@ address_compare (tree_code code, tree type, tree op0, 
tree op1,
 tree &base0, tree &base1, poly_int64 &off0, poly_int64 &off1,
 bool generic)
 {
+  if (TREE_CODE (op0) == SSA_NAME)
+op0 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op0));
+  if (TREE_CODE (op1) == SSA_NAME)
+op1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op1));
   gcc_checking_assert (TREE_CODE (op0) == ADDR_EXPR);
   gcc_checking_assert (TREE_CODE (op1) == ADDR_EXPR);
   base0 = get_addr_base_and_unit_offset (TREE_OPERAND (op0, 0), &off0);
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f53e61a4a8d..b56c17711e0 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3607,26 +3607,14 @@ dt_simplify::gen (FILE *f, int indent, bool gimple, int 
depth ATTRIBUTE_UNUSED)
   if (s->capture_max >= 0)
 {
   char opname[20];
-  fprintf_indent (f, indent, "tree captures[%u] ATTRIBUTE_UNUSED = {",
- s->capture_max + 1);
+  fprintf_indent (f, indent, "tree captures[%u] ATTRIBUTE_UNUSED = { %s",
+ s->capture_max + 1, indexes[0]->get_name (opname));
 
-  for (int i = 0; i <= s->capture_max; ++i)
+  for (int i = 1; i <= s->capture_max; ++i)
{
  if (!indexes[i])
break;
- const char *opstr = indexes[i]->get_name (opname);
- expr *e = dyn_cast  (indexes[i]->op);
- fputs (i == 0 ? " " : ", ", f);
- if (e && gimple
- /* Transparently handle picking up CONSTRUCTOR and ADDR_EXPR
-leafs if they appear in a separate definition.  */
- && (*e->operation == CONSTRUCTOR
- || *e->operation == ADDR_EXPR))
-   fprintf (f, "(TREE_CODE (%s) == SSA_NAME "
-"? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (%s)) : %s)",
-opstr, opstr, opstr);
- else
-   fprintf (f, "%s", opstr);
+ fprintf (f, ", %s", indexes[i]->get_name (opname));
}
   fprintf (f, " };\n");
 }
diff --git a/gcc/match.pd b/gcc/match.pd
index 3517605c04d..fdba5833beb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3936,7 +3936,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
   (view_convert CONSTRUCTOR@0)
   (with
-   { tree ctor = @0; }
+   { tree ctor = (TREE_CODE (@0) == SSA_NAME
+ ? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0); }
(switch
 (if (CONSTRUCTOR_NELTS (ctor) == 0)
  { build_zero_cst (type); })
@@ -5735,19 +5736,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   /* SSA names are canonicalized to 2nd place.  */
   (cmp addr@0 SSA_NAME@1)
   (with
-   { poly_int64 off; tree base; }
+   {
+ poly_int64 off; tree base;
+ tree addr = (TREE_CODE (@0) == SSA_NAME
+ ? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0);
+   }
/* A local variable can never be pointed to by
   the default SSA name of an incoming parameter.  */
(if (SSA_NAME_IS_DEFAULT_DEF (@1)
&& TREE_CODE (SSA_NAME_VAR (@1)) == PARM_DECL
-   && (base = get_base_address (TREE_OPERAND (@0, 0)))
+   && (base = get_base_address (TREE_OPERAND (addr, 0)))
&& TREE_CODE (base) == VAR_DECL
&& auto_var_in_fn_p (base, current_function_decl))
 (if (cmp == NE_EXPR)
  { constant_boolean_node (true, type); }
  { constant_boolean_node (false, type); })
 /* If the address is based on @1

Re: [PATCH v4 1/19] modula2 front end: changes outside gcc/m2, libgm2 and gcc/testsuite.

2022-12-12 Thread Gaius Mulley via Gcc-patches
Richard Biener  writes:

> On Sat, Dec 10, 2022 at 1:49 AM Gaius Mulley via Gcc-patches
>  wrote:
>>
>>
>> While writing the ChangeLog entries git gcc-verify spotted an oversight
>> with v3 of this patch set.  I had forgotten to post gm2.texi and also a
>> tiny patchlet in gcc/configure.ac (to detect Python).  HAVE_PYTHON is
>> used within gcc/m2/Make-lang.in to avoid generating the library section
>> included by gm2.texi should Python not be available.
>
> The configure check doesn't verify the python version.

indeed - I'll fix this

>  Does --enable-generated-files-in-srcdir (used when building release
> tarballs) generate the required section so it's packed up in the
> release tarball and avoid requiring python3 when building from the
> release tarball?

currently working on it - no it fails - should be done rsn.

> Note these kind of dependences should be mentioned in
> install.texi - for python also listing required modules would be nice.

I'll add these.

> Thanks,
> Richard.
>
>> ok to commit?  I've included gm2-lang.cc and lang.opt for reference.
>
> The python script would have been more informative, the generated
> .texi fragment and the Make-lang.in part as well

ah yes this is true, sorry.

regards,
Gaius


Re: [PATCH 1/3] Rework 128-bit complex multiply and divide, PR target/107299

2022-12-12 Thread Kewen.Lin via Gcc-patches
on 2022/12/9 06:04, Michael Meissner wrote:
> On Wed, Dec 07, 2022 at 03:55:41PM +0800, Kewen.Lin wrote:
>> Hi Mike,
>>
>> on 2022/12/7 14:44, Michael Meissner wrote:
>>> On Tue, Dec 06, 2022 at 05:36:54PM +0800, Kewen.Lin wrote:
 Hi Mike,

 Thanks for fixing this!

 Could you help to elaborate why we need to disable it during libgcc 
 building?
>>>
>>> When you are building libgcc, you are building the __mulkc3, __divkc3
>>> functions.  The mapping in the compiler interferes with those functions,
>>> because at the moment, libgcc uses an alternate IEEE 128-bit type.
>>>
>>
>> But I'm still confused.  For __mulkc3 (__divkc3 is similar),
>>
>> 1) with -mabi=ieeelongdouble (TARGET_IEEEQUAD true, define 
>> __LONG_DOUBLE_IEEE128__),
>>the used types are:
>>
>>typedef float TFtype __attribute__ ((mode (TF)));
>>typedef __complex float TCtype __attribute__ ((mode (TC)));
>>
>> 2) with -mabi=ibmlongdouble (TARGET_IEEEQUAD false, not 
>> __LONG_DOUBLE_IEEE128__ defined),
>>the used types are:
>>
>>typedef float TFtype __attribute__ ((mode (KF)));
>>typedef __complex float TCtype __attribute__ ((mode (KC)));
>>
>> The proposed mapping in the current patch is:
>>
>> +
>> +  if (id == complex_multiply_builtin_code (KCmode))
>> +newname = "__mulkc3";
>> +
>> +  else if (id == complex_multiply_builtin_code (ICmode))
>> +newname = "__multc3";
>> +
>> +  else if (id == complex_multiply_builtin_code (TCmode))
>> +newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
>>
>> for 1), TCmode && TARGET_IEEEQUAD => "__mulkc3"
>> for 2), KCmode => "__mulkc3"
>>
>> Both should be still with name "__mulkc3", do I miss anything?
>>
>> BR,
>> Kewen
> 
> The reason is due to the different internal types, the value range propigation
> pass throws an error when we are trying to build libgcc.  This is due to the
> underlying problem of different IEEE 128-bit types within the compiler.
> 

But this is the reason why we need patch #2 and #3, not the reason why we need
the special handling for building_libgcc in patch #1, right?

Without or with patch #1, the below ICE in libgcc exists, the ICE should have
nothing to do with the special handling for building_libgcc in patch #1.  I
think patch #2 which makes _Float128 and __float128 use the same internal
type fixes that ICE.

I still don't get the point why we need the special handling for 
building_libgcc,
I also tested on top of patch #1 and #2 w/ and w/o the special handling for
building_libgcc, both bootstrapped and regress-tested.

Could you have a double check?

> The 128-bit IEEE support in libgcc was written before _Float128 was added to
> GCC.  One consequence is that you can't get to the complex variant of
> __float128.  So libgcc needs to use the attribute mode to get to that type.
> 
> But with the support for IEEE 128-bit long double changing things, it makes 
> the
> libgcc code use the wrong code.
> 
> /home/meissner/fsf-src/work102/libgcc/config/rs6000/_mulkc3.c: In function 
> ‘__mulkc3_sw’:
> /home/meissner/fsf-src/work102/libgcc/config/rs6000/_mulkc3.c:97:1: internal 
> compiler error: in fold_stmt, at gimple-range-fold.cc:522
>97 | }
>   | ^
> 0x122784f3 fold_using_range::fold_stmt(vrange&, gimple*, fur_source&, 
> tree_node*)
> /home/meissner/fsf-src/work102/gcc/gimple-range-fold.cc:522
> 0x1226477f gimple_ranger::fold_range_internal(vrange&, gimple*, tree_node*)
> /home/meissner/fsf-src/work102/gcc/gimple-range.cc:257
> 0x12264b1f gimple_ranger::range_of_stmt(vrange&, gimple*, tree_node*)
> /home/meissner/fsf-src/work102/gcc/gimple-range.cc:318
> 0x113bdd8b range_query::value_of_stmt(gimple*, tree_node*)
> /home/meissner/fsf-src/work102/gcc/value-query.cc:134
> 0x1134838f rvrp_folder::value_of_stmt(gimple*, tree_node*)
> /home/meissner/fsf-src/work102/gcc/tree-vrp.cc:1023
> 0x111344cf 
> substitute_and_fold_dom_walker::before_dom_children(basic_block_def*)
> /home/meissner/fsf-src/work102/gcc/tree-ssa-propagate.cc:819
> 0x121ecbd3 dom_walker::walk(basic_block_def*)
> /home/meissner/fsf-src/work102/gcc/domwalk.cc:311
> 0x11134ee7 substitute_and_fold_engine::substitute_and_fold(basic_block_def*)
> /home/meissner/fsf-src/work102/gcc/tree-ssa-propagate.cc:998
> 0x11346bb7 execute_ranger_vrp(function*, bool, bool)
> /home/meissner/fsf-src/work102/gcc/tree-vrp.cc:1084
> 0x11347063 execute
> /home/meissner/fsf-src/work102/gcc/tree-vrp.cc:1165
> Please submit a full bug report, with preprocessed source (by using 
> -freport-bug).
> Please include the complete backtrace with any bug report.
> See  for instructions.
> make[1]: *** [/home/meissner/fsf-src/work102/libgcc/shared-object.mk:14: 
> _mulkc3.o] Error 1
> make[1]: Leaving directory 
> '/home/meissner/fsf-build-ppc64le/work102/powerpc64le-unknown-linux-gnu/libgcc'
> make: *** [Makefile:20623: all-target-libgcc] Error 2
> 
>>> I have a pa

Re: [PATCH] tree-optimization/99919 - bogus uninit diagnostic with bitfield guards

2022-12-12 Thread Richard Biener via Gcc-patches
On Thu, Dec 8, 2022 at 12:07 PM Richard Biener via Fortran
 wrote:
>
> For the testcase in this PR what fold-const.cc optimize_bit_field_compare
> does to bitfield against constant compares is confusing the uninit
> predicate analysis and it also makes SRA obfuscate instead of optimize
> the code.  We've long had the opinion that those optimizations are
> premature but we do not have any replacement for the more complicated
> ones combining multiple bitfield tests.  The following disables mangling
> the case of a single bitfield test against constants but preserving
> the existing diagnostic and optimization to a compile-time determined
> value.
>
> This requires silencing a bogus uninit diagnostic in the Fortran
> frontend which I've done in a minimal way, avoiding initializing
> the 40 byte symbol_attribute structure.  There's several issues,
> one is the flag_coarrays is a global variable likely not CSEd
> to help the uninit predicate analysis, the other is us short-circuiting
> the flag_coarray == GFC_FCOARRAY_LIB && lhs_caf_attr.codimension
> accesses as both have no side-effects so the guard isn't effective.
> If the frontend folks are happy with this I can localize both
> lhs_caf_attr and rhs_caf_attr and copy out the two only flags
> tested by the code instead of the somewhat incomplete approach in
> the patch.  Any opinions here?
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Testing revealed this regresses

FAIL: c-c++-common/fold-masked-cmp-1.c  -Wc++-compat
scan-assembler-times testn?b 2
FAIL: c-c++-common/fold-masked-cmp-2.c  -Wc++-compat
scan-assembler-times testn?b 2

I filed PR108070 for the failure to optimize this on the RTL level
and put this change on hold :/

Richard.

> OK for the fortran parts?
>
> Thanks,
> Richard.
>
> PR tree-optimization/99919
> * fold-const.cc (optimize_bit_field_compare): Disable
> transforming the bitfield against constant compare optimization
> if the result is not statically determinable.
>
> gcc/fortran/
> * trans-expr.cc (gfc_trans_assignment_1): Split out
> lhs_codimension from lhs_caf_attr to avoid bogus uninit
> diagnostics.
>
> * gcc.dg/uninit-pr99919.c: New testcase.
> ---
>  gcc/fold-const.cc | 37 +++
>  gcc/fortran/trans-expr.cc |  6 +++--
>  gcc/testsuite/gcc.dg/uninit-pr99919.c | 22 
>  3 files changed, 30 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/uninit-pr99919.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index cdfe3f50ae3..b72cc0a1d51 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -4559,7 +4559,6 @@ optimize_bit_field_compare (location_t loc, enum 
> tree_code code,
>  {
>poly_int64 plbitpos, plbitsize, rbitpos, rbitsize;
>HOST_WIDE_INT lbitpos, lbitsize, nbitpos, nbitsize;
> -  tree type = TREE_TYPE (lhs);
>tree unsigned_type;
>int const_p = TREE_CODE (rhs) == INTEGER_CST;
>machine_mode lmode, rmode;
> @@ -4667,13 +4666,7 @@ optimize_bit_field_compare (location_t loc, enum 
> tree_code code,
>  }
>
>/* Otherwise, we are handling the constant case.  See if the constant is 
> too
> - big for the field.  Warn and return a tree for 0 (false) if so.  We do
> - this not only for its own sake, but to avoid having to test for this
> - error case below.  If we didn't, we might generate wrong code.
> -
> - For unsigned fields, the constant shifted right by the field length 
> should
> - be all zero.  For signed fields, the high-order bits should agree with
> - the sign bit.  */
> + big for the field.  Warn and return a tree for 0 (false) if so.  */
>
>if (lunsignedp)
>  {
> @@ -4695,31 +4688,9 @@ optimize_bit_field_compare (location_t loc, enum 
> tree_code code,
> }
>  }
>
> -  if (nbitpos < 0)
> -return 0;
> -
> -  /* Single-bit compares should always be against zero.  */
> -  if (lbitsize == 1 && ! integer_zerop (rhs))
> -{
> -  code = code == EQ_EXPR ? NE_EXPR : EQ_EXPR;
> -  rhs = build_int_cst (type, 0);
> -}
> -
> -  /* Make a new bitfield reference, shift the constant over the
> - appropriate number of bits and mask it with the computed mask
> - (in case this was a signed field).  If we changed it, make a new one.  
> */
> -  lhs = make_bit_field_ref (loc, linner, lhs, unsigned_type,
> -   nbitsize, nbitpos, 1, lreversep);
> -
> -  rhs = const_binop (BIT_AND_EXPR,
> -const_binop (LSHIFT_EXPR,
> - fold_convert_loc (loc, unsigned_type, rhs),
> - size_int (lbitpos)),
> -mask);
> -
> -  lhs = build2_loc (loc, code, compare_type,
> -   build2 (BIT_AND_EXPR, unsigned_type, lhs, mask), rhs);
> -  return lhs;
> +  /* Otherwise do not prematurely optimize compares of bitfield members
> + 

Re: [PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-12-12 Thread Kewen.Lin via Gcc-patches
on 2022/12/12 11:23, HAO CHEN GUI wrote:
> Hi Kewen,
> 
> 在 2022/12/8 16:47, Kewen.Lin 写道:
>> This documentation update reminds me of that the current prototype of 
>> __ieee128
>> variant can be:
>>
>>   unsigned int scalar_extract_exp (__ieee128 source);
>>
>> type unsigned int is enough for the exponent.  It means xsxexpqp_ can 
>> also
>> use SImode rather than DImode.
> 
> 
> Could I put the changes of __ieee128 bifs in another patch? So, this patch 
> doesn't
> touch anything about __ieee128.

Sure, or you can have a separated one for scalar_extract_exp (both double and
__ieee128), and the other one for the others.

BR,
Kewen


Re: [PATCH 10/15 V5] arm: Implement cortex-M return signing address codegen

2022-12-12 Thread Richard Earnshaw via Gcc-patches




On 09/12/2022 14:16, Andrea Corallo via Gcc-patches wrote:

Hi Richard,

thanks for reviewing.

Richard Earnshaw  writes:


On 07/11/2022 08:57, Andrea Corallo via Gcc-patches wrote:

Hi all,
please find attached the lastest version of this patch incorporating
some
more improvents.  Feel free to ignore V3.
Best Regards
Andrea




As part of previous upstream suggestions a test for varargs has been
added and '-mtpcs-frame' is deemed being incompatible with this return
signing address feature being introduced.


I don't see any check for the tpcs-frame incompatibility?  What
happens if a user does combine the options?


Check added.


gcc/Changelog

2021-11-03  Andrea Corallo  

* config/arm/arm.h (arm_arch8m_main): Declare it.
* config/arm/arm.cc (arm_arch8m_main): Define it.
(arm_option_reconfigure_globals): Set arm_arch8m_main.
(arm_compute_frame_layout, arm_expand_prologue)
(thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_current_function_pac_enabled_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

You're missing an entry for aarch_bti_enabled () - yes I realize
that's just a placeholder at present and will be fully defined in
patch 12.


Fixed


+static bool
+aarch_bti_enabled ()
+{
+  return false;
+}
+

No comment on this function (and in patch 12 it moves to a different
location).  It would be best to have it in the right place at this
point in time.

+  clobber_ip = (IS_NESTED (func_type)
+&& (((TARGET_APCS_FRAME && frame_pointer_needed &&
TARGET_ARM)
+ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+  || flag_stack_clash_protection)
+ && !df_regs_ever_live_p (LR_REGNUM)
+ && arm_r3_live_at_start_p ()))
+|| (arm_current_function_pac_enabled_p (;

Redundant parenthesis around arm_current_function_pac_enabled_p () call.


Fixed


+ gcc_assert(arm_compute_static_chain_stack_bytes() == 4
+ || arm_current_function_pac_enabled_p ());

I wonder if this assert is now really serving a useful purpose.  I'd
consider removing it.


Removed


@@ -27309,7 +27340,7 @@ thumb2_expand_return (bool simple_return)
 to assert it for now to ensure that future code changes do not silently
 change this behavior.  */
gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
-  if (num_regs == 1)
+  if (num_regs == 1 && !arm_current_function_pac_enabled_p ())
  {
rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
@@ -27324,10 +27355,20 @@ thumb2_expand_return (bool simple_return)
  }
else
  {
-  saved_regs_mask &= ~ (1 << LR_REGNUM);
-  saved_regs_mask |=   (1 << PC_REGNUM);
-  arm_emit_multi_reg_pop (saved_regs_mask);
-}
+ if (arm_current_function_pac_enabled_p ())
+   {
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ emit_insn (gen_aut_nop ());
+ emit_jump_insn (simple_return_rtx);
+   }
+ else
+   {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |=   (1 << PC_REGNUM);
+ arm_emit_multi_reg_pop (saved_regs_mask);
+   }
+   }
  }
else

The logic for these blocks would, I think, be better expressed as

if (pac_enabled)
...
else if (num_regs == 1)
  ...  // existing code
else
  ...  // existing code


Done


Also, I think (out of an abundance of caution) we really need a
scheduling barrier placed before calls to gen_aut_nop() pattern is
emitted, to ensure that the scheduler never tries to move this
instruction away from the position we place it.  Use gen_blockage()
for that (see TARGET_SCHED_PROLOG).  Alternatively, we could make the
UNSPEC_PAC_NOP an unspec_volatile, which has the same effect (IIRC)
without needing an additional insn - if you use this approach, then
please make sure this is explained in a comment.

+(define_insn "pacbti_nop"
+  [(set (reg:SI IP_REGNUM)
+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  UNSPEC_PACBTI_NOP))]
+  "arm_arch8m_main"
+  "pacbti\t%|ip, %|lr, %|sp"
+  [(set_attr "conds" "unconditional")])

The additional side-effect of this being a BTI landing pad means that
we mustn't move any other instruction before it.  So I think this
needs to be an unspec_volatile as well.


Done


On the tests, they are OK as they stand, but we lack anything that
will be tested when suitable hardware is unavailable (all

[PATCH][committed] aarch64: Add __ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI ACLE defines

2022-12-12 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

Recent ACLE additions specified the __ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI 
macros [1] that the compiler
should define when the pointer authentication and BTI instructions are 
available (and don't act as NOPs).
We've received requests to enable them in GCC for aarch64, similar to clang [2].
It's a fairly simple patch and should be non-intrusive at this stage.
Pointer authentication has its own "pauth" feature flag, whereas BTI depends on 
an architecture level
of Armv8.5-a or later.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

[1] 
https://github.com/ARM-software/acle/blob/main/main/acle.md#pointer-authentication
[2] https://reviews.llvm.org/rG7d40baa82b1f272f68de63f3c4f68d970bdcd6ed

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI when appropriate.
* config/aarch64/aarch64.h (TARGET_BTI): Define.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/acle/bti_def.c: New test.
* gcc.target/aarch64/acle/pauth_def.c: New test.


acle-pacbti.patch
Description: acle-pacbti.patch


[Ping x6] Re: [PATCH, nvptx, 1/2] Reimplement libgomp barriers for nvptx

2022-12-12 Thread Chung-Lin Tang via Gcc-patches
Ping x6

On 2022/12/6 12:21 AM, Chung-Lin Tang wrote:
> Ping x5
> 
> On 2022/11/22 12:24 上午, Chung-Lin Tang wrote:
>> Ping x4
>>
>> On 2022/11/8 12:34 AM, Chung-Lin Tang wrote:
>>> Ping x3.
>>>
>>> On 2022/10/31 10:18 PM, Chung-Lin Tang wrote:
 Ping x2.

 On 2022/10/17 10:29 PM, Chung-Lin Tang wrote:
> Ping.
>
> On 2022/9/21 3:45 PM, Chung-Lin Tang via Gcc-patches wrote:
>> Hi Tom,
>> I had a patch submitted earlier, where I reported that the current way 
>> of implementing
>> barriers in libgomp on nvptx created a quite significant performance 
>> drop on some SPEChpc2021
>> benchmarks:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/600818.html 
>> That previous patch wasn't accepted well (admittedly, it was kind of a 
>> hack).
>> So in this patch, I tried to (mostly) re-implement team-barriers for 
>> NVPTX.
>>
>> Basically, instead of trying to have the GPU do CPU-with-OS-like things 
>> that it isn't suited for,
>> barriers are implemented simplistically with bar.* synchronization 
>> instructions.
>> Tasks are processed after threads have joined, and only if 
>> team->task_count != 0
>>
>> (arguably, there might be a little bit of performance forfeited where 
>> earlier arriving threads
>> could've been used to process tasks ahead of other threads. But that 
>> again falls into requiring
>> implementing complex futex-wait/wake like behavior. Really, that kind of 
>> tasking is not what target
>> offloading is usually used for)
>>
>> Implementation highlight notes:
>> 1. gomp_team_barrier_wake() is now an empty function (threads never 
>> "wake" in the usual manner)
>> 2. gomp_team_barrier_cancel() now uses the "exit" PTX instruction.
>> 3. gomp_barrier_wait_last() now is implemented using "bar.arrive"
>>
>> 4. gomp_team_barrier_wait_end()/gomp_team_barrier_wait_cancel_end():
>> The main synchronization is done using a 'bar.red' instruction. This 
>> reduces across all threads
>> the condition (team->task_count != 0), to enable the task processing 
>> down below if any thread
>> created a task. (this bar.red usage required the need of the second 
>> GCC patch in this series)
>>
>> This patch has been tested on x86_64/powerpc64le with nvptx offloading, 
>> using libgomp, ovo, omptests,
>> and sollve_vv testsuites, all without regressions. Also verified that 
>> the SPEChpc 2021 521.miniswp_t
>> and 534.hpgmgfv_t performance regressions that occurred in the GCC12 
>> cycle has been restored to
>> devel/omp/gcc-11 (OG11) branch levels. Is this okay for trunk?
>>
>> (also suggest backporting to GCC12 branch, if performance regression can 
>> be considered a defect)
>>
>> Thanks,
>> Chung-Lin
>>
>> libgomp/ChangeLog:
>>
>> 2022-09-21  Chung-Lin Tang  
>>
>>  * config/nvptx/bar.c (generation_to_barrier): Remove.
>>  (futex_wait,futex_wake,do_spin,do_wait): Remove.
>>  (GOMP_WAIT_H): Remove.
>>  (#include "../linux/bar.c"): Remove.
>>  (gomp_barrier_wait_end): New function.
>>  (gomp_barrier_wait): Likewise.
>>  (gomp_barrier_wait_last): Likewise.
>>  (gomp_team_barrier_wait_end): Likewise.
>>  (gomp_team_barrier_wait): Likewise.
>>  (gomp_team_barrier_wait_final): Likewise.
>>  (gomp_team_barrier_wait_cancel_end): Likewise.
>>  (gomp_team_barrier_wait_cancel): Likewise.
>>  (gomp_team_barrier_cancel): Likewise.
>>  * config/nvptx/bar.h (gomp_team_barrier_wake): Remove
>>  prototype, add new static inline function.
>>>
>>
> 



Re: [PATCH] libstdc++: Make chrono::hh_mm_ss more compact

2022-12-12 Thread Jonathan Wakely via Gcc-patches
On Mon, 12 Dec 2022 at 07:12, Stephan Bergmann via Libstdc++
 wrote:
>
> On 11/21/22 21:43, Jonathan Wakely via Libstdc++ wrote:
> > + static constexpr bool __is_unsigned
> > +   = __and_v,
> > + is_unsigned>;
>
> Using `__is_unsigned` as an identifier here causes compilation issues
> with Clang, which predefines that as a builtin predicate.

Great, those are going to break a lot of code:

include/bits/charconv.h:  = ! __gnu_cxx::__int_traits<_Tp>::__is_signed;
include/bits/locale_facets.tcc:   (__negative && __num_traits::__is_signed)
include/bits/locale_facets.tcc: if (__negative && __num_traits::__is_signed)
include/bits/locale_facets.tcc: &&
__gnu_cxx::__numeric_traits<_ValueT>::__is_signed)
include/bits/uniform_int_dist.h:
static_assert(!_Up_traits::__is_signed, "U must be unsigned");
include/bits/uniform_int_dist.h:
static_assert(!_Wp_traits::__is_signed, "W must be unsigned");
include/ext/numeric_traits.h:  static const bool __is_signed =
(_Value)(-1) < 0;
include/ext/numeric_traits.h:   =
__is_integer_nonstrict<_Value>::__width - __is_signed;
include/ext/numeric_traits.h:  static const _Value __max = __is_signed
include/ext/numeric_traits.h:  static const _Value __min =
__is_signed ? -__max - 1 : (_Value)0;
include/ext/numeric_traits.h:const bool
__numeric_traits_integer<_Value>::__is_signed;
include/ext/numeric_traits.h:  static const bool __is_signed = true;
include/ext/numeric_traits.h:const bool
__numeric_traits_floating<_Value>::__is_signed;
include/ext/numeric_traits.h:  static const bool __is_signed = true;
include/ext/numeric_traits.h:  static const bool __is_signed = true;

If Clang already treats __is_signed as a conditionally-active
built-in, it should do the same for __is_unsigned.


Re: [PATCH] Backport gcc-12: jobserver FIFO support

2022-12-12 Thread Martin Liška
On 12/11/22 11:47, Richard Biener wrote:
> On Fri, Dec 9, 2022 at 11:07 AM Martin Liška  wrote:
>>
>> Hi.
>>
>> As make 4.4 has been release, it switches to FIFO by default. That makes
>> troubles to the latest GCC release, version 12.
> 
> Can you elaborate on "troubles" here?

Sure. Problem with the latest GNU make and current gcc-12 branch is that GCC
can't properly detect jobserver because --jobserver-auth=fifo:$path is unknown
to it. Thus we fallback to auto parallelism detection in most cases. The 
mentioned
patches properly detect both the old and the new --jobserver-auth format.

> I'm fine with backporting the whole
> series to GCC 12 but I wonder if earlier still maintained versions are also
> affected (noting that the series also addresses WPA streaming which is
> not part of the "troubles" here).

Yes, they are also affected.

So ready for all active branches?

Cheers,
Martin

> 
> Thanks,
> Richard.
> 
>> Right now, we've been using
>> the following 4 patches in openSUSE gcc12 package:
>>
>> 1270ccda70ca09f7d4fe76b5156dca8992bd77a6
>> 53e3b2bf16a486c15c20991c6095f7be09012b55
>> fed766af32ed6cd371016cc24e931131e19b4eb1
>> 3f1c2f89f6b8b8d23a9072f8549b0a2c1de06b03
>>
>> Would it be fine to backport it to gcc-12 branch? Arsen asked me the today
>> as Gentoo people want it as well.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
>> Thanks,
>> Martin



Re: [PATCH] Backport gcc-12: jobserver FIFO support

2022-12-12 Thread Jakub Jelinek via Gcc-patches
On Mon, Dec 12, 2022 at 12:39:36PM +0100, Martin Liška wrote:
> > I'm fine with backporting the whole
> > series to GCC 12 but I wonder if earlier still maintained versions are also
> > affected (noting that the series also addresses WPA streaming which is
> > not part of the "troubles" here).
> 
> Yes, they are also affected.
> 
> So ready for all active branches?

Can't you backport instead a minimal fix that will not change other stuff?

Jakub



Re: [PATCH] Backport gcc-12: jobserver FIFO support

2022-12-12 Thread Martin Liška
On 12/12/22 12:42, Jakub Jelinek wrote:
> On Mon, Dec 12, 2022 at 12:39:36PM +0100, Martin Liška wrote:
>>> I'm fine with backporting the whole
>>> series to GCC 12 but I wonder if earlier still maintained versions are also
>>> affected (noting that the series also addresses WPA streaming which is
>>> not part of the "troubles" here).
>>
>> Yes, they are also affected.
>>
>> So ready for all active branches?
> 
> Can't you backport instead a minimal fix that will not change other stuff?

Yes, then it would be exactly the following 2 patches:

1270ccda70ca09f7d4fe76b5156dca8992bd77a6
53e3b2bf16a486c15c20991c6095f7be09012b55

Martin

> 
>   Jakub
> 



[PATCH (pushed)] mklog: do not parse binary file for PR entry

2022-12-12 Thread Martin Liška
contrib/ChangeLog:

* mklog.py: Do not search PR entry in a file that is binary.
---
 contrib/mklog.py | 34 ++
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/contrib/mklog.py b/contrib/mklog.py
index 3a6ec681b65..358b7fc6b8b 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -186,23 +186,25 @@ def generate_changelog(data, no_functions=False, 
fill_pr_titles=False,
 # contains commented code which a note that it
 # has not been tested due to a certain PR or DR.
 this_file_prs = []
-for line in list(file)[0][0:10]:
-m = pr_regex.search(line.value)
-if m:
-pr = m.group('pr')
-if pr not in prs:
-prs.append(pr)
-this_file_prs.append(pr.split('/')[-1])
-else:
-m = dr_regex.search(line.value)
+if not file.is_binary_file:
+for line in list(file)[0][0:10]:
+m = pr_regex.search(line.value)
 if m:
-dr = m.group('dr')
-if dr not in prs:
-prs.append(dr)
-this_file_prs.append(dr.split('/')[-1])
-elif dg_regex.search(line.value):
-# Found dg-warning/dg-error line
-break
+pr = m.group('pr')
+if pr not in prs:
+prs.append(pr)
+this_file_prs.append(pr.split('/')[-1])
+else:
+m = dr_regex.search(line.value)
+if m:
+dr = m.group('dr')
+if dr not in prs:
+prs.append(dr)
+this_file_prs.append(dr.split('/')[-1])
+elif dg_regex.search(line.value):
+# Found dg-warning/dg-error line
+break
+
 # PR number in the file name
 fname = os.path.basename(file.path)
 m = pr_filename_regex.search(fname)
-- 
2.38.1



RE: [PATCH] Fix memory constraint on MVE v[ld/st][2/4] instructions [PR107714]

2022-12-12 Thread Kyrylo Tkachov via Gcc-patches
Hi Stam,

> -Original Message-
> From: Stam Markianos-Wright 
> Sent: Friday, December 9, 2022 1:32 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov ; Richard Earnshaw
> ; Ramana Radhakrishnan
> ; ni...@redhat.com
> Subject: [PATCH] Fix memory constraint on MVE v[ld/st][2/4] instructions
> [PR107714]
> 
> Hi all,
> 
> In the M-Class Arm-ARM:
> 
> https://developer.arm.com/documentation/ddi0553/bu/?lang=en
> 
> these MVE instructions only have '!' writeback variant and at:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107714
> 
> we found that the Um constraint would also allow through a
> register offset writeback, resulting in an assembler error.
> 
> Here I have added a new constraint and predicate for these
> instructions, which (uniquely, AFAICT), only support a `!` writeback
> increment by the data size (inside the compiler this is a POST_INC).
> 
> No regressions in arm-none-eabi with MVE and MVE.FP.
> 
> Ok for trunk, and backport to GCC11 and GCC12 (testing pending)?
> 
> Thanks,
> Stam
> 
> gcc/ChangeLog:
>      PR target/107714
>      * config/arm/arm-protos.h (mve_struct_mem_operand): New
> protoype.
>      * config/arm/arm.cc (mve_struct_mem_operand): New function.
>      * config/arm/constraints.md (Ug): New constraint.
>      * config/arm/mve.md (mve_vst4q): Change constraint.
>      (mve_vst2q): Likewise.
>      (mve_vld4q): Likewise.
>      (mve_vld2q): Likewise.
>      * config/arm/predicates.md (mve_struct_operand): New predicate.
> 
> gcc/testsuite/ChangeLog:
>      PR target/107714
>      * gcc.target/arm/mve/intrinsics/vldst24q_reg_offset.c: New test.


diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 
e5a36d29c7135943b9bb5ea396f70e2e4beb1e4a..8908b7f5b15ce150685868e78e75280bf32053f1
 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -474,6 +474,12 @@
  (and (match_code "mem")
   (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)")))
 
+(define_memory_constraint "Ug"
+ "@internal
+  In Thumb-2 state a valid MVE struct load/store address."
+ (and (match_code "mem")
+  (match_test "TARGET_HAVE_MVE && mve_struct_mem_operand (op)")))
+

I think you can define the constraints in terms of the new mve_struct_operand 
predicate directly (see how we define the "Ua" constraint, for example).
Ok if that works (and testing passes of course).
Thanks,
Kyrill



[committed] libstdc++: Make operator<< for stacktraces less templated (LWG 3515)

2022-12-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8--

This change was approved for C++23 last month.

libstdc++-v3/ChangeLog:

* include/std/stacktrace (operator<<): Only output to narrow
ostreams (LWG 3515).
* testsuite/19_diagnostics/stacktrace/synopsis.cc:
---
 libstdc++-v3/include/std/stacktrace   | 38 +--
 .../19_diagnostics/stacktrace/synopsis.cc | 11 +++---
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index ec3335e89d8..83c6463b0d8 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -165,9 +165,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __state;
 }
 
-template
-  friend basic_ostream<_CharT, _Traits>&
-  operator<<(basic_ostream<_CharT, _Traits>&, const stacktrace_entry&);
+friend ostream&
+operator<<(ostream&, const stacktrace_entry&);
 
 bool
 _M_get_info(string* __desc, string* __file, int* __line) const
@@ -720,25 +719,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 noexcept(noexcept(__a.swap(__b)))
 { __a.swap(__b); }
 
-  template
-inline basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
-  const stacktrace_entry& __f)
-{
-  string __desc, __file;
-  int __line;
-  if (__f._M_get_info(&__desc, &__file, &__line))
-   {
- __os.width(4);
- __os << __desc << " at " << __file << ':' << __line;
-   }
-  return __os;
-}
+  inline ostream&
+  operator<<(ostream& __os, const stacktrace_entry& __f)
+  {
+string __desc, __file;
+int __line;
+if (__f._M_get_info(&__desc, &__file, &__line))
+  {
+   __os.width(4);
+   __os << __desc << " at " << __file << ':' << __line;
+  }
+return __os;
+  }
 
-  template
-inline basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
-  const basic_stacktrace<_Allocator>& __st)
+  template
+inline ostream&
+operator<<(ostream& __os, const basic_stacktrace<_Allocator>& __st)
 {
   for (stacktrace::size_type __i = 0; __i < __st.size(); ++__i)
{
diff --git a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc
index 72582fa53c6..262abea21ec 100644
--- a/libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc
+++ b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc
@@ -28,13 +28,12 @@ namespace std
   template
 string to_string(const basic_stacktrace& st);
 
-  template
-basic_ostream&
-operator<<(basic_ostream& os, const stacktrace_entry& f);
+  ostream&
+  operator<<(ostream& os, const stacktrace_entry& f);
 
-  template
-basic_ostream&
-operator<<(basic_ostream& os, const 
basic_stacktrace& st);
+  template
+ostream&
+operator<<(ostream& os, const basic_stacktrace& st);
 
   namespace pmr {
 using stacktrace = 
basic_stacktrace>;
-- 
2.38.1



[committed] libstdc++: Define atomic lock-free type aliases for C++20 [PR98034]

2022-12-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8--

libstdc++-v3/ChangeLog:

PR libstdc++/98034
* include/std/atomic (__cpp_lib_atomic_lock_free_type_aliases):
Define macro.
(atomic_signed_lock_free, atomic_unsigned_lock_free): Define
aliases.
* include/std/version (__cpp_lib_atomic_lock_free_type_aliases):
Define macro.
* testsuite/29_atomics/atomic/lock_free_aliases.cc: New test.
---
 libstdc++-v3/include/std/atomic   | 17 ++
 libstdc++-v3/include/std/version  |  1 +
 .../29_atomics/atomic/lock_free_aliases.cc| 34 +++
 3 files changed, 52 insertions(+)
 create mode 100644 
libstdc++-v3/testsuite/29_atomics/atomic/lock_free_aliases.cc

diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
index 356f1458f44..857f9270049 100644
--- a/libstdc++-v3/include/std/atomic
+++ b/libstdc++-v3/include/std/atomic
@@ -1727,6 +1727,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using __atomic_ref<_Tp>::operator=;
 };
 
+#define __cpp_lib_atomic_lock_free_type_aliases 201907L
+#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
+  using atomic_signed_lock_free
+= atomic>;
+  using atomic_unsigned_lock_free
+= atomic>;
+#elif ATOMIC_INT_LOCK_FREE || !(ATOMIC_LONG_LOCK_FREE || ATOMIC_CHAR_LOCK_FREE)
+  using atomic_signed_lock_free = atomic;
+  using atomic_unsigned_lock_free = atomic;
+#elif ATOMIC_LONG_LOCK_FREE
+  using atomic_signed_lock_free = atomic;
+  using atomic_unsigned_lock_free = atomic;
+#elif ATOMIC_CHAR_LOCK_FREE
+  using atomic_signed_lock_free = atomic;
+  using atomic_unsigned_lock_free = atomic;
+#endif
+
 #endif // C++2a
 
   /// @} group atomics
diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version
index 3c7c440bd80..61718ebad74 100644
--- a/libstdc++-v3/include/std/version
+++ b/libstdc++-v3/include/std/version
@@ -187,6 +187,7 @@
 #define __cpp_lib_assume_aligned 201811L
 #define __cpp_lib_atomic_flag_test 201907L
 #define __cpp_lib_atomic_float 201711L
+#define __cpp_lib_atomic_lock_free_type_aliases 201907L
 #define __cpp_lib_atomic_ref 201806L
 #define __cpp_lib_atomic_value_initialization 201911L
 #define __cpp_lib_bind_front 201907L
diff --git a/libstdc++-v3/testsuite/29_atomics/atomic/lock_free_aliases.cc 
b/libstdc++-v3/testsuite/29_atomics/atomic/lock_free_aliases.cc
new file mode 100644
index 000..02c4ccc3aa3
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/atomic/lock_free_aliases.cc
@@ -0,0 +1,34 @@
+// { dg-options "-std=gnu++20" }
+// { dg-do compile { target c++20 } }
+
+#include 
+
+#ifndef __cpp_lib_atomic_lock_free_type_aliases
+# error "Feature test macro for lock-free type aliases is missing in "
+#elif __cpp_lib_atomic_lock_free_type_aliases != 201907L
+# error "Feature test macro for lock-free type aliases has wrong value in 
"
+#endif
+
+template
+constexpr bool is_atomic_specialization = false;
+template
+constexpr bool is_atomic_specialization> = true;
+
+// The type aliases atomic_signed_lock_free and atomic_unsigned_lock_free
+// name specializations of atomic
+static_assert( is_atomic_specialization );
+static_assert( is_atomic_specialization );
+
+#include 
+
+// ... whose template arguments are integral types,
+static_assert( std::is_integral_v );
+static_assert( std::is_integral_v 
);
+
+// ... respectively signed and unsigned,
+static_assert( std::is_signed_v );
+static_assert( std::is_unsigned_v 
);
+
+// and whose is_always_lock_free property is true.
+static_assert( std::atomic_signed_lock_free::is_always_lock_free );
+static_assert( std::atomic_unsigned_lock_free::is_always_lock_free );
-- 
2.38.1



[committed] libstdc++: Change names that clash with Win32 or Clang

2022-12-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8--

Clang now defines an __is_unsigned built-in, and Windows defines an
_Out_ macro. Replace uses of those as identifiers.

There might also be a problem with __is_signed, which we use in several
places.

libstdc++-v3/ChangeLog:

* include/std/chrono (hh_mm_ss): Rename __is_unsigned member to
_S_is_unsigned.
* include/std/format (basic_format_context): Rename _Out_
template parameter to _Out2.
* testsuite/17_intro/names.cc: Add Windows SAL annotation
macros.
---
 libstdc++-v3/include/std/chrono  |  6 +++---
 libstdc++-v3/include/std/format  |  8 
 libstdc++-v3/testsuite/17_intro/names.cc | 11 +++
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index 38ecd3142bc..4c5fbfaeb83 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -2344,7 +2344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr bool
is_negative() const noexcept
{
- if constexpr (!__is_unsigned)
+ if constexpr (!_S_is_unsigned)
return _M_is_neg;
  else
return false;
@@ -2373,7 +2373,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr precision
to_duration() const noexcept
{
- if constexpr (!__is_unsigned)
+ if constexpr (!_S_is_unsigned)
if (_M_is_neg)
  return -(_M_h + _M_m + _M_s + subseconds());
  return _M_h + _M_m + _M_s + subseconds();
@@ -2382,7 +2382,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// TODO: Implement operator<<.
 
   private:
-   static constexpr bool __is_unsigned
+   static constexpr bool _S_is_unsigned
  = __and_v,
is_unsigned>;
 
diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 1072e2c17db..269f8f6cc6a 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -3345,10 +3345,10 @@ namespace __format
   : _M_args(__args), _M_out(std::move(__out)), _M_loc(__loc)
   { }
 
-  template
-   friend _Out_
-   __format::__do_vformat_to(_Out_, basic_string_view<_CharT_>,
- const basic_format_args<_Context_>&,
+  template
+   friend _Out2
+   __format::__do_vformat_to(_Out2, basic_string_view<_CharT2>,
+ const basic_format_args<_Context2>&,
  const locale*);
 
 public:
diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index 963f74c11f3..16c104a438d 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -205,6 +205,17 @@
 #define ValueT ValueT is not a reserved name
 #define ValueType  ValueType is not a reserved name
 
+#ifndef _WIN32
+// Windows SAL annotations
+#define _In_   cannot be used as an identifier
+#define _Inout_cannot be used as an identifier
+#define _Out_  cannot be used as an identifier
+#define _Reserved_ cannot be used as an identifier
+#define __inoutcannot be used as an identifier
+#define __in_opt   cannot be used as an identifier
+#define __out_opt  cannot be used as an identifier
+#endif
+
 #ifdef _AIX
 // See https://gcc.gnu.org/ml/libstdc++/2017-03/msg00015.html
 #undef f
-- 
2.38.1



[committed] libstdc++: Add a test checking for chrono::duration overflows

2022-12-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8--

This test fails if chrono::days::rep or chrono::years::rep is a 32-bit
type, because a large days or years value silently overflows a 32-bit
integer when converted to seconds. It would be conforming to implement
chrono::days as chrono::duration>, but would make
this overflow case more likely. Similarly for chrono::years,
chrono::months and chrono::weeks. This test is here to remind us not to
make that change lightly.

libstdc++-v3/ChangeLog:

* testsuite/20_util/duration/arithmetic/overflow_c++20.cc: New
test.
---
 .../duration/arithmetic/overflow_c++20.cc | 29 +++
 1 file changed, 29 insertions(+)
 create mode 100644 
libstdc++-v3/testsuite/20_util/duration/arithmetic/overflow_c++20.cc

diff --git 
a/libstdc++-v3/testsuite/20_util/duration/arithmetic/overflow_c++20.cc 
b/libstdc++-v3/testsuite/20_util/duration/arithmetic/overflow_c++20.cc
new file mode 100644
index 000..42fa7ea55fb
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/duration/arithmetic/overflow_c++20.cc
@@ -0,0 +1,29 @@
+// { dg-options "-std=gnu++20" }
+// { dg-do run { target c++20 } }
+
+#include 
+#include 
+#include 
+
+void
+test_overflow()
+{
+  using namespace std::chrono;
+
+  using seconds32_t = duration;
+  seconds32_t t = 14h + 25min + 55s;
+  auto snow = sys_days(1854y/December/11);
+  auto snow_t = snow + t;
+  // Fails if days::rep is 32-bit:
+  VERIFY( snow_t.time_since_epoch() < seconds::zero() );
+  auto y = floor(snow);
+  auto y_t = y + t;
+  // Fails if years::rep is 32-bit:
+  VERIFY( y_t.time_since_epoch() < seconds::zero() );
+  VERIFY( y_t < snow_t );
+}
+
+int main()
+{
+  test_overflow();
+}
-- 
2.38.1



[committed] libstdc++: Fix constraint on std::basic_format_string [PR108024]

2022-12-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8--

Also remove some redundant std::move calls for return statements.

libstdc++-v3/ChangeLog:

PR libstdc++/108024
* include/std/format (basic_format_string): Fix constraint.
* testsuite/std/format/format_string.cc: New test.
---
 libstdc++-v3/include/std/format  | 10 ++
 .../testsuite/std/format/format_string.cc| 16 
 2 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/format_string.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 269f8f6cc6a..9c928371415 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -110,7 +110,8 @@ namespace __format
   template
 struct basic_format_string
 {
-  template> _Tp>
+  template
+   requires convertible_to>
consteval
basic_format_string(const _Tp& __s);
 
@@ -609,7 +610,7 @@ namespace __format
   else
for (_CharT __c : __str)
  *__out++ = __c;
-  return std::move(__out);
+  return __out;
 }
 
   // Write STR to OUT with NFILL copies of FILL_CHAR specified by ALIGN.
@@ -664,7 +665,7 @@ namespace __format
   __out = __format::__write(std::move(__out), __str);
   __pad(__r, __out);
 
-  return std::move(__out);
+  return __out;
 }
 
   // A lightweight optional.
@@ -3626,7 +3627,8 @@ namespace __format
 /// @endcond
 
   template
-template> _Tp>
+template
+  requires convertible_to>
   consteval
   basic_format_string<_CharT, _Args...>::
   basic_format_string(const _Tp& __s)
diff --git a/libstdc++-v3/testsuite/std/format/format_string.cc 
b/libstdc++-v3/testsuite/std/format/format_string.cc
new file mode 100644
index 000..1dd6ca8f125
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/format/format_string.cc
@@ -0,0 +1,16 @@
+// { dg-options "-std=gnu++20" }
+// { dg-do compile { target c++20 } }
+
+#include 
+
+struct Str
+{
+  consteval operator std::string_view() const { return ""; }
+  operator std::string_view() = delete;
+};
+
+// PR libstdc++/108024
+static_assert( std::is_constructible_v, const Str&> );
+static_assert( std::is_convertible_v> );
+
+constinit std::format_string<> s = Str();
-- 
2.38.1



Re: [PATCH] libstdc++: Make chrono::hh_mm_ss more compact

2022-12-12 Thread Jonathan Wakely via Gcc-patches
On Mon, 12 Dec 2022 at 11:18, Jonathan Wakely wrote:
>
> On Mon, 12 Dec 2022 at 07:12, Stephan Bergmann via Libstdc++
>  wrote:
> >
> > On 11/21/22 21:43, Jonathan Wakely via Libstdc++ wrote:
> > > + static constexpr bool __is_unsigned
> > > +   = __and_v,
> > > + is_unsigned>;
> >
> > Using `__is_unsigned` as an identifier here causes compilation issues
> > with Clang, which predefines that as a builtin predicate.
>
> Great, those are going to break a lot of code:
>
> include/bits/charconv.h:  = ! __gnu_cxx::__int_traits<_Tp>::__is_signed;
> include/bits/locale_facets.tcc:   (__negative && __num_traits::__is_signed)
> include/bits/locale_facets.tcc: if (__negative && 
> __num_traits::__is_signed)
> include/bits/locale_facets.tcc: &&
> __gnu_cxx::__numeric_traits<_ValueT>::__is_signed)
> include/bits/uniform_int_dist.h:
> static_assert(!_Up_traits::__is_signed, "U must be unsigned");
> include/bits/uniform_int_dist.h:
> static_assert(!_Wp_traits::__is_signed, "W must be unsigned");
> include/ext/numeric_traits.h:  static const bool __is_signed =
> (_Value)(-1) < 0;
> include/ext/numeric_traits.h:   =
> __is_integer_nonstrict<_Value>::__width - __is_signed;
> include/ext/numeric_traits.h:  static const _Value __max = __is_signed
> include/ext/numeric_traits.h:  static const _Value __min =
> __is_signed ? -__max - 1 : (_Value)0;
> include/ext/numeric_traits.h:const bool
> __numeric_traits_integer<_Value>::__is_signed;
> include/ext/numeric_traits.h:  static const bool __is_signed = true;
> include/ext/numeric_traits.h:const bool
> __numeric_traits_floating<_Value>::__is_signed;
> include/ext/numeric_traits.h:  static const bool __is_signed = true;
> include/ext/numeric_traits.h:  static const bool __is_signed = true;
>
> If Clang already treats __is_signed as a conditionally-active
> built-in, it should do the same for __is_unsigned.

The __is_unsigned use has been removed in r13-4613-gcb363fd9f19eb7


Re: [PATCH 9/12 V2] arm: Make libgcc bti compatible

2022-12-12 Thread Andrea Corallo via Gcc-patches
Richard Earnshaw  writes:

> On 22/07/2022 16:09, Andrea Corallo via Gcc-patches wrote:
>> Richard Earnshaw  writes:
>> 
>>> On 21/07/2022 10:17, Andrea Corallo via Gcc-patches wrote:
 Richard Earnshaw  writes:

> On 28/04/2022 10:48, Andrea Corallo via Gcc-patches wrote:
>> This change add bti instructions at the beginning of arm specific
>> libgcc hand written assembly routines.
>> 2022-03-31  Andrea Corallo  
>>  * libgcc/config/arm/crti.S (FUNC_START): Add bti instruction
>> if
>>  necessary.
>>  * libgcc/config/arm/lib1funcs.S (THUMB_FUNC_START, FUNC_START):
>>  Likewise.
>>
>
> +#if defined(__ARM_FEATURE_BTI)
>
> Wouldn't it be better to use __ARM_FEATURE_BTI_DEFAULT?  That way we
> only get BTI instructions in multilib variants that have asked for
> BTI.
>
> R.
 Hi Richard,
 good point, yes I think so.
 Please find attached the updated patch.
 BR
 Andrea

>>>
>>> I've been pondering this patch.  The way it is implemented would put a
>>> BTI instruction at the start of every assembler routine in libgcc.
>>> But the vast majority of functions in libgcc cannot have their address
>>> taken, so a BTI isn't needed (BTI is only needed when an indirect jump
>>> could be used).  So I wonder if we really need to do this so
>>> aggressively?
>>>
>>> Perhaps a better approach would be to define a macro (eg MAYBEBTI)
>>> which expands a BTI if the compilation requires it and nothing
>>> otherwise), and then manually insert that in any functions that really
>>> need this (if any).
>> I guess the main downside of this approach would be the maintanace
>> burden, we'll have to remember forever that every time an asm function
>> is called by function pointer we have to add the bti landing pad
>> manually, otherwise this will be broken when pacbti enabled. WDYT?
>> If we want to go this way I'll start reworking the patch in this
>> direction (tho this might not be trivial).
>> 
>
> Yes, it's a trade-off.  The lazy way, however, costs all users even if
> a function is never addressed (which I think is the case for
> practically all functions in libgcc).
>
> So I think in this case it's worth taking that extra development pain.
>
> R.

As a late follow-up to this.

I believe there are no hand written asm functions in libgcc that are
addressed, so this patch was dropped from the series in the following
iteration.  It is true that we could pac instrument them but ATM we
don't.

  Andrea


Re: [PATCH v5 2/4] OpenMP/OpenACC: Rework clause expansion and nested struct handling

2022-12-12 Thread Julian Brown
On Wed, 7 Dec 2022 17:13:42 +0100
Tobias Burnus  wrote:

> I think we need to distinguish:
> 
>#pragma omp target enter data map(to: s.w[:10])
> 
> from
> 
>#pragma omp target map(tofrom: s.arr[:20])
>  s.arr[0] = 5;
> 
> As in the latter case 's' gets implicitly mapped and then applies to
> the base pointer 's.arr' of 's.arr[:20]'. While in the former case,
> only the pointee gets mapped without the pointer 's.arr' (and, hence,
> there is also no pointer attachment).

Here's an incremental patch that fixes the mapping behaviour in that
case. This is to be applied on top of the approved (but not committed)
parent patch:

  https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603792.html

and also the unreviewed patch posted here (ping?):

  https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607543.html

though it might actually make more sense to commit the three patches
squashed together.

Tested with offloading to NVPTX. OK?

Thanks,

Julian
commit abb1e04f9ef93221ecd4292f43cc1ea901843766
Author: Julian Brown 
Date:   Thu Dec 8 13:31:01 2022 +

OpenMP: implicitly map base pointer for array-section pointer components

Following from discussion in:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570075.html

and:

  https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608100.html

and also upstream OpenMP issue 342, this patch changes mapping for array
sections of pointer components on compute regions like this:

  #pragma omp target map(s.ptr[0:10])
  {
...use of 's'...
  }

so the base pointer 's.ptr' is implicitly mapped, and thus pointer
attachment happens.  This is subtly different in the "enter data"
case, e.g:

  #pragma omp target enter data map(s.ptr[0:10])

if 's.ptr' (or the whole of 's') is not present on the target before
the directive is executed, the array section is copied to the target
but pointer attachment does *not* take place, since 's' (or 's.ptr')
is not mapped implicitly for "enter data".

To get a pointer attachment with "enter data", you can do, e.g:

  #pragma omp target enter data map(s.ptr, s.ptr[0:10])

  #pragma omp target
  {
...implicit use of 's'...
  }

That is, once the attachment has happened, implicit mapping of 's'
and uses of 's.ptr[...]' work correctly in the target region.

ChangeLog

2022-12-12  Julian Brown  

gcc/
* gimplify.cc (omp_accumulate_sibling_list): Don't require
explicitly-mapped base pointer for compute regions.

gcc/testsuite/
* c-c++-comon/gomp/target-implicit-map-2.c: Update expected scan output.

libgomp/
* testsuite/libgomp.c-c++-common/target-implicit-map-2.c: Fix missing
"free".
* testsuite/libgomp.c-c++-common/target-implicit-map-3.c: New test.
* testsuite/libgomp.c-c++-common/target-map-zlas-1.c: New test.
* testsuite/libgomp.c/target-22.c: Remove explicit base pointer
mappings.

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 34cac30d7d92..a8dd298559e8 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -10617,6 +10617,7 @@ omp_accumulate_sibling_list (enum omp_region_type region_type,
   poly_int64 cbitpos;
   tree ocd = OMP_CLAUSE_DECL (grp_end);
   bool openmp = !(region_type & ORT_ACC);
+  bool target = (region_type & ORT_TARGET) != 0;
   tree *continue_at = NULL;
 
   while (TREE_CODE (ocd) == ARRAY_REF)
@@ -10721,9 +10722,9 @@ omp_accumulate_sibling_list (enum omp_region_type region_type,
 	}
 
 	  /* For OpenMP semantics, we don't want to implicitly allocate
-	 space for the pointer here.  A FRAGILE_P node is only being
-	 created so that omp-low.cc is able to rewrite the struct
-	 properly.
+	 space for the pointer here for non-compute regions (e.g. "enter
+	 data").  A FRAGILE_P node is only being created so that
+	 omp-low.cc is able to rewrite the struct properly.
 	 For references (to pointers), we want to actually allocate the
 	 space for the reference itself in the sorted list following the
 	 struct node.
@@ -10731,6 +10732,7 @@ omp_accumulate_sibling_list (enum omp_region_type region_type,
 	 mapping of the attachment point, but not otherwise.  */
 	  if (*fragile_p
 	  || (openmp
+		  && !target
 		  && attach_detach
 		  && TREE_CODE (TREE_TYPE (ocd)) == POINTER_TYPE
 		  && !OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED (grp_end)))
@@ -11043,6 +11045,7 @@ omp_accumulate_sibling_list (enum omp_region_type region_type,
 
 	  if (*fragile_p
 	  || (openmp
+		  && !target
 		  && attach_detach
 		  && TREE_CODE (TREE_TYPE (ocd)) == POINTER_TYPE
 		  && !OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED (grp_end)))
diff --git a/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c b/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c
index 5ba

Re: [PATCH v5, rs6000] Change mode and insn condition for VSX scalar extract/insert instructions

2022-12-12 Thread Segher Boessenkool
Hi!

On Fri, Dec 02, 2022 at 03:03:46PM +0800, HAO CHEN GUI wrote:
>   For scalar extract/insert instructions, exponent field can be stored in a
> 32-bit register. So this patch changes the mode of exponent field from DI to
> SI so that these instructions can be generated in a 32-bit environment. Also
> it removes TARGET_64BIT check for these instructions.

>   This patch also changes prototypes and catagories of relevant built-ins and
> effective target checks of test cases.

You can not change the types of builtins, unless they were completely
broken before or something serious like that.  Existing user code should
keep doing the same as it did before.  So any change like this needs a
lot more justification.

> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2833,6 +2833,11 @@
>const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
>  TSTSFI_OV_TD dfptstsfi_unordered_td {}
> 
> +  const unsigned int __builtin_vsx_scalar_extract_exp (double);
> +VSEEDP xsxexpdp {}
> +
> +  const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned int);
> +VSIEDPF xsiexpdpf {}

vec_insert_exp etc. are externally documented (in the PVIPR), these
(internal) builtins should behave similarly.

You can change the *internal* stuff fine of course.

> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -4507,9 +4507,9 @@
>  VSESQP
> 
>  [VEC_VSIE, scalar_insert_exp, __builtin_vec_scalar_insert_exp]
> -  double __builtin_vec_scalar_insert_exp (unsigned long long, unsigned long 
> long);
> +  double __builtin_vec_scalar_insert_exp (unsigned long long, unsigned int);
>  VSIEDP
> -  double __builtin_vec_scalar_insert_exp (double, unsigned long long);
> +  double __builtin_vec_scalar_insert_exp (double, unsigned int);
>  VSIEDPF

NAK on this as well.

>  ;; VSX Scalar Extract Exponent Double-Precision
>  (define_insn "xsxexpdp"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> - (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> + (unspec:SI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXEXPDP))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR"
>"xsxexpdp %0,%x1"
>[(set_attr "type" "integer")])

You need to keep a version that returns DImode (where it did before,
64-bit).  But you can use :GPR perhaps?  That also allows :SI on 64-bit,
so things can be optimised to that where it makes sense.

And :SI on 32-bit is just new, no problems there :-)

> @@ -5116,7 +5116,7 @@ (define_insn "xsxsigdp"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
>UNSPEC_VSX_SXSIG))]
> -  "TARGET_P9_VECTOR && TARGET_64BIT"
> +  "TARGET_P9_VECTOR && TARGET_POWERPC64"

That is a good fix, separate from the rest though, so could you do it
first please?


Segher


Re: [PATCH] libstdc++: Make chrono::hh_mm_ss more compact

2022-12-12 Thread Stephan Bergmann via Gcc-patches

On 12/12/2022 12:18, Jonathan Wakely wrote:

If Clang already treats __is_signed as a conditionally-active
built-in, it should do the same for __is_unsigned.


Wasn't aware of that special handling of __is_signed.  Filed 
 "Also allow __is_unsigned to be used 
as an identifier" now (just in case, even if the use in libstdc++ is 
already gone again).




[PATCH 1/9] ipa-cp: Write transformation summaries of all functions

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

I would really like to have at least this patch and the subsequent one
accepted for GCC 13 so that PR 103227 is fixed ina way which does not
sometimes leave unused parameters behind.

The rest of the patches make IPA-SRA more powerful but if they cannot
be reviewed in time, I guess they'll be fine in GCC 14 too.

8<

IPA-CP transformation summary streaming code currently won't stream
out transformations necessary for clones which are only necessary for
materialization of other clones (such as an IPA-CP clone which is then
cloned again by IPA-SRA).  However, a follow-up patch for bettor
reconciling IPA-SRA and IPA-CP modifications requires to have that
information at its disposal and so this one reworks the streaming to
write out all non-empty transformation summaries.

In order not to stream transformation summaies into partitions where
the node itself nor any of its clones are materialized, I had to make
sure that clones also get encode_body flag in the encoder (so that it
could be tested) and therefore in turn lto_output understands it needs
to skip clones.

This should actually mean less streaming in typical case because
previously we streamed three zeros for all nodes in a partition with
no useful information associated with them.  Currently we don't stream
anything for those.

When reworking the streaming, I also simplified it a little a
converted it writing to nicer C++ vector iterations.

Bootstrapped and LTO-bootstrapped on x86_64-linux.  OK for master?

gcc/ChangeLog:

2022-11-25  Martin Jambor  

* ipa-prop.cc (useful_ipcp_transformation_info_p): New function.
(write_ipcp_transformation_info): Added a parameter, simplified
given that is known not to be NULL.
(ipcp_write_transformation_summaries): Write out all useful
transformation summaries.
(read_ipcp_transformation_info): Simplify given that some info
will be read.
(read_replacements_section): Remove assert.
* lto-cgraph.cc (add_node_to): Also set encode_body for clones.
* lto-streamer-out.cc (lto_output): Do not output virtual clones.
---
 gcc/ipa-prop.cc | 153 +++-
 gcc/lto-cgraph.cc   |   2 +-
 gcc/lto-streamer-out.cc |   3 +-
 3 files changed, 76 insertions(+), 82 deletions(-)

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index e6cf25591b3..fcadf64ead7 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -5279,80 +5279,72 @@ ipa_prop_read_jump_functions (void)
 }
 }
 
-void
-write_ipcp_transformation_info (output_block *ob, cgraph_node *node)
+/* Return true if the IPA-CP transformation summary TS is non-NULL and contains
+   useful info.  */
+static bool
+useful_ipcp_transformation_info_p (ipcp_transformation *ts)
 {
-  int node_ref;
-  unsigned int count = 0;
-  lto_symtab_encoder_t encoder;
+  if (!ts)
+return false;
+  if (!vec_safe_is_empty (ts->m_agg_values)
+  || !vec_safe_is_empty (ts->bits)
+  || !vec_safe_is_empty (ts->m_vr))
+return true;
+  return false;
+}
 
-  encoder = ob->decl_state->symtab_node_encoder;
-  node_ref = lto_symtab_encoder_encode (encoder, node);
+/* Write into OB IPA-CP transfromation summary TS describing NODE.  */
+
+void
+write_ipcp_transformation_info (output_block *ob, cgraph_node *node,
+   ipcp_transformation *ts)
+{
+  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
+  int node_ref = lto_symtab_encoder_encode (encoder, node);
   streamer_write_uhwi (ob, node_ref);
 
-  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
-  if (ts && !vec_safe_is_empty (ts->m_agg_values))
+  streamer_write_uhwi (ob, vec_safe_length (ts->m_agg_values));
+  for (const ipa_argagg_value &av : ts->m_agg_values)
 {
-  streamer_write_uhwi (ob, ts->m_agg_values->length ());
-  for (const ipa_argagg_value &av : ts->m_agg_values)
+  struct bitpack_d bp;
+
+  stream_write_tree (ob, av.value, true);
+  streamer_write_uhwi (ob, av.unit_offset);
+  streamer_write_uhwi (ob, av.index);
+
+  bp = bitpack_create (ob->main_stream);
+  bp_pack_value (&bp, av.by_ref, 1);
+  streamer_write_bitpack (&bp);
+}
+
+  streamer_write_uhwi (ob, vec_safe_length (ts->m_vr));
+  for (const ipa_vr &parm_vr : ts->m_vr)
+{
+  struct bitpack_d bp;
+  bp = bitpack_create (ob->main_stream);
+  bp_pack_value (&bp, parm_vr.known, 1);
+  streamer_write_bitpack (&bp);
+  if (parm_vr.known)
{
- struct bitpack_d bp;
-
- stream_write_tree (ob, av.value, true);
- streamer_write_uhwi (ob, av.unit_offset);
- streamer_write_uhwi (ob, av.index);
-
- bp = bitpack_create (ob->main_stream);
- bp_pack_value (&bp, av.by_ref, 1);
- streamer_write_bitpack (&bp);
+

[PATCH 2/9] ipa: Better way of applying both IPA-CP and IPA-SRA (PR 103227)

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

I would really like to have at least the previous patch and this one
accepted for GCC 13 so that PR 103227 is fixed ina way which does not
sometimes leave unused parameters behind.

The rest of the patches make IPA-SRA more powerful but if they cannot
be reviewed in time, I guess they'll be fine in GCC 14 too.

8<

This is basically a better fix for PR 103227.  The one currently in
use, rushed in late at stage3, which means that IPA-CP transformation
simply does a replacement of default-definition of IPA-SRA-created
scalar parameters with a constant, meant that IPA-SRA actually often
led to creation of a bunch of unused parameters, which was rather
ironic and sub-optimal.

This patch rips that old way out and makes sure the clash is resolved
at clone-materialization time.  What happens is that:

1) IPA-SRA IPA analysis (decision) stage recognizes the clash and does
   not create a param adjustment entry for such a scalar component.

2) Clone materialization code checks the IPA-CP transformation
   summary and when it realizes that it is removing a parameter that
   is a base for a discovered IPA-CP aggregate constant, and:

   a) the value is passed by reference, it internally records that any
  load of the value is replaced directly with the known constant
  value.  IPA-SRA will not attempt to split values passed by
  reference when there is a write to it so we know such a load
  won't be on a a LHS.

   b) the value is passed by value, there can be stores to the
  corresponding bit of the aggregate and so all accesses are
  replaced with a new decl and an assignment of the constant to
  this decl is generated at the beginning of the function.

The new testcase contains an xfail as the patch does not fix PR 107640
but it is one that ICEs when one is not careful about remapping
indices of parameters, so I'd like to have it in testsuite/gcc.gd/ipa/
even now.

I don't think that PR 107640 should be attempted through
ipa-param-manipulation replacements because the information is not
really there any more and we'd either need to do the replacements
earlier or dig deep into the clone parent info.  Instead, we should
record somewhere that at the beginning of the function the bits of the
global decl have known values and use that in the value numbering.
That way we could one day encode also known constants in globals that
do not come through parameters.

Bootstrapped and LTO-bootstrapped on x86_64-linux.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

PR ipa/103227
* ipa-param-manipulation.h (class ipa_param_adjustments): Removed
member function get_updated_index_or_split.
(class ipa_param_body_adjustments): New overload of
register_replacement, new member function append_init_stmts, new
member m_split_agg_csts_inits.
* ipa-param-manipulation.cc: Include ipa-prop.h.
(ipa_param_adjustments::get_updated_index_or_split): Removed.
(ipa_param_body_adjustments::register_replacement): New overload, use
it from the older one.
(ipa_param_body_adjustments::common_initialization): Added the
capability to create replacements for conflicting IPA-CP discovered
constants.
(ipa_param_body_adjustments::ipa_param_body_adjustments): Construct
the new member.
(ipa_param_body_adjustments::append_init_stmts): New function.
* ipa-sra.cc: Include ipa-prop.h.
(push_param_adjustments_for_index): Require IPA-CP transformation
summary as a parameter, do not create replacements which are known to
have constant values.
(process_isra_node_results): Find and pass to the above function the
IPA-CP transformation summary.
* ipa-prop.cc (adjust_agg_replacement_values): Remove the
functionality replacing IPA-SRA created scalar parameters with
constants.  Simplify, do not require parameter descriptors, do not
return anything.
(ipcp_transform_function): Simplify now that
adjust_agg_replacement_values does not change cfg.  Move definition
and initialization of descriptors lower.
* tree-inline.cc (tree_function_versioning): Call append_init_stmts of
param_body_adjs, if there are any.

gcc/testsuite/ChangeLog:

2022-11-11  Martin Jambor  

PR ipa/103227
PR ipa/107640
* gcc.dg/ipa/pr107640-2.c: New test.
---
 gcc/ipa-param-manipulation.cc | 155 --
 gcc/ipa-param-manipulation.h  |  19 ++--
 gcc/ipa-prop.cc   |  66 ---
 gcc/ipa-sra.cc|  30 -
 gcc/testsuite/gcc.dg/ipa/pr107640-2.c |  50 +
 gcc/tree-inline.cc|   2 +
 6 files changed, 208 insertions(+), 11

[PATCH 4/9] ipa-sra: Treat REFERENCE_TYPES as always dereferencable

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

C++ and especially Fortran pass data by references which are not
pointers potentially pointing anywhere and so can be assumed to be
safely dereferencable.  This patch teaches IPA-SRA to treat them as
such and avoid the dance we do to prove that we can move loads from
them to the caller.

When we do not know that a dereference will happen all the time, we
need a heuristics so that we do not force memory accesses that normally
happen only rarely.  The patch simply uses the (possibly guessed)
profile and checks whether the (expected) number of loads is at least
half of function invocations invocations.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

PR ipa/103585
* ipa-sra.c (struct gensum_param_access): New field load_count.
(struct gensum_param_desc): New field safe_ref, adjusted comments.
(by_ref_count): Renamed to unsafe_by_ref_count, adjusted all uses.
(dump_gensum_access): Dump the new field.
(dump_gensum_param_descriptor): Likewise.
(create_parameter_descriptors): Set safe_ref field, move setting
by_ref forward.  Only increment unsafe_by_ref_count for unsafe
by_ref parameters.
(allocate_access): Initialize new field.
(mark_param_dereference): Adjust indentation.  Only add data to
bb_dereferences for unsafe by_ref parameters.
(scan_expr_access): For loads, accumulate BB counts.
(dereference_probable_p): New function.
(check_gensum_access): Fix leading comment, add parameter FUN.
Check cumulative counts of loads for safe by_ref accesses instead
of dereferences.
(process_scan_results): Do not propagate dereference distances for
safe by_ref parameters.  Pass fun to check_gensum_access.  Safe
by_ref params do not need the postdominance check.

gcc/testsuite/ChangeLog:

2022-11-11  Martin Jambor  

* g++.dg/ipa/ipa-sra-5.C: New test
---
 gcc/ipa-sra.cc   | 100 +++
 gcc/testsuite/g++.dg/ipa/ipa-sra-5.C |  23 ++
 2 files changed, 95 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/ipa-sra-5.C

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 0f137e810fe..e8a4cd47429 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -151,6 +151,8 @@ struct gensum_param_access
  arguments.  */
   tree alias_ptr_type;
 
+  /* Cumulative count of all loads. */
+  profile_count load_count;
   /* Have there been writes to or reads from this exact location except for as
  arguments to a function call that can be tracked.  */
   bool nonarg;
@@ -207,8 +209,13 @@ struct gensum_param_desc
  by reference that is a candidate for being converted to a set of
  parameters passing those data by value.  */
   bool split_candidate;
-  /* Is this a parameter passing stuff by reference?  */
+  /* Is this a parameter passing stuff by reference (either a pointer or a
+ source language reference type)?  */
   bool by_ref;
+  /* If this parameter passes stuff by reference, can it be safely dereferenced
+ without performing further checks (for example because it is a
+ REFERENCE_TYPE)?  */
+  bool safe_ref;
 
   /* The number of this parameter as they are ordered in function decl.  */
   int param_number;
@@ -561,7 +568,7 @@ int aa_walking_limit;
accessed in that BB.  */
 HOST_WIDE_INT *bb_dereferences = NULL;
 /* How many by-reference parameters there are in the current function.  */
-int by_ref_count;
+int unsafe_by_ref_count;
 
 /* Bitmap of BBs that can cause the function to "stop" progressing by
returning, throwing externally, looping infinitely or calling a function
@@ -643,6 +650,8 @@ dump_gensum_access (FILE *f, gensum_param_access *access, 
unsigned indent)
   print_generic_expr (f, access->type);
   fprintf (f, ", alias_ptr_type: ");
   print_generic_expr (f, access->alias_ptr_type);
+  fprintf (f, ", load_count: ");
+  access->load_count.dump (f);
   fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
   for (gensum_param_access *ch = access->first_child;
ch;
@@ -692,7 +701,8 @@ dump_gensum_param_descriptor (FILE *f, gensum_param_desc 
*desc)
   return;
 }
   if (desc->by_ref)
-fprintf (f, "by_ref with %u pass throughs\n", desc->ptr_pt_count);
+fprintf (f, "%s by_ref with %u pass throughs\n",
+desc->safe_ref ? "safe" : "unsafe", desc->ptr_pt_count);
 
   for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
 dump_gensum_access (f, acc, 2);
@@ -1140,6 +1150,8 @@ create_parameter_descriptors (cgraph_node *node,
 
   if (POINTER_

[PATCH 3/9] ipa-cp: Leave removal of unused parameters to IPA-SRA

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

Looking at some benchmarks I have noticed many cases when IPA-CP
cloned a function for all contexts just because it knew that some
parameters were not used at all.  Then IPA-SRA looked at the function
and cloned it again to split another parameter or two.  The latter
pass is better equipped to detect when parameters can be altogether
removed and so the IPA-CP cloning was for no good reason.

This patch simply alters the IPA-CP not to do that in the situations
where IPA-SRA can (for nodes which can be made local).

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

* ipa-cp.cc (estimate_local_effects): Do not clone potentionally local
nodes for all contexts just to remove unused parameters.
---
 gcc/ipa-cp.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index cc031ebed0f..172ea353c49 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -3722,7 +3722,10 @@ estimate_local_effects (struct cgraph_node *node)
&removable_params_cost);
   int devirt_bonus = devirtualization_time_bonus (node, &avals);
   if (always_const || devirt_bonus
-  || (removable_params_cost && node->can_change_signature))
+  || (removable_params_cost
+ && node->can_change_signature
+ /* If IPA-SRA can do it, it can do it better.  */
+ && !node->can_be_local_p ()))
 {
   struct caller_statistics stats;
   ipa_call_estimates estimates;
-- 
2.38.1



[PATCH 5/9] ipa-sra: Move caller->callee propagation before callee->caller one

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

This patch does not do any functional changes, it merely moves
top-down propagation in the IPA-SRA WPA phase before bottom-up one.
This also meant moving some preliminary checks from the latter to the
former - where they need to be in their own loop over each SCC because
the subsequent one looks at callers.

Currently the propagations are independent (top-down is used for
return value rermoval, bottom-up for parameter removal and splitting)
but subsequent patches will introduce flags about parameters which
should be propagated from callers first and used in splitting.  I
separated this change to test ir independently and make those
subsequent patches cleaner.

While at it, I also replaced couple of FOR_EACH_VEC_ELT macros with
C++11 style iteration.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

* ipa-sra.c (ipa_sra_analysis): Move top-down analysis before
bottom-up analysis.  Replace FOR_EACH_VEC_ELT with C++11 iteration.

gcc/testsuite/ChangeLog:

2021-12-14  Martin Jambor  

* gcc.dg/ipa/ipa-sra-25.c: New test
---
 gcc/ipa-sra.cc| 145 +-
 gcc/testsuite/gcc.dg/ipa/ipa-sra-25.c |  17 +++
 2 files changed, 89 insertions(+), 73 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-25.c

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index e8a4cd47429..fa5a01ec07c 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -3925,95 +3925,28 @@ ipa_sra_analysis (void)
   auto_vec  stack;
   int node_scc_count = ipa_reduced_postorder (order, true, NULL);
 
-  /* One sweep from callees to callers for parameter removal and splitting.  */
-  for (int i = 0; i < node_scc_count; i++)
+  /* One sweep from callers to callees for return value removal.  */
+  for (int i = node_scc_count - 1; i >= 0 ; i--)
 {
   cgraph_node *scc_rep = order[i];
   vec cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
-  unsigned j;
 
-  /* Preliminary IPA function level checks and first step of parameter
-removal.  */
-  cgraph_node *v;
-  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
+  /* Preliminary IPA function level checks.  */
+  for (cgraph_node *v : cycle_nodes)
{
  isra_func_summary *ifs = func_sums->get (v);
  if (!ifs || !ifs->m_candidate)
continue;
  if (!ipa_sra_ipa_function_checks (v)
  || check_all_callers_for_issues (v))
-   {
- ifs->zap ();
- continue;
-   }
- if (disable_unavailable_parameters (v, ifs))
-   continue;
- for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
-   process_edge_to_unknown_caller (cs);
- for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
-   if (!ipa_edge_within_scc (cs))
- param_removal_cross_scc_edge (cs);
+   ifs->zap ();
}
 
-  /* Look at edges within the current SCC and propagate used-ness across
-them, pushing onto the stack all notes which might need to be
-revisited.  */
-  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
-   v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
-  &stack, true);
-
-  /* Keep revisiting and pushing until nothing changes.  */
-  while (!stack.is_empty ())
-   {
- cgraph_node *v = stack.pop ();
- isra_func_summary *ifs = func_sums->get (v);
- gcc_checking_assert (ifs && ifs->m_queued);
- ifs->m_queued = false;
-
- v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
-&stack, true);
-   }
-
-  /* Parameter splitting.  */
-  bool repeat_scc_access_propagation;
-  do
-   {
- repeat_scc_access_propagation = false;
- FOR_EACH_VEC_ELT (cycle_nodes, j, v)
-   {
- isra_func_summary *ifs = func_sums->get (v);
- if (!ifs
- || !ifs->m_candidate
- || vec_safe_is_empty (ifs->m_parameters))
-   continue;
- for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
-   if (param_splitting_across_edge (cs))
- repeat_scc_access_propagation = true;
-   }
-   }
-  while (repeat_scc_access_propagation);
-
-  if (flag_checking)
-   FOR_EACH_VEC_ELT (cycle_nodes, j, v)
- verify_splitting_accesses (v, true);
-
-  cycle_nodes.release ();
-}
-
-  /* One sweep from caller to callees for result removal.  */
-  for (int i = node_scc_count - 1; i >= 0 ; i--)
-{
-

[PATCH 6/9] ipa-sra: Be optimistic about Fortran descriptors

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

Fortran descriptors are structures which are often constructed just
for a particular argument of a particular call where it is passed by
reference.  When the called function is under compiler's control, it
can be beneficial to split up the descriptor and pass it in individual
parameters.  Unfortunately, currently we allow IPA-SRA to replace a
pointer with a set of replacements which are at most twice as big in
total and for descriptors we'd need to bump that factor to seven.

This patch looks for parameters which are ADDR_EXPRs of local
variables which are written to and passed as arguments by reference
but are never loaded from and marks them with a flag in the call edge
summary.  The IPA analysis phase then identifies formal parameters
which are always fed such arguments and then is more lenient when it
comoes to size.

In order not to store to maximums per formal parameter, I calculate
the more lenient one by multiplying the existing one with a new
parameter.  If it is preferable to keep the maximums independent, we
can do that.  Documentation for the new parameter is missing as I
still need to re-base the patch on a version which has sphinx.  I will
write it before committing.

I have disable IPA-SRA in pr48636-2.f90 in order to be able to keep
using its dump-scan expressions.  The new testcase is basically a copy
of it with different options and IPA-SRA dump scans.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

* ipa-sra.c (isra_param_desc): New field not_specially_constructed.
(struct isra_param_flow): New field constructed_for_calls.
(isra_call_summary::dump): Dump the new flag.
(loaded_decls): New variable.
(dump_isra_param_descriptor): New parameter hints, dump
not_specially_constructed if it is true.
(dump_isra_param_descriptors): New parameter hints, pass it to
dump_isra_param_descriptor.
(ipa_sra_function_summaries::duplicate): Duplicate new flag.
(create_parameter_descriptors): Adjust comment.
(get_gensum_param_desc): Bail out when decl2desc is NULL.
(scan_expr_access): Add loaded local variables to loaded_decls.
(scan_function): Survive if final_bbs is NULL.
(isra_analyze_call): Compute constructed_for_calls flag.
(process_scan_results): Be optimistic about size limits.  Do not dump
computed param hints when dumpint IPA-SRA structures.
(isra_write_edge_summary): Stream constructed_for_calls.
(isra_read_edge_summary): Likewise.
(ipa_sra_dump_all_summaries): New parameter hints, pass it to
dump_isra_param_descriptor.
(flip_all_hints_pessimistic): New function.
(flip_all_param_hints_pessimistic): Likewise.
(propagate_param_hints): Likewise.
(disable_unavailable_parameters): Renamed to
adjust_parameter_descriptions.  Expand size limits for parameters
which are specially contstructed by all callers.  Check limits again.p
(ipa_sra_analysis): Pass required hints to ipa_sra_dump_all_summaries.
Add hint propagation.
(ipa_sra_summarize_function): Initialize and destory loaded_decls,
rearrange so that scan_function is called even when there are no
candidates.
* params.opt (ipa-sra-ptrwrap-growth-factor): New parameter.

gcc/testsuite/ChangeLog:

2021-11-11  Martin Jambor  

* gfortran.dg/pr48636-2.f90: Disable IPA-SRA.
* gfortran.dg/ipa-sra-1.f90: New test.
---
 gcc/ipa-sra.cc  | 278 +++-
 gcc/params.opt  |   6 +-
 gcc/testsuite/gfortran.dg/ipa-sra-1.f90 |  37 
 gcc/testsuite/gfortran.dg/pr48636-2.f90 |   2 +-
 4 files changed, 267 insertions(+), 56 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/ipa-sra-1.f90

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index fa5a01ec07c..c35c0d7162a 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -181,6 +181,10 @@ struct GTY(()) isra_param_desc
   unsigned split_candidate : 1;
   /* Is this a parameter passing stuff by reference?  */
   unsigned by_ref : 1;
+  /* Parameter hint set during IPA analysis when there is a caller which does
+ not construct the argument just to pass it to calls.  Only meaningful for
+ by_ref parameters.  */
+  unsigned not_specially_constructed : 1;
 };
 
 /* Structure used when generating summaries that describes a parameter.  */
@@ -340,6 +344,10 @@ struct isra_param_flow
   /* True when it is safe to copy access candidates here from the callee, which
  would mean introducing dereferences into callers of the caller.  */
   unsigned safe

[PATCH 8/9] ipa-sra: Make scan_expr_access bail out on uninteresting expressions

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

I have noticed that scan_expr_access passes all the expressions it
gets to get_ref_base_and_extent even when we are really only
interested in memory accesses.  So bail out when the expression is
something clearly uninteresting.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2021-12-14  Martin Jambor  

* ipa-sra.c (scan_expr_access): Bail out early if expr is something we
clearly do not need to pass to get_ref_base_and_extent.
---
 gcc/ipa-sra.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 93fceeafc73..3646d71468c 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -1748,6 +1748,11 @@ scan_expr_access (tree expr, gimple *stmt, 
isra_scan_context ctx,
   || TREE_CODE (expr) == REALPART_EXPR)
 expr = TREE_OPERAND (expr, 0);
 
+  if (!handled_component_p (expr)
+  && !DECL_P (expr)
+  && TREE_CODE (expr) != MEM_REF)
+return;
+
   base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, 
&reverse);
 
   if (TREE_CODE (base) == MEM_REF)
-- 
2.38.1



[PATCH 9/9] ipa: Avoid looking for IPA-SRA replacements where there are none

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

While modifying the code, I realized that we do look into statements
even when there are no replacements.  This patch adds the necessary
early bail-outs to avoid that.

ipa_param_body_adjustments::modify_call_stmt cannot have the same at
the very beginning because calls can still contain otherwise removed
parameters that need to be removed from the statements too.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

* ipa-param-manipulation.cc
(ipa_param_body_adjustments::modify_expression): Bail out early if
there are no replacements.
(ipa_param_body_adjustments::modify_assignment): Likewise.

gcc/testsuite/ChangeLog:

2022-11-11  Martin Jambor  

PR ipa/103227
PR ipa/107640
* gcc.dg/ipa/pr107640-2.c: New test.
---
 gcc/ipa-param-manipulation.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
index e92cfc0b6d5..da19d64cbce 100644
--- a/gcc/ipa-param-manipulation.cc
+++ b/gcc/ipa-param-manipulation.cc
@@ -1762,6 +1762,8 @@ ipa_param_body_adjustments::modify_expression (tree 
*expr_p, bool convert)
 {
   tree expr = *expr_p;
 
+  if (m_replacements.is_empty ())
+return false;
   if (TREE_CODE (expr) == BIT_FIELD_REF
   || TREE_CODE (expr) == IMAGPART_EXPR
   || TREE_CODE (expr) == REALPART_EXPR)
@@ -1809,7 +1811,7 @@ ipa_param_body_adjustments::modify_assignment (gimple 
*stmt,
   tree *lhs_p, *rhs_p;
   bool any;
 
-  if (!gimple_assign_single_p (stmt))
+  if (m_replacements.is_empty () || !gimple_assign_single_p (stmt))
 return false;
 
   rhs_p = gimple_assign_rhs1_ptr (stmt);
-- 
2.38.1



[PATCH 7/9] ipa-sra: Forward propagation of sizes which are safe to dereference

2022-12-12 Thread Martin Jambor
Hi,

I'm re-posting patches which I have posted at the end of stage 1 but
which have not passed review yet.

8<

The previous patch established a way to propagate information about
parameters from callers to callees (even though then the actual
splitting is done in the opposite direction), this patch adds to that
information about size of the parameters that is known to be safe to
dereference in the callers - the information currently does not come
from actual dereferences but only when we pass a reference to a known
declaration, but we can use e.g. dereferences in BBs dominating the
call, for example too, if we decide it is worth it.

References which look like splitting candidates but are not always
dereferenced are - assuming the dereferences are not improbable - not
discarded straight away but only marked as conditionally
dereferenceable.  IPA phase then makes sure that they stay candidates
only if all incoming edges have big enough known-to-be-safe size.

Bootstrapped and tested individually when I originally posted it and
now bootstrapped and LTO-bootstrapped and tested as part of the whole
series.  OK for master?


gcc/ChangeLog:

2022-11-11  Martin Jambor  

* ipa-sra.cc (isra_param_desc): New fields safe_size,
conditionally_dereferenceable and safe_size_set.
(struct gensum_param_desc): New field conditionally_dereferenceable.
(struct isra_param_flow): Updated comment of field unit_size.
(ipa_sra_function_summaries::duplicate): Copy the new fields.
(isra_call_summary::dump): Dump unit_size when representing safe_size.
(dump_gensum_param_descriptor): Dump new flag.
(dump_isra_param_descriptor): Dump new fields.
(isra_analyze_call): Fill unit_size when it represents known safe
size.
(check_gensum_access): Instead of disqualifying pointers which are not
always dereference, mark them as conditionally dereferencable if loads
are frequent enough.
(process_scan_results): Copy the conditionally_dereferenceable flag.
(isra_write_node_summary): Stream new fields, or assert they are not
initialized yet.
(isra_read_node_info): Stream new fields.
(update_safe_size): New function.
(propagate_param_hints_accross_call): Propagate safe_sizes.
(propagate_hints_to_all_callees): New function.
(adjust_parameter_descriptions): Check conditionally_dereferenceable
candidates, rework dumping.
(ipa_sra_analysis): Move most of hint propagation for one node to
propagate_hints_to_all_callees.  Add another loop to stabilize within
SCCs and another one to verify.

gcc/testsuite/ChangeLog:

2022-11-11  Martin Jambor  

* gcc.dg/ipa/ipa-sra-26.c: New test.
* gcc.dg/ipa/ipa-sra-27.c: Likewise.
* gcc.dg/ipa/ipa-sra-28.c: Likewise.
---
 gcc/ipa-sra.cc| 253 --
 gcc/testsuite/gcc.dg/ipa/ipa-sra-26.c |  31 
 gcc/testsuite/gcc.dg/ipa/ipa-sra-27.c |  49 +
 gcc/testsuite/gcc.dg/ipa/ipa-sra-28.c |  51 ++
 4 files changed, 328 insertions(+), 56 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-26.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-27.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-28.c

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index c35c0d7162a..93fceeafc73 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -173,6 +173,10 @@ struct GTY(()) isra_param_desc
   unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
   /* Sum of unit sizes of all certain replacements.  */
   unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
+  /* Minimum offset that is known to be safe to dereference because of callers
+ pass pointers to DECLs of at least this size or because of dereferences in
+ callers.  */
+  unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;
 
   /* A parameter that is used only in call arguments and can be removed if all
  concerned actual arguments are removed.  */
@@ -185,6 +189,12 @@ struct GTY(()) isra_param_desc
  not construct the argument just to pass it to calls.  Only meaningful for
  by_ref parameters.  */
   unsigned not_specially_constructed : 1;
+  /* Only meaningful for by_ref parameters.  If set, this parameter can only be
+ a split candidate if all callers pass pointers that are known to point to
+ a chunk of memory large enough to contain all accesses.  */
+  unsigned conditionally_dereferenceable : 1;
+  /* Set when safe_size has been updated from at least one caller.  */
+  unsigned safe_size_set : 1;
 };
 
 /* Structure used when generating summaries that describes a parameter.  */
@@ -220,6 +230,10 @@ struct gensum_param_desc
  without performing further checks (for example because it is a
  REFERENCE_TYPE)?  */
   bool safe_ref;
+  /* Only meaningful for by_ref parameters.  If set, this parameter

Re: [PATCH] longlong.h: Do no use asm input cast for clang

2022-12-12 Thread Adhemerval Zanella Netto via Gcc-patches



On 30/11/22 20:24, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Nov 30, 2022 at 03:16:25PM -0300, Adhemerval Zanella via Gcc-patches 
> wrote:
>> clang by default rejects the input casts with:
>>
>>   error: invalid use of a cast in a inline asm context requiring an
>>   lvalue: remove the cast or build with -fheinous-gnu-extensions
>>
>> And even with -fheinous-gnu-extensions clang still throws an warning
>> and also states that this option might be removed in the future.
>> For gcc the cast are still useful somewhat [1], so just remove it
>> clang is used.
> 
> This is one of the things in inline asm that is tightly tied to GCC
> internals.  You should emulate GCC's behaviour faithfully if you want
> to claim you implement the inline asm GNU C extension.

Agree, that's why I just make it a no-op for clang which indicates that
it does not seem much use of this extension.

> I understand that the casts should be no-ops on the asm side (maybe they
> change the sign) and they are present as type-checking.  Can we implement
> this type-checking in a different (portable) way?  I think the macro you use
> should be named like __asm_output_check_type (..) or so to indicate the
> intended purpose.

I do not think trying to leverage it on clang side would yield much, it
seems that it really does not want to support this extension.  I am not
sure we can really make it portable, best option I can think of would to
add a mix of __builtin_classify_type and typeof prior asm call (we do
something similar to powerp64 syscall code on glibc), although it would
still require some gcc specific builtins.

I am open for ideas, since to get this header to be clang-compatible on
glibc it requires to get it first on gcc.

> 
>> --- a/include/ChangeLog
>> +++ b/include/ChangeLog
> 
> That should not be part of the patch?  Changelog entries should be
> verbatim in the message you send.
> 
> The size of this patch already makes clear this is a bad idea, imo.
> This code is already hard enough to read.

Indeed, I forgot that CL entries were not part of the commit.


[PATCH] c++: template friend with variadic constraints [PR108066]

2022-12-12 Thread Patrick Palka via Gcc-patches
When instantiating a constrained hidden template friend, we need to
substitute into its constraints for sake of declaration matching.  For
this substitution we use a full argument vector whose outer levels
correspond to the class's arguments and innermost level corresponds to
the template's level-lowered generic arguments.

But for A::f here, for which the relevant argument vector is
{{int}, {Us...}}, this substitution triggers the assert in
use_pack_expansion_extra_args_p since one argument is a pack expansion
and the other isn't.

And for A::f, for which the relevant argument vector is
{{int, int}, {Us...}}, the use_pack_expansion_extra_args_p assert would
also trigger but we first get a bogus "mismatched argument pack lengths"
error from tsubst_pack_expansion.

These might ultimately be bugs in tsubst_pack_expansion, but it seems
we can work around them by tweaking the constraint substitution in
maybe_substitute_reqs_for to only use the friend's outer arguments in
the case of a template friend.  This should be otherwise equivalent to
substituting using the full arguments, since the template's innermost
arguments are just its generic arguments with level=1.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/12?

PR c++/108066
PR c++/108067

gcc/cp/ChangeLog:

* constraint.cc (maybe_substitute_reqs_for): For a template
friend, substitute using only its outer arguments.  Remove
dead uses_template_parms test.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-friend12.C: New test.
---
 gcc/cp/constraint.cc  |  8 +++
 .../g++.dg/cpp2a/concepts-friend12.C  | 22 +++
 2 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-friend12.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index d4cd92ec3b4..f9d1009c9fe 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -1352,6 +1352,14 @@ maybe_substitute_reqs_for (tree reqs, const_tree decl)
   tree tmpl = DECL_TI_TEMPLATE (decl);
   tree gargs = generic_targs_for (tmpl);
   processing_template_decl_sentinel s;
+  if (PRIMARY_TEMPLATE_P (tmpl))
+   {
+ if (TEMPLATE_ARGS_DEPTH (gargs) == 1)
+   return reqs;
+ ++processing_template_decl;
+ gargs = copy_node (gargs);
+ --TREE_VEC_LENGTH (gargs);
+   }
   if (uses_template_parms (gargs))
++processing_template_decl;
   reqs = tsubst_constraint (reqs, gargs,
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-friend12.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-friend12.C
new file mode 100644
index 000..95973842afb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-friend12.C
@@ -0,0 +1,22 @@
+// PR c++/108066
+// PR c++/108067
+// { dg-do compile { target c++20 } }
+
+template
+concept C = __is_same(T, U);
+
+template
+struct A {
+  template
+requires (... && C)
+  friend void f(A, A) { }
+};
+
+int main() {
+  A x;
+  f(x, x);
+  A y;
+  f(y, y);
+  A z;
+  f(x, z); // { dg-error "no match" }
+}
-- 
2.39.0.rc2.23.g694cb1b2ab



[PATCH] libstdc++: enable on windows

2022-12-12 Thread Björn Schäpers
From: Björn Schäpers 

libstdc++-v3/Changelog
* acinclude.m4: Add check for windows.h.
* acinclude.m4: Add pecoff as FORMAT_FILE.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/libbacktrace/Makefile.am: Regenerate.
* src/libbacktrace/Makefile.in: Add pecoff.c as FORMAT_FILE.

Signed-off-by: Björn Schäpers 
---
 libstdc++-v3/acinclude.m4 | 2 ++
 libstdc++-v3/src/libbacktrace/Makefile.am | 1 +
 2 files changed, 3 insertions(+)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index b957c728ba1..1cfe704e637 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4981,6 +4981,7 @@ AC_DEFUN([GLIBCXX_ENABLE_BACKTRACE], [
   if test "$have_dl_iterate_phdr" = "yes"; then
 BACKTRACE_CPPFLAGS="$BACKTRACE_CPPFLAGS -DHAVE_DL_ITERATE_PHDR=1"
   fi
+  AC_CHECK_HEADERS(windows.h)
 
   # Check for the fcntl function.
   if test -n "${with_target_subdir}"; then
@@ -5026,6 +5027,7 @@ glibcxx_cv_sys_filetype=$filetype])
 FORMAT_FILE=
 case "$glibcxx_cv_sys_filetype" in
 elf*) FORMAT_FILE="elf.lo" ;;
+pecoff*) FORMAT_FILE="pecoff.lo" ;;
 *) AC_MSG_WARN([could not determine output file type])
FORMAT_FILE="unknown.lo"
enable_libstdcxx_backtrace=no
diff --git a/libstdc++-v3/src/libbacktrace/Makefile.am 
b/libstdc++-v3/src/libbacktrace/Makefile.am
index 52d8f81b97b..b34a429a21d 100644
--- a/libstdc++-v3/src/libbacktrace/Makefile.am
+++ b/libstdc++-v3/src/libbacktrace/Makefile.am
@@ -71,6 +71,7 @@ libstdc___libbacktrace_la_SOURCES = \
 
 FORMAT_FILES = \
elf.c \
+   pecoff.c \
unknown.c
 
 VIEW_FILES = \
-- 
2.38.1



[PATCH] libstdc++: Deliver names of C functions in

2022-12-12 Thread Björn Schäpers
From: Björn Schäpers 

One could add (), these are not part of __name. One could also try to
check upfront if __cxa_demangle should be called at all.

-- >8 --

Tested on i686-w64-mingw32.

__cxa_demangle is only to demangle C++ names, for all C functions,
extern "C" functions, and including main it returns -2, in that case
just adapt the given name. Otherwise it's kept empty, which doesn't look
nice in the stacktrace.

libstdc++-v3/ChangeLog:

* include/std/stacktrace (stacktrace_entry::_S_demangle): Use
raw __name if __cxa_demangle could not demangle it.

Signed-off-by: Björn Schäpers 
---
 libstdc++-v3/include/std/stacktrace | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 83c6463b0d8..6d4051b9f5b 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -217,8 +217,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   int __status;
   char* __str = __cxxabiv1::__cxa_demangle(__name, nullptr, nullptr,
   &__status);
-  if (__status == 0)
+  switch (__status)
+  {
+  case 0:
__s = __str;
+   break;
+  case -2:
+   __s = __name;
+   break;
+  }
   __builtin_free(__str);
   return __s;
 }
-- 
2.38.1



[PATCH] libstdc++: Deliver names of C functions in

2022-12-12 Thread Björn Schäpers
From: Björn Schäpers 

One could add (), these are not part of __name. One could also try to
check upfront if __cxa_demangle should be called at all.

-- >8 --

Tested on i686-w64-mingw32.

__cxa_demangle is only to demangle C++ names, for all C functions,
extern "C" functions, and including main it returns -2, in that case
just adapt the given name. Otherwise it's kept empty, which doesn't look
nice in the stacktrace.

libstdc++-v3/ChangeLog:

* include/std/stacktrace (stacktrace_entry::_S_demangle): Use
raw __name if __cxa_demangle could not demangle it.

Signed-off-by: Björn Schäpers 
---
 libstdc++-v3/include/std/stacktrace | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 83c6463b0d8..6d4051b9f5b 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -217,8 +217,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   int __status;
   char* __str = __cxxabiv1::__cxa_demangle(__name, nullptr, nullptr,
   &__status);
-  if (__status == 0)
+  switch (__status)
+  {
+  case 0:
__s = __str;
+   break;
+  case -2:
+   __s = __name;
+   break;
+  }
   __builtin_free(__str);
   return __s;
 }
-- 
2.38.1



Re: [PATCH] longlong.h: Do no use asm input cast for clang

2022-12-12 Thread Segher Boessenkool
Hi!

On Thu, Dec 01, 2022 at 08:26:52AM +0100, Richard Biener wrote:
> On Thu, Dec 1, 2022 at 12:26 AM Segher Boessenkool
>  wrote:
> > On Wed, Nov 30, 2022 at 03:16:25PM -0300, Adhemerval Zanella via 
> > Gcc-patches wrote:
> > > clang by default rejects the input casts with:
> > >
> > >   error: invalid use of a cast in a inline asm context requiring an
> > >   lvalue: remove the cast or build with -fheinous-gnu-extensions
> > >
> > > And even with -fheinous-gnu-extensions clang still throws an warning
> > > and also states that this option might be removed in the future.
> > > For gcc the cast are still useful somewhat [1], so just remove it
> > > clang is used.
> >
> > This is one of the things in inline asm that is tightly tied to GCC
> > internals.  You should emulate GCC's behaviour faithfully if you want
> > to claim you implement the inline asm GNU C extension.
> 
> I understand that the casts should be no-ops on the asm side (maybe they
> change the sign) and they are present as type-checking.  Can we implement
> this type-checking in a different (portable) way?

Portable?  Portable between which things?  Inline assembler is a GNU C
extension, this is portable between any two compilers that implement
that (correctly), already.

This can be written some other way of course, but as I said before, most
instances of longlong.h that are used "in the wild" are over ten years
old, so we really cannot fix that "problem".  If we would distribute
this header with GCC, then we could start doing such things as soon as
people start using the new header.  But almost all of the functionality
the header provides is legacy anyway!

> I think the macro you use
> should be named like __asm_output_check_type (..) or so to indicate the
> intended purpose.

I'm all for that, certainly.  Or a better name preferably (check type
for what?  And do what with the result?  Etc.)


Segher


[committed] d: Fix undefined reference to nested lambda in template (PR108055)

2022-12-12 Thread Iain Buclaw via Gcc-patches
Hi,

This patch fixes a linker error caused by gdc not emitting all symbols.

Sometimes, nested lambdas of templated functions get no code generation
due to them being marked as instantianted outside of all modules being
compiled in the current compilation unit.  This despite enclosing
template instances being marked as instantiated inside the current
compilation unit.  To fix, all enclosing templates are now checked in
`function_defined_in_root_p'.

Because of this change, `function_needs_inline_definition_p' has also
been fixed up to only check whether the regular function definition
itself is to be emitted in the current compilation unit.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32,
committed to mainline and backported to the releases/gcc-12 branch.

Regards,
Iain.

---

PR d/108055

gcc/d/ChangeLog:

* decl.cc (function_defined_in_root_p): Check all enclosing template
instances for definition in a root module.
(function_needs_inline_definition_p): Replace call to
function_defined_in_root_p with test for outer module `isRoot'.

gcc/testsuite/ChangeLog:

* gdc.dg/torture/imports/pr108055conv.d: New.
* gdc.dg/torture/imports/pr108055spec.d: New.
* gdc.dg/torture/imports/pr108055write.d: New.
* gdc.dg/torture/pr108055.d: New test.
---
 gcc/d/decl.cc | 14 ++
 .../gdc.dg/torture/imports/pr108055conv.d | 26 +++
 .../gdc.dg/torture/imports/pr108055spec.d | 18 +
 .../gdc.dg/torture/imports/pr108055write.d| 19 ++
 gcc/testsuite/gdc.dg/torture/pr108055.d   | 12 +
 5 files changed, 84 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gdc.dg/torture/imports/pr108055conv.d
 create mode 100644 gcc/testsuite/gdc.dg/torture/imports/pr108055spec.d
 create mode 100644 gcc/testsuite/gdc.dg/torture/imports/pr108055write.d
 create mode 100644 gcc/testsuite/gdc.dg/torture/pr108055.d

diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index bed16323fec..35081083cd6 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -1028,7 +1028,8 @@ build_decl_tree (Dsymbol *d)
   input_location = saved_location;
 }
 
-/* Returns true if function FD is defined or instantiated in a root module.  */
+/* Returns true if function FD, or any lexically enclosing scope function of FD
+   is defined or instantiated in a root module.  */
 
 static bool
 function_defined_in_root_p (FuncDeclaration *fd)
@@ -1037,9 +1038,11 @@ function_defined_in_root_p (FuncDeclaration *fd)
   if (md && md->isRoot ())
 return true;
 
-  TemplateInstance *ti = fd->isInstantiated ();
-  if (ti && ti->minst && ti->minst->isRoot ())
-return true;
+  for (TemplateInstance *ti = fd->isInstantiated (); ti != NULL; ti = 
ti->tinst)
+{
+  if (ti->minst && ti->minst->isRoot ())
+   return true;
+}
 
   return false;
 }
@@ -1067,7 +1070,8 @@ function_needs_inline_definition_p (FuncDeclaration *fd)
 
   /* Check whether function will be regularly defined later in the current
  translation unit.  */
-  if (function_defined_in_root_p (fd))
+  Module *md = fd->getModule ();
+  if (md && md->isRoot ())
 return false;
 
   /* Non-inlineable functions are always external.  */
diff --git a/gcc/testsuite/gdc.dg/torture/imports/pr108055conv.d 
b/gcc/testsuite/gdc.dg/torture/imports/pr108055conv.d
new file mode 100644
index 000..93ebba747b1
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/imports/pr108055conv.d
@@ -0,0 +1,26 @@
+module imports.pr108055conv;
+
+T toStr(T, S)(S src)
+{
+static if (is(typeof(T.init[0]) E))
+{
+struct Appender
+{
+inout(E)[] data;
+}
+
+import imports.pr108055spec;
+import imports.pr108055write;
+
+auto w = Appender();
+FormatSpec!E f;
+formatValue(w, src, f);
+return w.data;
+}
+}
+
+T to(T, A)(A args)
+{
+return toStr!T(args);
+}
+
diff --git a/gcc/testsuite/gdc.dg/torture/imports/pr108055spec.d 
b/gcc/testsuite/gdc.dg/torture/imports/pr108055spec.d
new file mode 100644
index 000..801c5810516
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/imports/pr108055spec.d
@@ -0,0 +1,18 @@
+module imports.pr108055spec;
+
+template Unqual(T : const U, U)
+{
+alias Unqual = U;
+}
+
+template FormatSpec(Char)
+if (!is(Unqual!Char == Char))
+{
+alias FormatSpec = FormatSpec!(Unqual!Char);
+}
+
+struct FormatSpec(Char)
+if (is(Unqual!Char == Char))
+{
+const(Char)[] nested;
+}
diff --git a/gcc/testsuite/gdc.dg/torture/imports/pr108055write.d 
b/gcc/testsuite/gdc.dg/torture/imports/pr108055write.d
new file mode 100644
index 000..fe41d7baa7c
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/imports/pr108055write.d
@@ -0,0 +1,19 @@
+module imports.pr108055write;
+import imports.pr108055spec;
+
+void formatValueImpl(Writer, T, Char)(ref Writer , const(T) ,
+  scope const ref For

Re: [PATCH] Fortran: improve checking of assumed size array spec [PR102180]

2022-12-12 Thread Harald Anlauf via Gcc-patches

Hi Steve,

Am 12.12.22 um 00:52 schrieb Steve Kargl via Gcc-patches:

On Sun, Dec 11, 2022 at 11:33:43PM +0100, Harald Anlauf via Fortran wrote:

Dear all,

the attached patch improves checking of array specs in two ways:
- bad assumed size array spec
- a bad first array element spec may already trigger an error,
   leading to a more consistent behavior

Regtested on x86_64-pc-linux-gnu.  OK for mainline?



OK with minor nit.


+  /* F2018:R821: "assumed-implied-spec  is  [ lower-bound : ] *".  */
+  if (gfc_match (" : * ") == MATCH_YES)
+{
+  gfc_error ("A lower bound must precede colon in "
+"assumed size array specification at %C");


"assumed size" should likely be "assumed-size"


good point, I've fixed that.

I was a bit unhappy with the previously submitted patch,
as it tried to match ':' twice and gave an unfortunate locus
in the error message.  I now chose to combine the adjacent
matches and to remember a more suitable locus for use with
the emitted error, see attached updated patch.

Committed as r13-4623-gcf5327b89ab610.

Thanks,
Harald


+  return AS_UNKNOWN;
+}
+



From cf5327b89ab610649c5faab78ea7907bb74b103c Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Sun, 11 Dec 2022 23:24:03 +0100
Subject: [PATCH] Fortran: improve checking of assumed-size array spec
 [PR102180]

gcc/fortran/ChangeLog:

	PR fortran/102180
	* array.cc (match_array_element_spec): Add check for bad
	assumed-implied-spec.
	(gfc_match_array_spec): Reorder logic so that the first bad array
	element spec may trigger an error.

gcc/testsuite/ChangeLog:

	PR fortran/102180
	* gfortran.dg/pr102180.f90: New test.
---
 gcc/fortran/array.cc   | 19 ---
 gcc/testsuite/gfortran.dg/pr102180.f90 | 19 +++
 2 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr102180.f90

diff --git a/gcc/fortran/array.cc b/gcc/fortran/array.cc
index bbdb5b392fc..10d9e0c5354 100644
--- a/gcc/fortran/array.cc
+++ b/gcc/fortran/array.cc
@@ -489,7 +489,20 @@ match_array_element_spec (gfc_array_spec *as)
 }
 
   if (gfc_match_char (':') == MATCH_YES)
-return AS_DEFERRED;
+{
+  locus old_loc = gfc_current_locus;
+  if (gfc_match_char ('*') == MATCH_YES)
+	{
+	  /* F2018:R821: "assumed-implied-spec  is  [ lower-bound : ] *".  */
+	  gfc_error ("A lower bound must precede colon in "
+		 "assumed-size array specification at %L", &old_loc);
+	  return AS_UNKNOWN;
+	}
+  else
+	{
+	  return AS_DEFERRED;
+	}
+}
 
   m = gfc_match_expr (upper);
   if (m == MATCH_NO)
@@ -591,6 +604,8 @@ gfc_match_array_spec (gfc_array_spec **asp, bool match_dim, bool match_codim)
 {
   as->rank++;
   current_type = match_array_element_spec (as);
+  if (current_type == AS_UNKNOWN)
+	goto cleanup;
 
   /* Note that current_type == AS_ASSUMED_SIZE for both assumed-size
 	 and implied-shape specifications.  If the rank is at least 2, we can
@@ -600,8 +615,6 @@ gfc_match_array_spec (gfc_array_spec **asp, bool match_dim, bool match_codim)
 
   if (as->rank == 1)
 	{
-	  if (current_type == AS_UNKNOWN)
-	goto cleanup;
 	  as->type = current_type;
 	}
   else
diff --git a/gcc/testsuite/gfortran.dg/pr102180.f90 b/gcc/testsuite/gfortran.dg/pr102180.f90
new file mode 100644
index 000..cbf3e7299e7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr102180.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options "-fcoarray=lib" }
+! PR fortran/102180 - Improve checking of assumed size array spec
+
+subroutine s(x,y)
+  real :: x(0:*) ! legal
+  real :: y[0:*] ! legal
+end
+
+subroutine t(x,y)
+  real :: x(:*) ! { dg-error "A lower bound must precede colon" }
+  real :: y[:*] ! { dg-error "A lower bound must precede colon" }
+end
+
+subroutine u(x,y,z)
+  real :: x(2,*)
+  real :: y(2,2:*)
+  real :: z(2,:*) ! { dg-error "A lower bound must precede colon" }
+end
-- 
2.35.3



Re: [PATCH] Fortran: improve checking of assumed size array spec [PR102180]

2022-12-12 Thread Steve Kargl via Gcc-patches
On Mon, Dec 12, 2022 at 08:49:50PM +0100, Harald Anlauf via Fortran wrote:
> 
> Committed as r13-4623-gcf5327b89ab610.
> 

To be clear, I have no problems with this commit.

But, just a FYI, there is gfc_peek_ascii_char(),
which allows you to look at the next character
without having to keep track of the locus.

> +{
> +  locus old_loc = gfc_current_locus;
> +  if (gfc_match_char ('*') == MATCH_YES)

 gfc_gobble_whitespace (); /* Can't remember if matching up to this
  eats whitespace.  */ 
 if (gfc_peek_ascii_char () == '*')

> + {
> +   /* F2018:R821: "assumed-implied-spec  is  [ lower-bound : ] *".  */
> +   gfc_error ("A lower bound must precede colon in "
> +  "assumed-size array specification at %L", &old_loc);
> +   return AS_UNKNOWN;
> + }
> +  else
> + {
  gfc_current_locus = old_loc; /* Is this needed? */
> +   return AS_DEFERRED;
> + }
> +}


-- 
Steve


[PATCH] Fortran: NULL pointer dereference while parsing a function [PR107423]

2022-12-12 Thread Harald Anlauf via Gcc-patches
Dear all,

here's another obvious patch by Steve which prevents a
NULL pointer dereference during parsing.

Regtested on x86_64-pc-linux-gnu.

Will commit to mainline within 24h unless there are comments.

Thanks,
Harald

From f23a5252ee086f9b78c44150d409e40a445c6928 Mon Sep 17 00:00:00 2001
From: Steve Kargl 
Date: Mon, 12 Dec 2022 21:11:07 +0100
Subject: [PATCH] Fortran: NULL pointer dereference while parsing a function
 [PR107423]

gcc/fortran/ChangeLog:

	PR fortran/107423
	* parse.cc (parse_spec): Avoid NULL pointer dereference when parsing
	a function and an error occured.

gcc/testsuite/ChangeLog:

	PR fortran/107423
	* gfortran.dg/pr107423.f90: New test.
---
 gcc/fortran/parse.cc   |  2 +-
 gcc/testsuite/gfortran.dg/pr107423.f90 | 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr107423.f90

diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index cdae43fa1fd..bc2b2188eea 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -4015,7 +4015,7 @@ parse_spec (gfc_statement st)
   gfc_symbol* proc = gfc_current_ns->proc_name;
   gcc_assert (proc);

-  if (proc->result->ts.type == BT_UNKNOWN)
+  if (proc->result && proc->result->ts.type == BT_UNKNOWN)
 	function_result_typed = true;
 }

diff --git a/gcc/testsuite/gfortran.dg/pr107423.f90 b/gcc/testsuite/gfortran.dg/pr107423.f90
new file mode 100644
index 000..9ae64c94ae0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr107423.f90
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-options "-std=f95" }
+! PR fortran/107423 - ICE in parse_spec
+! Contributed by G.Steinmetz
+
+program p
+  type t(k)
+ integer, kind :: k  ! { dg-error "Fortran 2003" }
+ integer :: a
+  end type
+contains
+  function f()
+type(t(4)), allocatable :: x ! { dg-error "Invalid character" }
+allocate (t(4) :: x) ! { dg-error "cannot be used" }
+  end   ! { dg-error "END" }
+end ! { dg-error "END" }
+
+! { dg-prune-output "Unexpected end of file" }
--
2.35.3



Re: [PATCH 3/3] btf: correct generation for extern funcs [PR106773]

2022-12-12 Thread David Faust via Gcc-patches



On 12/8/22 23:36, Indu Bhagat wrote:
> On 12/7/22 12:57, David Faust wrote:
>> The eBPF loader expects to find entries for functions declared as extern
>> in the corresponding BTF_KIND_DATASEC record, but we were not generating
>> these entries.
>>
>> This patch adds support for the 'extern' linkage of function types in
>> BTF, and creates entries for for them BTF_KIND_DATASEC records as needed.
>>
>>  PR target/106773
>>
>> gcc/
>>
>>  * btfout.cc (get_section_name): New function.
>>  (btf_collect_datasec): Use it here. Process functions, marking them
>>  'extern' and generating DATASEC entries for them as appropriate. Move
>>  creation of BTF_KIND_FUNC records to here...
>>  (btf_dtd_emit_preprocess_cb): ... from here.
>>
>> gcc/testsuite/
>>
>>  * gcc.dg/debug/btf/btf-datasec-2.c: New test.
>>  * gcc.dg/debug/btf/btf-function-6.c: New test.
>>
>> include/
>>
>>  * btf.h (struct btf_var_secinfo): Update comments with notes about
>>  extern functions.
>> ---
>>   gcc/btfout.cc | 129 --
>>   .../gcc.dg/debug/btf/btf-datasec-2.c  |  28 
>>   .../gcc.dg/debug/btf/btf-function-6.c |  19 +++
>>   include/btf.h |   9 +-
>>   4 files changed, 139 insertions(+), 46 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-datasec-2.c
>>   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-6.c
>>
>> diff --git a/gcc/btfout.cc b/gcc/btfout.cc
>> index 05f3a3f9b6e..d7ead377ec5 100644
>> --- a/gcc/btfout.cc
>> +++ b/gcc/btfout.cc
>> @@ -294,7 +294,35 @@ btf_datasec_push_entry (ctf_container_ref ctfc, const 
>> char *secname,
>> ds.entries.safe_push (info);
>>   
>> datasecs.safe_push (ds);
>> -  num_types_created++;
>> +}
>> +
>> +
>> +/* Return the section name, as of interest to btf_collect_datasec, for the
>> +   given symtab node. Note that this deliberately returns NULL for objects
>> +   which do not go in a section btf_collect_datasec cares about.  */
> 
> "Dot, space, space, new sentence."
> 
>> +static const char *
>> +get_section_name (symtab_node *node)
>> +{
>> +  const char *section_name = node->get_section ();
>> +
>> +  if (section_name == NULL)
>> +{
>> +  switch (categorize_decl_for_section (node->decl, 0))
>> +{
>> +case SECCAT_BSS:
>> +  section_name = ".bss";
>> +  break;
>> +case SECCAT_DATA:
>> +  section_name = ".data";
>> +  break;
>> +case SECCAT_RODATA:
>> +  section_name = ".rodata";
>> +  break;
>> +default:;
>> +}
>> +}
>> +
>> +  return section_name;
>>   }
>>   
>>   /* Construct all BTF_KIND_DATASEC records for CTFC. One such record is 
>> created
>> @@ -305,7 +333,60 @@ btf_datasec_push_entry (ctf_container_ref ctfc, const 
>> char *secname,
>>   static void
>>   btf_collect_datasec (ctf_container_ref ctfc)
>>   {
>> -  /* See cgraph.h struct symtab_node, which varpool_node extends.  */
>> +  cgraph_node *func;
>> +  FOR_EACH_FUNCTION (func)
>> +{
>> +  dw_die_ref die = lookup_decl_die (func->decl);
>> +  if (die == NULL)
>> +continue;
>> +
>> +  ctf_dtdef_ref dtd = ctf_dtd_lookup (ctfc, die);
>> +  if (dtd == NULL)
>> +continue;
>> +
>> +  /* Functions actually get two types: a BTF_KIND_FUNC_PROTO, and
>> + also a BTF_KIND_FUNC. But the CTF container only allocates one
>> + type per function, which matches closely with BTF_KIND_FUNC_PROTO.
>> + For each such function, also allocate a BTF_KIND_FUNC entry.
>> + These will be output later.  */
> 
> "Dot, space, space, new sentence."
> 
>> +  ctf_dtdef_ref func_dtd = ggc_cleared_alloc ();
>> +  func_dtd->dtd_data = dtd->dtd_data;
>> +  func_dtd->dtd_data.ctti_type = dtd->dtd_type;
>> +  func_dtd->linkage = dtd->linkage;
>> +  func_dtd->dtd_type = num_types_added + num_types_created;
>> +
>> +  /* Only the BTF_KIND_FUNC type actually references the name. The
>> + BTF_KIND_FUNC_PROTO is always anonymous.  */
>> +  dtd->dtd_data.ctti_name = 0;
>> +
>> +  vec_safe_push (funcs, func_dtd);
>> +  num_types_created++;
>> +
>> +  /* Mark any 'extern' funcs and add DATASEC entries for them.  */
>> +  if (DECL_EXTERNAL (func->decl))
>> +{
>> +  func_dtd->linkage = BTF_LINKAGE_EXTERN;
>> +
> 
> What is the expected BTF when both decl and definition are present:
> 
> extern int extfunc(int x);
> int extfunc (int x) {
>int y = foo ();
>return y;
> }

Using clang implementation as the reference, a single FUNC record
for "extfunc" with "global" linkage:

$ cat extfuncdef.c 
extern int extfunc (int x);
int extfunc (int x) {
  int y = foo ();
  return y;
}

$ clang -target bpf -c -g extfuncdef.c -o extfuncdef.o

$ /usr/sbin/bpftool btf dump file extfuncdef.o
[1] FUNC_PROTO '(anon)' ret_type_id=2 vlen=1
'(anon)' type_id=2
[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
[3] FUNC 'extfu

Re: [PATCH 1/9] ipa-cp: Write transformation summaries of all functions

2022-12-12 Thread Jan Hubicka via Gcc-patches
> -void
> -write_ipcp_transformation_info (output_block *ob, cgraph_node *node)
> +/* Return true if the IPA-CP transformation summary TS is non-NULL and 
> contains
> +   useful info.  */
> +static bool
> +useful_ipcp_transformation_info_p (ipcp_transformation *ts)
>  {
> -  int node_ref;
> -  unsigned int count = 0;
> -  lto_symtab_encoder_t encoder;
> +  if (!ts)
> +return false;
> +  if (!vec_safe_is_empty (ts->m_agg_values)
> +  || !vec_safe_is_empty (ts->bits)
> +  || !vec_safe_is_empty (ts->m_vr))
> +return true;
> +  return false;
> +}
Hi,
If I remmeber correctly, we discussed this patch and wanted to change
the boundary computation code to set encode_body also for masters of
clones which are needed in a given partition so we can test the flag
here.  Was there problem with this approach?

I will review rest of series this week.
Honza


Re: [PATCH 1/3] btf: add 'extern' linkage for variables [PR106773]

2022-12-12 Thread David Faust via Gcc-patches



On 12/8/22 22:55, Indu Bhagat wrote:
> Hi David,
> 
> On 12/7/22 12:57, David Faust wrote:
>> Add support for the 'extern' linkage value for BTF_KIND_VAR records,
>> which is used for variables declared as extern in the source file.
>>
>>  PR target/106773
>>
>> gcc/
>>
>>  * btfout.cc (BTF_LINKAGE_STATIC): New define.
>>  (BTF_LINKAGE_GLOBAL): Likewise.
>>  (BTF_LINKAGE_EXTERN): Likewise.
>>  (btf_collect_datasec): Mark extern variables as such.
>>  (btf_asm_varent): Accomodate 'extern' linkage.
>>
>> gcc/testsuite/
>>
>>  * gcc.dg/debug/btf/btf-variables-4.c: New test.
>>
>> include/
>>
>>  * btf.h (struct btf_var): Update comment to note 'extern' linkage.
>> ---
>>   gcc/btfout.cc |  9 ++-
>>   .../gcc.dg/debug/btf/btf-variables-4.c| 24 +++
>>   include/btf.h |  2 +-
>>   3 files changed, 33 insertions(+), 2 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-variables-4.c
>>
>> diff --git a/gcc/btfout.cc b/gcc/btfout.cc
>> index aef9fd70a28..a1c6266a7db 100644
>> --- a/gcc/btfout.cc
>> +++ b/gcc/btfout.cc
>> @@ -66,6 +66,10 @@ static char btf_info_section_label[MAX_BTF_LABEL_BYTES];
>>   
>>   #define BTF_INVALID_TYPEID 0x
>>   
>> +#define BTF_LINKAGE_STATIC 0
>> +#define BTF_LINKAGE_GLOBAL 1
>> +#define BTF_LINKAGE_EXTERN 2
>> +
> 
> I was about to suggest to rename these to use the same name as used in 
> the kernel btf.h. What is used there is:
>  BTF_VAR_STATIC = 0,
>  BTF_VAR_GLOBAL_ALLOCATED = 1,
>  BTF_VAR_GLOBAL_EXTERN = 2,
> 
> But after looking at the Patch 3/3, I see you reuse these definitions 
> for functions as well. I just find the names confusing on the first look 
> - "BTF_LINKAGE_STATIC".
> 
> Naming aside, what do you think about adding the defines to 
> include/btf.h instead ?

Actually, I forgot these are defined (separately for both VARs and FUNCs)
in the kernel uapi/linux/btf.h. It would probably be best to mirror that
approach and use a separate enum for each, in include/btf.h. WDYT?

> 
>>   /* Mapping of CTF variables to the IDs they will be assigned when they are
>>  converted to BTF_KIND_VAR type records. Strictly accounts for the index
>>  from the start of the variable type entries, does not include the number
>> @@ -314,6 +318,9 @@ btf_collect_datasec (ctf_container_ref ctfc)
>>  continue;
>>   
>> const char *section_name = node->get_section ();
>> +  /* Mark extern variables.  */
>> +  if (DECL_EXTERNAL (node->decl))
>> +dvd->dvd_visibility = BTF_LINKAGE_EXTERN;
>>   
> 
> This made me think about the following case.
> 
> extern const char a[];
> const char a[] = "foo";
> 
> What is the expected BTF for this? Since BTF can differentiate between 
> the non-defining extern variable declaration, I expected to see two 
> variables with different "linkage". At this time I see, two variables 
> with global linkage but different types:
> 
>  .long   0xe00   # btv_info
>  .long   0x4 # btv_type
>  .long   0x1 # btv_linkage
>  .long   0x1f# btv_name
>  .long   0xe00   # btv_info
>  .long   0x7 # btv_type
>  .long   0x1 # btv_linkage
>  .long   0x60# btt_name
> 

The BTF documentation in the kernel does not clarify this case.
Going off the implementation in clang as a reference, it looks like
only one VAR record is expected, with 'global' linkage:

$ cat extdef.c
extern const char a[];
const char a[] = "foo";

$ clang -target bpf -c -g extdef.c -o extdef.o

$ /usr/sbin/bpftool btf dump file extdef.o   
[1] CONST '(anon)' type_id=2
[2] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED
[3] ARRAY '(anon)' type_id=1 index_type_id=4 nr_elems=4
[4] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
[5] VAR 'a' type_id=3, linkage=global
[6] DATASEC '.rodata' size=0 vlen=1
type_id=5 offset=0 size=4 (VAR 'a')

In GCC we have two records since we have two DIEs for "a" in the
DWARF. One has type "const char[4]" and the other has type
"const char[]", so the BTF records point to two different types
as well.

I guess we should find a way in BTF to identify this and
emit only the defining definition as clang does.


>> if (section_name == NULL)
>>  {
>> @@ -676,7 +683,7 @@ btf_asm_varent (ctf_dvdef_ref var)
>> dw2_asm_output_data (4, var->dvd_name_offset, "btv_name");
>> dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_VAR, 0, 0), "btv_info");
>> dw2_asm_output_data (4, get_btf_id (var->dvd_type), "btv_type");
>> -  dw2_asm_output_data (4, (var->dvd_visibility ? 1 : 0), "btv_linkage");
>> +  dw2_asm_output_data (4, var->dvd_visibility, "btv_linkage");
>>   }
>>   
>>   /* Asm'out a member description following a BTF_KIND_STRUCT or
>> diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-variabl

Re: [PATCH 2/3] btf: fix 'extern const void' variables [PR106773]

2022-12-12 Thread David Faust via Gcc-patches



On 12/8/22 23:34, Indu Bhagat wrote:
> Looks OK to me overall. Minor comments below.
> 
> Thanks
> 
> On 12/7/22 12:57, David Faust wrote:
>> The eBPF loader expects to find BTF_KIND_VAR records for references to
>> extern const void symbols. We were mistakenly identifing these as
>> unsupported types, and as a result skipping emitting VAR records for
>> them.
>>
>> In addition, the internal DWARF representation from which BTF is
>> produced does not generate 'const' modifier DIEs for the void type,
>> which meant in BTF the 'const' qualifier was dropped for 'extern const
>> void' variables. This patch also adds support for generating a const
>> void type in BTF to correct emission for these variables.
>>
>>  PR target/106773
>>
>> gcc/
>>
>>  * btfout.cc (btf_collect_datasec): Correct size of void entries.
>>  (btf_dvd_emit_preprocess_cb): Do not skip emitting variables which
>>  refer to void types.
>>  (btf_init_postprocess): Create 'const void' type record if needed and
>>  adjust variables to refer to it as appropriate.
>>
>> gcc/testsuite/
>>
>>  * gcc.dg/debug/btf/btf-pr106773.c: New test.
>> ---
>>   gcc/btfout.cc | 44 +--
>>   gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c | 25 +++
>>   2 files changed, 65 insertions(+), 4 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c
>>
>> diff --git a/gcc/btfout.cc b/gcc/btfout.cc
>> index a1c6266a7db..05f3a3f9b6e 100644
>> --- a/gcc/btfout.cc
>> +++ b/gcc/btfout.cc
>> @@ -354,6 +354,8 @@ btf_collect_datasec (ctf_container_ref ctfc)
>> tree size = DECL_SIZE_UNIT (node->decl);
>> if (tree_fits_uhwi_p (size))
>>  info.size = tree_to_uhwi (size);
>> +  else if (VOID_TYPE_P (TREE_TYPE (node->decl)))
>> +info.size = 1;
>>   
>> /* Offset is left as 0 at compile time, to be filled in by loaders 
>> such
>>   as libbpf.  */
>> @@ -439,7 +441,7 @@ btf_dvd_emit_preprocess_cb (ctf_dvdef_ref *slot, 
>> ctf_container_ref arg_ctfc)
>> ctf_dvdef_ref var = (ctf_dvdef_ref) * slot;
>>   
>> /* Do not add variables which refer to unsupported types.  */
>> -  if (btf_removed_type_p (var->dvd_type))
>> +  if (!voids.contains (var->dvd_type) && btf_removed_type_p (var->dvd_type))
>>   return 1;
>>   
>> arg_ctfc->ctfc_vars_list[num_vars_added] = var;
>> @@ -1073,15 +1075,49 @@ btf_init_postprocess (void)
>>   {
>> ctf_container_ref tu_ctfc = ctf_get_tu_ctfc ();
>>   
>> -  size_t i;
>> -  size_t num_ctf_types = tu_ctfc->ctfc_types->elements ();
>> -
>> holes.create (0);
>> voids.create (0);
>>   
>> num_types_added = 0;
>> num_types_created = 0;
>>   
>> +  /* Workaround for 'const void' variables. These variables are sometimes 
>> used
>> + in eBPF programs to address kernel symbols. DWARF does not generate 
>> const
>> + qualifier on void type, so we would incorrectly emit these variables
>> + without the const qualifier.
>> + Unfortunately we need the TREE node to know it was const, and we need
>> + to create the const modifier type (if needed) now, before making the 
>> types
>> + list. So we can't avoid iterating with FOR_EACH_VARIABLE here, and then
>> + again when creating the DATASEC entries.  */
> 
> "Dot, space, space, new sentence." in 3 places.
> 
> 
>> +  ctf_id_t constvoid_id = CTF_NULL_TYPEID;
>> +  varpool_node *var;
>> +  FOR_EACH_VARIABLE (var)
>> +{
>> +  if (!var->decl)
>> +continue;
>> +
>> +  tree type = TREE_TYPE (var->decl);
>> +  if (type && VOID_TYPE_P (type) && TYPE_READONLY (type))
>> +{
>> +  dw_die_ref die = lookup_decl_die (var->decl);
>> +  if (die == NULL)
>> +continue;
>> +
>> +  ctf_dvdef_ref dvd = ctf_dvd_lookup (tu_ctfc, die);
>> +  if (dvd == NULL)
>> +continue;
>> +
>> +  /* Create the 'const' modifier type for void.  */
>> +  if (constvoid_id == CTF_NULL_TYPEID)
>> +constvoid_id = ctf_add_reftype (tu_ctfc, CTF_ADD_ROOT,
>> +dvd->dvd_type, CTF_K_CONST, NULL);
> 
> No de-duplication of the const void type.  I assume libbpf will take 
> care of this eventually.

Hm, not sure I follow. Where is the duplication? The const void type is
only created once here, for the first such variable which needs it, and
reused for subsequent variables. And it does not already exist in the
CTF which we are translating into BTF.

In any case, yes libbpf can handle duplicated types. Though it would
still be good to minimize that where we can to not bloat the BTF info.

> 
>> +  dvd->dvd_type = constvoid_id;
>> +}
>> +}
>> +
>> +  size_t i;
>> +  size_t num_ctf_types = tu_ctfc->ctfc_types->elements ();
>> +
>> if (num_ctf_types)
>>   {
>> init_btf_id_map (num_ctf_types + 1);
>> diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c 
>> b/gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c
>> new file mode 100

Re: [PATCH][X86_64] Separate znver4 insn reservations from older znvers

2022-12-12 Thread Jan Hubička via Gcc-patches
> I have addressed all your comments in this revised patch, PFA and inlined
> below.
>
> Is it ok for trunk?
>
> Thanks and Regards,
> Tejas
>
> gcc/ChangeLog:
>
> * gcc/common/config/i386/i386-common.cc (processor_alias_table):
> Use CPU_ZNVER4 for znver4.
> * config/i386/i386.md: Add znver4.md.
> * config/i386/znver4.md: New.
>

Hi,
I went through the patch and compared with Agner's table and have few
comments below.

>
> ---
>  gcc/common/config/i386/i386-common.cc |2 +-
>  gcc/config/i386/i386.md   |1 +
>  gcc/config/i386/znver4.md | 1027 +
>  3 files changed, 1029 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/config/i386/znver4.md
>
> diff --git a/gcc/common/config/i386/i386-common.cc
> b/gcc/common/config/i386/i386-common.cc
> index 6ce2a588adc..6d941642911 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2207,7 +2207,7 @@ const pta processor_alias_table[] =
>{"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3,
>  PTA_ZNVER3,
>  M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2},
> -  {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER3,
> +  {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4,
>  PTA_ZNVER4,
>  M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F},
>{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 01faa911b77..ebb4eec1961 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1318,6 +1318,7 @@
>  (include "bdver3.md")
>  (include "btver2.md")
>  (include "znver.md")
> +(include "znver4.md")
>  (include "geode.md")
>  (include "atom.md")
>  (include "slm.md")
> diff --git a/gcc/config/i386/znver4.md b/gcc/config/i386/znver4.md
> new file mode 100644
> index 000..9d52dc517f5
> --- /dev/null
> +++ b/gcc/config/i386/znver4.md
> @@ -0,0 +1,1027 @@
> +;; Copyright (C) 2012-2022 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; .
> +;;
> +
> +
> +(define_attr "znver4_decode" "direct,vector,double"
> +  (const_string "direct"))
> +
> +;; AMD znver4 Scheduling
> +;; Modeling automatons for zen decoders, integer execution pipes,
> +;; AGU pipes, branch, floating point execution and fp store units.
> +(define_automaton "znver4, znver4_ieu, znver4_idiv, znver4_fdiv,
> znver4_agu, znver4_bru, znver4_fpu, znver4_fp_store")
> +
> +;; Decoders unit has 4 decoders and all of them can decode fast path
> +;; and vector type instructions.
> +(define_cpu_unit "znver4-decode0" "znver4")
> +(define_cpu_unit "znver4-decode1" "znver4")
> +(define_cpu_unit "znver4-decode2" "znver4")
> +(define_cpu_unit "znver4-decode3" "znver4")
> +
> +;; Currently blocking all decoders for vector path instructions as
> +;; they are dispatched separetely as microcode sequence.
> +(define_reservation "znver4-vector"
> "znver4-decode0+znver4-decode1+znver4-decode2+znver4-decode3")
> +
> +;; Direct instructions can be issued to any of the four decoders.
> +(define_reservation "znver4-direct"
> "znver4-decode0|znver4-decode1|znver4-decode2|znver4-decode3")
> +
> +;; Fix me: Need to revisit this later to simulate fast path double
> behavior.
> +(define_reservation "znver4-double" "znver4-direct")
> +
> +
> +;; Integer unit 4 ALU pipes.
> +(define_cpu_unit "znver4-ieu0" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu1" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu2" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu3" "znver4_ieu")
> +(define_reservation "znver4-ieu"
> "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3")
> +
> +;; 3 AGU pipes in znver4
> +(define_cpu_unit "znver4-agu0" "znver4_agu")
> +(define_cpu_unit "znver4-agu1" "znver4_agu")
> +(define_cpu_unit "znver4-agu2" "znver4_agu")
> +(define_reservation "znver4-agu-reserve"
> "znver4-agu0|znver4-agu1|znver4-agu2")
> +
> +;; Load is 4 cycles. We do not model reservation of load unit.
> +(define_reservation "znver4-load" "znver4-agu-reserve")
> +(define_reservation "znver4-store" "znver4-agu-reserve")
> +
> +;; vectorpath (microcoded) instructions are single issue instructions.
> +;; So, they occupy all the integer units.
> +(define_reservation "znver4-ivector" "znver4-ieu0+znver4-ieu1
> + +znver4-ieu2+znver4-ieu3
> +
>  +znver4-agu0+znver4-a

Re: [PATCH 2/9] ipa: Better way of applying both IPA-CP and IPA-SRA (PR 103227)

2022-12-12 Thread Jan Hubicka via Gcc-patches
> 2022-11-11  Martin Jambor  
> 
>   PR ipa/103227
>   * ipa-param-manipulation.h (class ipa_param_adjustments): Removed
>   member function get_updated_index_or_split.
>   (class ipa_param_body_adjustments): New overload of
>   register_replacement, new member function append_init_stmts, new
>   member m_split_agg_csts_inits.
>   * ipa-param-manipulation.cc: Include ipa-prop.h.
>   (ipa_param_adjustments::get_updated_index_or_split): Removed.
>   (ipa_param_body_adjustments::register_replacement): New overload, use
>   it from the older one.
>   (ipa_param_body_adjustments::common_initialization): Added the
>   capability to create replacements for conflicting IPA-CP discovered
>   constants.
>   (ipa_param_body_adjustments::ipa_param_body_adjustments): Construct
>   the new member.
>   (ipa_param_body_adjustments::append_init_stmts): New function.
>   * ipa-sra.cc: Include ipa-prop.h.
>   (push_param_adjustments_for_index): Require IPA-CP transformation
>   summary as a parameter, do not create replacements which are known to
>   have constant values.
>   (process_isra_node_results): Find and pass to the above function the
>   IPA-CP transformation summary.
>   * ipa-prop.cc (adjust_agg_replacement_values): Remove the
>   functionality replacing IPA-SRA created scalar parameters with
>   constants.  Simplify, do not require parameter descriptors, do not
>   return anything.
>   (ipcp_transform_function): Simplify now that
>   adjust_agg_replacement_values does not change cfg.  Move definition
>   and initialization of descriptors lower.
>   * tree-inline.cc (tree_function_versioning): Call append_init_stmts of
>   param_body_adjs, if there are any.
OK,
Honza
> 
> gcc/testsuite/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   PR ipa/103227
>   PR ipa/107640
>   * gcc.dg/ipa/pr107640-2.c: New test.
> ---
>  gcc/ipa-param-manipulation.cc | 155 --
>  gcc/ipa-param-manipulation.h  |  19 ++--
>  gcc/ipa-prop.cc   |  66 ---
>  gcc/ipa-sra.cc|  30 -
>  gcc/testsuite/gcc.dg/ipa/pr107640-2.c |  50 +
>  gcc/tree-inline.cc|   2 +
>  6 files changed, 208 insertions(+), 114 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr107640-2.c
> 
> diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
> index cee0e23f946..e92cfc0b6d5 100644
> --- a/gcc/ipa-param-manipulation.cc
> +++ b/gcc/ipa-param-manipulation.cc
> @@ -46,7 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-phinodes.h"
>  #include "cfgexpand.h"
>  #include "attribs.h"
> -
> +#include "ipa-prop.h"
>  
>  /* Actual prefixes of different newly synthetized parameters.  Keep in sync
> with IPA_PARAM_PREFIX_* defines.  */
> @@ -449,39 +449,6 @@ ipa_param_adjustments::get_updated_indices (vec 
> *new_indices)
>  }
>  }
>  
> -/* If a parameter with original INDEX has survived intact, return its new
> -   index.  Otherwise return -1.  In that case, if it has been split and there
> -   is a new parameter representing a portion at unit OFFSET for which a value
> -   of a TYPE can be substituted, store its new index into SPLIT_INDEX,
> -   otherwise store -1 there.  */
> -int
> -ipa_param_adjustments::get_updated_index_or_split (int index,
> -unsigned unit_offset,
> -tree type, int *split_index)
> -{
> -  unsigned adj_len = vec_safe_length (m_adj_params);
> -  for (unsigned i = 0; i < adj_len ; i++)
> -{
> -  ipa_adjusted_param *apm = &(*m_adj_params)[i];
> -  if (apm->base_index != index)
> - continue;
> -  if (apm->op == IPA_PARAM_OP_COPY)
> - return i;
> -  if (apm->op == IPA_PARAM_OP_SPLIT
> -   && apm->unit_offset == unit_offset)
> - {
> -   if (useless_type_conversion_p (apm->type, type))
> - *split_index = i;
> -   else
> - *split_index = -1;
> -   return -1;
> - }
> -}
> -
> -  *split_index = -1;
> -  return -1;
> -}
> -
>  /* Return the original index for the given new parameter index.  Return a
> negative number if not available.  */
>  
> @@ -1020,6 +987,21 @@ ipa_param_adjustments::debug ()
>dump (stderr);
>  }
>  
> +/* Register a REPLACEMENT for accesses to BASE at UNIT_OFFSET.  */
> +
> +void
> +ipa_param_body_adjustments::register_replacement (tree base,
> +   unsigned unit_offset,
> +   tree replacement)
> +{
> +  ipa_param_body_replacement psr;
> +  psr.base = base;
> +  psr.repl = replacement;
> +  psr.dummy = NULL_TREE;
> +  psr.unit_offset = unit_offset;
> +  m_replacements.safe_push (psr);
> +}
> +
>  /* Register that REPLACEMENT should replace parameter described i

Re: [PATCH 1/9] ipa-cp: Write transformation summaries of all functions

2022-12-12 Thread Jan Hubicka via Gcc-patches
> gcc/ChangeLog:
> 
> 2022-11-25  Martin Jambor  
> 
>   * ipa-prop.cc (useful_ipcp_transformation_info_p): New function.
>   (write_ipcp_transformation_info): Added a parameter, simplified
>   given that is known not to be NULL.
>   (ipcp_write_transformation_summaries): Write out all useful
>   transformation summaries.
>   (read_ipcp_transformation_info): Simplify given that some info
>   will be read.
>   (read_replacements_section): Remove assert.
>   * lto-cgraph.cc (add_node_to): Also set encode_body for clones.
>   * lto-streamer-out.cc (lto_output): Do not output virtual clones.
Aha,
sorry, you included the change we discussed. The patch is OK.

Honza
> ---
>  gcc/ipa-prop.cc | 153 +++-
>  gcc/lto-cgraph.cc   |   2 +-
>  gcc/lto-streamer-out.cc |   3 +-
>  3 files changed, 76 insertions(+), 82 deletions(-)
> 
> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
> index e6cf25591b3..fcadf64ead7 100644
> --- a/gcc/ipa-prop.cc
> +++ b/gcc/ipa-prop.cc
> @@ -5279,80 +5279,72 @@ ipa_prop_read_jump_functions (void)
>  }
>  }
>  
> -void
> -write_ipcp_transformation_info (output_block *ob, cgraph_node *node)
> +/* Return true if the IPA-CP transformation summary TS is non-NULL and 
> contains
> +   useful info.  */
> +static bool
> +useful_ipcp_transformation_info_p (ipcp_transformation *ts)
>  {
> -  int node_ref;
> -  unsigned int count = 0;
> -  lto_symtab_encoder_t encoder;
> +  if (!ts)
> +return false;
> +  if (!vec_safe_is_empty (ts->m_agg_values)
> +  || !vec_safe_is_empty (ts->bits)
> +  || !vec_safe_is_empty (ts->m_vr))
> +return true;
> +  return false;
> +}
>  
> -  encoder = ob->decl_state->symtab_node_encoder;
> -  node_ref = lto_symtab_encoder_encode (encoder, node);
> +/* Write into OB IPA-CP transfromation summary TS describing NODE.  */
> +
> +void
> +write_ipcp_transformation_info (output_block *ob, cgraph_node *node,
> + ipcp_transformation *ts)
> +{
> +  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
> +  int node_ref = lto_symtab_encoder_encode (encoder, node);
>streamer_write_uhwi (ob, node_ref);
>  
> -  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
> -  if (ts && !vec_safe_is_empty (ts->m_agg_values))
> +  streamer_write_uhwi (ob, vec_safe_length (ts->m_agg_values));
> +  for (const ipa_argagg_value &av : ts->m_agg_values)
>  {
> -  streamer_write_uhwi (ob, ts->m_agg_values->length ());
> -  for (const ipa_argagg_value &av : ts->m_agg_values)
> +  struct bitpack_d bp;
> +
> +  stream_write_tree (ob, av.value, true);
> +  streamer_write_uhwi (ob, av.unit_offset);
> +  streamer_write_uhwi (ob, av.index);
> +
> +  bp = bitpack_create (ob->main_stream);
> +  bp_pack_value (&bp, av.by_ref, 1);
> +  streamer_write_bitpack (&bp);
> +}
> +
> +  streamer_write_uhwi (ob, vec_safe_length (ts->m_vr));
> +  for (const ipa_vr &parm_vr : ts->m_vr)
> +{
> +  struct bitpack_d bp;
> +  bp = bitpack_create (ob->main_stream);
> +  bp_pack_value (&bp, parm_vr.known, 1);
> +  streamer_write_bitpack (&bp);
> +  if (parm_vr.known)
>   {
> -   struct bitpack_d bp;
> -
> -   stream_write_tree (ob, av.value, true);
> -   streamer_write_uhwi (ob, av.unit_offset);
> -   streamer_write_uhwi (ob, av.index);
> -
> -   bp = bitpack_create (ob->main_stream);
> -   bp_pack_value (&bp, av.by_ref, 1);
> -   streamer_write_bitpack (&bp);
> +   streamer_write_enum (ob->main_stream, value_rang_type,
> +VR_LAST, parm_vr.type);
> +   streamer_write_wide_int (ob, parm_vr.min);
> +   streamer_write_wide_int (ob, parm_vr.max);
>   }
>  }
> -  else
> -streamer_write_uhwi (ob, 0);
>  
> -  if (ts && vec_safe_length (ts->m_vr) > 0)
> +  streamer_write_uhwi (ob, vec_safe_length (ts->bits));
> +  for (const ipa_bits *bits_jfunc : ts->bits)
>  {
> -  count = ts->m_vr->length ();
> -  streamer_write_uhwi (ob, count);
> -  for (unsigned i = 0; i < count; ++i)
> +  struct bitpack_d bp = bitpack_create (ob->main_stream);
> +  bp_pack_value (&bp, !!bits_jfunc, 1);
> +  streamer_write_bitpack (&bp);
> +  if (bits_jfunc)
>   {
> -   struct bitpack_d bp;
> -   ipa_vr *parm_vr = &(*ts->m_vr)[i];
> -   bp = bitpack_create (ob->main_stream);
> -   bp_pack_value (&bp, parm_vr->known, 1);
> -   streamer_write_bitpack (&bp);
> -   if (parm_vr->known)
> - {
> -   streamer_write_enum (ob->main_stream, value_rang_type,
> -VR_LAST, parm_vr->type);
> -   streamer_write_wide_int (ob, parm_vr->min);
> -   streamer_write_wide_int (ob, parm_vr->max);
> - }
> +   streamer_write_widest_int (ob, bits_jfunc->value);
> +   streamer_write_widest_int (ob, bits_jfunc->mask);
>   }
>  }
> -  el

Re: [PATCH 3/9] ipa-cp: Leave removal of unused parameters to IPA-SRA

2022-12-12 Thread Jan Hubicka via Gcc-patches
> Hi,
> 
> I'm re-posting patches which I have posted at the end of stage 1 but
> which have not passed review yet.
> 
> 8<
> 
> Looking at some benchmarks I have noticed many cases when IPA-CP
> cloned a function for all contexts just because it knew that some
> parameters were not used at all.  Then IPA-SRA looked at the function
> and cloned it again to split another parameter or two.  The latter
> pass is better equipped to detect when parameters can be altogether
> removed and so the IPA-CP cloning was for no good reason.
> 
> This patch simply alters the IPA-CP not to do that in the situations
> where IPA-SRA can (for nodes which can be made local).
> 
> Bootstrapped and tested individually when I originally posted it and
> now bootstrapped and LTO-bootstrapped and tested as part of the whole
> series.  OK for master?
> 
> 
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * ipa-cp.cc (estimate_local_effects): Do not clone potentionally local
>   nodes for all contexts just to remove unused parameters.
> ---
>  gcc/ipa-cp.cc | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
> index cc031ebed0f..172ea353c49 100644
> --- a/gcc/ipa-cp.cc
> +++ b/gcc/ipa-cp.cc
> @@ -3722,7 +3722,10 @@ estimate_local_effects (struct cgraph_node *node)
>   &removable_params_cost);
>int devirt_bonus = devirtualization_time_bonus (node, &avals);
>if (always_const || devirt_bonus
> -  || (removable_params_cost && node->can_change_signature))
> +  || (removable_params_cost
> +   && node->can_change_signature
> +   /* If IPA-SRA can do it, it can do it better.  */
> +   && !node->can_be_local_p ()))
Perhaps we could dump that into dump file (i.e. not cloning because
ipa-sra will do it later).  That could save me from debugging session
in future :)
OK with that change.
Honza
>  {
>struct caller_statistics stats;
>ipa_call_estimates estimates;
> -- 
> 2.38.1
> 


Re: [PATCH 4/9] ipa-sra: Treat REFERENCE_TYPES as always dereferencable

2022-12-12 Thread Jan Hubicka via Gcc-patches
> Hi,
> 
> I'm re-posting patches which I have posted at the end of stage 1 but
> which have not passed review yet.
> 
> 8<
> 
> C++ and especially Fortran pass data by references which are not
> pointers potentially pointing anywhere and so can be assumed to be
> safely dereferencable.  This patch teaches IPA-SRA to treat them as
> such and avoid the dance we do to prove that we can move loads from
> them to the caller.
> 
> When we do not know that a dereference will happen all the time, we
> need a heuristics so that we do not force memory accesses that normally
> happen only rarely.  The patch simply uses the (possibly guessed)
> profile and checks whether the (expected) number of loads is at least
> half of function invocations invocations.
> 
> Bootstrapped and tested individually when I originally posted it and
> now bootstrapped and LTO-bootstrapped and tested as part of the whole
> series.  OK for master?
> 
> 
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   PR ipa/103585
>   * ipa-sra.c (struct gensum_param_access): New field load_count.
>   (struct gensum_param_desc): New field safe_ref, adjusted comments.
>   (by_ref_count): Renamed to unsafe_by_ref_count, adjusted all uses.
>   (dump_gensum_access): Dump the new field.
>   (dump_gensum_param_descriptor): Likewise.
>   (create_parameter_descriptors): Set safe_ref field, move setting
>   by_ref forward.  Only increment unsafe_by_ref_count for unsafe
>   by_ref parameters.
>   (allocate_access): Initialize new field.
>   (mark_param_dereference): Adjust indentation.  Only add data to
>   bb_dereferences for unsafe by_ref parameters.
>   (scan_expr_access): For loads, accumulate BB counts.
>   (dereference_probable_p): New function.
>   (check_gensum_access): Fix leading comment, add parameter FUN.
>   Check cumulative counts of loads for safe by_ref accesses instead
>   of dereferences.
>   (process_scan_results): Do not propagate dereference distances for
>   safe by_ref parameters.  Pass fun to check_gensum_access.  Safe
>   by_ref params do not need the postdominance check.
> 
> gcc/testsuite/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
> * g++.dg/ipa/ipa-sra-5.C: New test
> -/* Perform basic checks on ACCESS to PARM described by DESC and all its
> -   children, return true if the parameter cannot be split, otherwise return
> -   true and update *TOTAL_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be the
> -   index of the entry BB in the function of PARM.  */
> +/* Return true if the ACCESS loads happen frequently enough in FUN to risk
> +   moving them to the caller and only pass the result.  */
>  
>  static bool
> -check_gensum_access (tree parm, gensum_param_desc *desc,
> +dereference_probable_p (struct function *fun, gensum_param_access *access)
> +{
> +  return access->load_count
> +>= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (1, 2);

We may want to have --param for this.

Otherwise the patch is OK.
Honza


Re: [PATCH 5/9] ipa-sra: Move caller->callee propagation before callee->caller one

2022-12-12 Thread Jan Hubicka via Gcc-patches
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * ipa-sra.c (ipa_sra_analysis): Move top-down analysis before
>   bottom-up analysis.  Replace FOR_EACH_VEC_ELT with C++11 iteration.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-12-14  Martin Jambor  
> 
>   * gcc.dg/ipa/ipa-sra-25.c: New test
OK,
Thanks!
Honza
> ---
>  gcc/ipa-sra.cc| 145 +-
>  gcc/testsuite/gcc.dg/ipa/ipa-sra-25.c |  17 +++
>  2 files changed, 89 insertions(+), 73 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-25.c
> 
> diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> index e8a4cd47429..fa5a01ec07c 100644
> --- a/gcc/ipa-sra.cc
> +++ b/gcc/ipa-sra.cc
> @@ -3925,95 +3925,28 @@ ipa_sra_analysis (void)
>auto_vec  stack;
>int node_scc_count = ipa_reduced_postorder (order, true, NULL);
>  
> -  /* One sweep from callees to callers for parameter removal and splitting.  
> */
> -  for (int i = 0; i < node_scc_count; i++)
> +  /* One sweep from callers to callees for return value removal.  */
> +  for (int i = node_scc_count - 1; i >= 0 ; i--)
>  {
>cgraph_node *scc_rep = order[i];
>vec cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
> -  unsigned j;
>  
> -  /* Preliminary IPA function level checks and first step of parameter
> -  removal.  */
> -  cgraph_node *v;
> -  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
> +  /* Preliminary IPA function level checks.  */
> +  for (cgraph_node *v : cycle_nodes)
>   {
> isra_func_summary *ifs = func_sums->get (v);
> if (!ifs || !ifs->m_candidate)
>   continue;
> if (!ipa_sra_ipa_function_checks (v)
> || check_all_callers_for_issues (v))
> - {
> -   ifs->zap ();
> -   continue;
> - }
> -   if (disable_unavailable_parameters (v, ifs))
> - continue;
> -   for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
> - process_edge_to_unknown_caller (cs);
> -   for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
> - if (!ipa_edge_within_scc (cs))
> -   param_removal_cross_scc_edge (cs);
> + ifs->zap ();
>   }
>  
> -  /* Look at edges within the current SCC and propagate used-ness across
> -  them, pushing onto the stack all notes which might need to be
> -  revisited.  */
> -  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
> - v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
> -&stack, true);
> -
> -  /* Keep revisiting and pushing until nothing changes.  */
> -  while (!stack.is_empty ())
> - {
> -   cgraph_node *v = stack.pop ();
> -   isra_func_summary *ifs = func_sums->get (v);
> -   gcc_checking_assert (ifs && ifs->m_queued);
> -   ifs->m_queued = false;
> -
> -   v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
> -  &stack, true);
> - }
> -
> -  /* Parameter splitting.  */
> -  bool repeat_scc_access_propagation;
> -  do
> - {
> -   repeat_scc_access_propagation = false;
> -   FOR_EACH_VEC_ELT (cycle_nodes, j, v)
> - {
> -   isra_func_summary *ifs = func_sums->get (v);
> -   if (!ifs
> -   || !ifs->m_candidate
> -   || vec_safe_is_empty (ifs->m_parameters))
> - continue;
> -   for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
> - if (param_splitting_across_edge (cs))
> -   repeat_scc_access_propagation = true;
> - }
> - }
> -  while (repeat_scc_access_propagation);
> -
> -  if (flag_checking)
> - FOR_EACH_VEC_ELT (cycle_nodes, j, v)
> -   verify_splitting_accesses (v, true);
> -
> -  cycle_nodes.release ();
> -}
> -
> -  /* One sweep from caller to callees for result removal.  */
> -  for (int i = node_scc_count - 1; i >= 0 ; i--)
> -{
> -  cgraph_node *scc_rep = order[i];
> -  vec cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
> -  unsigned j;
> -
> -  cgraph_node *v;
> -  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
> +  for (cgraph_node *v : cycle_nodes)
>   {
> isra_func_summary *ifs = func_sums->get (v);
> if (!ifs || !ifs->m_candidate)
>   continue;
> -
> bool return_needed
>   = (ifs->m_returns_value
>  && (!dbg_cnt (ipa_sra_retvalues)
> @@ -4048,6 +3981,72 @@ ipa_sra_analysis (void)
>cycle_nodes.release ();
>  }
>  
> +  /* One sweep from callees to callers for parameter removal and splitting.  
> */
> +  for (int i = 0; i < node_scc_count; i++)
> +{
> +  cgraph_node *scc_rep = order[i];
> +  vec cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
> +
> +  /* First step of parameter removal.  */
> +  for (cgraph_node *v : cycle_nodes)
> + {
> +   isra_func_summary *ifs = fu

Re: [PATCH 6/9] ipa-sra: Be optimistic about Fortran descriptors

2022-12-12 Thread Jan Hubicka via Gcc-patches
> Hi,
> 
> I'm re-posting patches which I have posted at the end of stage 1 but
> which have not passed review yet.
> 
> 8<
> 
> Fortran descriptors are structures which are often constructed just
> for a particular argument of a particular call where it is passed by
> reference.  When the called function is under compiler's control, it
> can be beneficial to split up the descriptor and pass it in individual
> parameters.  Unfortunately, currently we allow IPA-SRA to replace a
> pointer with a set of replacements which are at most twice as big in
> total and for descriptors we'd need to bump that factor to seven.
> 
> This patch looks for parameters which are ADDR_EXPRs of local
> variables which are written to and passed as arguments by reference
> but are never loaded from and marks them with a flag in the call edge
> summary.  The IPA analysis phase then identifies formal parameters
> which are always fed such arguments and then is more lenient when it
> comoes to size.
> 
> In order not to store to maximums per formal parameter, I calculate
> the more lenient one by multiplying the existing one with a new
> parameter.  If it is preferable to keep the maximums independent, we
> can do that.  Documentation for the new parameter is missing as I
> still need to re-base the patch on a version which has sphinx.  I will
> write it before committing.
> 
> I have disable IPA-SRA in pr48636-2.f90 in order to be able to keep
> using its dump-scan expressions.  The new testcase is basically a copy
> of it with different options and IPA-SRA dump scans.
> 
> Bootstrapped and tested individually when I originally posted it and
> now bootstrapped and LTO-bootstrapped and tested as part of the whole
> series.  OK for master?
> 
> 
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * ipa-sra.c (isra_param_desc): New field not_specially_constructed.
>   (struct isra_param_flow): New field constructed_for_calls.
>   (isra_call_summary::dump): Dump the new flag.
>   (loaded_decls): New variable.
>   (dump_isra_param_descriptor): New parameter hints, dump
>   not_specially_constructed if it is true.
>   (dump_isra_param_descriptors): New parameter hints, pass it to
>   dump_isra_param_descriptor.
>   (ipa_sra_function_summaries::duplicate): Duplicate new flag.
>   (create_parameter_descriptors): Adjust comment.
>   (get_gensum_param_desc): Bail out when decl2desc is NULL.
>   (scan_expr_access): Add loaded local variables to loaded_decls.
>   (scan_function): Survive if final_bbs is NULL.
>   (isra_analyze_call): Compute constructed_for_calls flag.
>   (process_scan_results): Be optimistic about size limits.  Do not dump
>   computed param hints when dumpint IPA-SRA structures.
>   (isra_write_edge_summary): Stream constructed_for_calls.
>   (isra_read_edge_summary): Likewise.
>   (ipa_sra_dump_all_summaries): New parameter hints, pass it to
>   dump_isra_param_descriptor.
>   (flip_all_hints_pessimistic): New function.
>   (flip_all_param_hints_pessimistic): Likewise.
>   (propagate_param_hints): Likewise.
>   (disable_unavailable_parameters): Renamed to
>   adjust_parameter_descriptions.  Expand size limits for parameters
>   which are specially contstructed by all callers.  Check limits again.p
>   (ipa_sra_analysis): Pass required hints to ipa_sra_dump_all_summaries.
>   Add hint propagation.
>   (ipa_sra_summarize_function): Initialize and destory loaded_decls,
>   rearrange so that scan_function is called even when there are no
>   candidates.
>   * params.opt (ipa-sra-ptrwrap-growth-factor): New parameter.

Hmm, this is quite specific heuristics, but I do not have much better
ideas, so it is OK :)

Can this be useful also for inlining?

Honza


Re: [PATCH 7/9] ipa-sra: Forward propagation of sizes which are safe to dereference

2022-12-12 Thread Jan Hubicka via Gcc-patches
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * ipa-sra.cc (isra_param_desc): New fields safe_size,
>   conditionally_dereferenceable and safe_size_set.
>   (struct gensum_param_desc): New field conditionally_dereferenceable.
>   (struct isra_param_flow): Updated comment of field unit_size.
>   (ipa_sra_function_summaries::duplicate): Copy the new fields.
>   (isra_call_summary::dump): Dump unit_size when representing safe_size.
>   (dump_gensum_param_descriptor): Dump new flag.
>   (dump_isra_param_descriptor): Dump new fields.
>   (isra_analyze_call): Fill unit_size when it represents known safe
>   size.
>   (check_gensum_access): Instead of disqualifying pointers which are not
>   always dereference, mark them as conditionally dereferencable if loads
>   are frequent enough.
>   (process_scan_results): Copy the conditionally_dereferenceable flag.
>   (isra_write_node_summary): Stream new fields, or assert they are not
>   initialized yet.
>   (isra_read_node_info): Stream new fields.
>   (update_safe_size): New function.
>   (propagate_param_hints_accross_call): Propagate safe_sizes.
>   (propagate_hints_to_all_callees): New function.
>   (adjust_parameter_descriptions): Check conditionally_dereferenceable
>   candidates, rework dumping.
>   (ipa_sra_analysis): Move most of hint propagation for one node to
>   propagate_hints_to_all_callees.  Add another loop to stabilize within
>   SCCs and another one to verify.
> 
> gcc/testsuite/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * gcc.dg/ipa/ipa-sra-26.c: New test.
>   * gcc.dg/ipa/ipa-sra-27.c: Likewise.
>   * gcc.dg/ipa/ipa-sra-28.c: Likewise.

OK,
thanks!
Honza


Re: [PATCH 8/9] ipa-sra: Make scan_expr_access bail out on uninteresting expressions

2022-12-12 Thread Jan Hubicka via Gcc-patches
> Hi,
> 
> I'm re-posting patches which I have posted at the end of stage 1 but
> which have not passed review yet.
> 
> 8<
> 
> I have noticed that scan_expr_access passes all the expressions it
> gets to get_ref_base_and_extent even when we are really only
> interested in memory accesses.  So bail out when the expression is
> something clearly uninteresting.
> 
> Bootstrapped and tested individually when I originally posted it and
> now bootstrapped and LTO-bootstrapped and tested as part of the whole
> series.  OK for master?
> 
> 
> gcc/ChangeLog:
> 
> 2021-12-14  Martin Jambor  
> 
>   * ipa-sra.c (scan_expr_access): Bail out early if expr is something we
>   clearly do not need to pass to get_ref_base_and_extent.
> ---
>  gcc/ipa-sra.cc | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> index 93fceeafc73..3646d71468c 100644
> --- a/gcc/ipa-sra.cc
> +++ b/gcc/ipa-sra.cc
> @@ -1748,6 +1748,11 @@ scan_expr_access (tree expr, gimple *stmt, 
> isra_scan_context ctx,
>|| TREE_CODE (expr) == REALPART_EXPR)
>  expr = TREE_OPERAND (expr, 0);
>  
> +  if (!handled_component_p (expr)
> +  && !DECL_P (expr)
> +  && TREE_CODE (expr) != MEM_REF)
> +return;
Is this needed because get_ref_base_and_extend crashes if given SSA_NAME
or something else or is it just optimization?
Perhaps Richi will know if there is better test for this.

Honza
> +
>base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, 
> &reverse);
>  
>if (TREE_CODE (base) == MEM_REF)
> -- 
> 2.38.1
> 


Re: [PATCH 9/9] ipa: Avoid looking for IPA-SRA replacements where there are none

2022-12-12 Thread Jan Hubicka via Gcc-patches
> Hi,
> 
> I'm re-posting patches which I have posted at the end of stage 1 but
> which have not passed review yet.
> 
> 8<
> 
> While modifying the code, I realized that we do look into statements
> even when there are no replacements.  This patch adds the necessary
> early bail-outs to avoid that.
> 
> ipa_param_body_adjustments::modify_call_stmt cannot have the same at
> the very beginning because calls can still contain otherwise removed
> parameters that need to be removed from the statements too.
> 
> Bootstrapped and tested individually when I originally posted it and
> now bootstrapped and LTO-bootstrapped and tested as part of the whole
> series.  OK for master?
> 
> 
> gcc/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   * ipa-param-manipulation.cc
>   (ipa_param_body_adjustments::modify_expression): Bail out early if
>   there are no replacements.
>   (ipa_param_body_adjustments::modify_assignment): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2022-11-11  Martin Jambor  
> 
>   PR ipa/103227
>   PR ipa/107640
>   * gcc.dg/ipa/pr107640-2.c: New test.
OK,
thanks!
Honza


Re: [PATCH 8/9] ipa-sra: Make scan_expr_access bail out on uninteresting expressions

2022-12-12 Thread Jan Hubicka via Gcc-patches
> > Hi,
> > 
> > I'm re-posting patches which I have posted at the end of stage 1 but
> > which have not passed review yet.
> > 
> > 8<
> > 
> > I have noticed that scan_expr_access passes all the expressions it
> > gets to get_ref_base_and_extent even when we are really only
> > interested in memory accesses.  So bail out when the expression is
> > something clearly uninteresting.
> > 
> > Bootstrapped and tested individually when I originally posted it and
> > now bootstrapped and LTO-bootstrapped and tested as part of the whole
> > series.  OK for master?
> > 
> > 
> > gcc/ChangeLog:
> > 
> > 2021-12-14  Martin Jambor  
> > 
> > * ipa-sra.c (scan_expr_access): Bail out early if expr is something we
> > clearly do not need to pass to get_ref_base_and_extent.
> > ---
> >  gcc/ipa-sra.cc | 5 +
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> > index 93fceeafc73..3646d71468c 100644
> > --- a/gcc/ipa-sra.cc
> > +++ b/gcc/ipa-sra.cc
> > @@ -1748,6 +1748,11 @@ scan_expr_access (tree expr, gimple *stmt, 
> > isra_scan_context ctx,
> >|| TREE_CODE (expr) == REALPART_EXPR)
> >  expr = TREE_OPERAND (expr, 0);
> >  
> > +  if (!handled_component_p (expr)
> > +  && !DECL_P (expr)
> > +  && TREE_CODE (expr) != MEM_REF)
> > +return;
> Is this needed because get_ref_base_and_extend crashes if given SSA_NAME
> or something else or is it just optimization?
> Perhaps Richi will know if there is better test for this.
Looking at:

static inline bool
gimple_assign_load_p (const gimple *gs)
{
  tree rhs;
  if (!gimple_assign_single_p (gs))
return false;
  rhs = gimple_assign_rhs1 (gs);
  if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
return true;
  rhs = get_base_address (rhs);
  return (DECL_P (rhs)
  || TREE_CODE (rhs) == MEM_REF || TREE_CODE (rhs) == TARGET_MEM_REF);
} 

I wonder if we don't want to avoid get_base_address (which is loopy) and
use same check and move it into a new predicate that is more convenient
to use?

Honza
> 
> Honza
> > +
> >base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, 
> > &reverse);
> >  
> >if (TREE_CODE (base) == MEM_REF)
> > -- 
> > 2.38.1
> > 


libgo patch committed: Bump major version

2022-12-12 Thread Ian Lance Taylor via Gcc-patches
This libgo patch bumps the major version.  The current version is the
same as for GCC 12, but there have been minor changes like new type
descriptors that make it impossible to run Go programs built with GCC
12 with the current GCC.  This fixes https://gcc.gnu.org/PR108057.
Bootstrapped on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
3c2e866832dd5612de1468807bc144e4824593c7
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index a26f779557d..d8c4e02d6e6 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-1c5bfd57131b68b91d8400bb017f35d416f7aa7b
+9906861dc86c1733bb304d3d45b1534adb32712c
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 274fcfc35c7..09554a37a23 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -10,7 +10,7 @@ AC_INIT(package-unused, version-unused,, libgo)
 AC_CONFIG_SRCDIR(Makefile.am)
 AC_CONFIG_HEADER(config.h)
 
-libtool_VERSION=21:0:0
+libtool_VERSION=22:0:0
 AC_SUBST(libtool_VERSION)
 
 AM_ENABLE_MULTILIB(, ..)


Re: [PATCH] longlong.h: Do no use asm input cast for clang

2022-12-12 Thread Segher Boessenkool
On Mon, Dec 12, 2022 at 02:10:16PM -0300, Adhemerval Zanella Netto wrote:
> On 30/11/22 20:24, Segher Boessenkool wrote:
> > I understand that the casts should be no-ops on the asm side (maybe they
> > change the sign) and they are present as type-checking.  Can we implement
> > this type-checking in a different (portable) way?  I think the macro you use
> > should be named like __asm_output_check_type (..) or so to indicate the
> > intended purpose.

I didn't write that.  Please quote correctly.  Thanks!

> I do not think trying to leverage it on clang side would yield much, it
> seems that it really does not want to support this extension.  I am not
> sure we can really make it portable, best option I can think of would to
> add a mix of __builtin_classify_type and typeof prior asm call (we do
> something similar to powerp64 syscall code on glibc), although it would
> still require some gcc specific builtins.
> 
> I am open for ideas, since to get this header to be clang-compatible on
> glibc it requires to get it first on gcc.

How do you intend to modify all the existing copies of the header that
haven't been updated for over a decade already?

If you think changing all user code that uses longlong.h is a good idea,
please change it to not use inline asm, use builtins in some cases but
mostly just rewrite things in plain C.  But GCC cannot rewrite user code
(not preemptively anyway ;-) ) -- and longlong.h as encountered in the
wild (not the one in our libgcc source code) is user code.

If you think changing the copy in libgcc is a good idea, please change
the original in glibc first?


Segher


[PATCH PING] GC unused SIMD clones

2022-12-12 Thread Sandra Loosemore

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607195.html

The patch referenced in the original post is now approved and committed.

-Sandra


Re: [PATCH Rust front-end v4 46/46] gccrs: Add README, CONTRIBUTING and compiler logo

2022-12-12 Thread Joseph Myers
On Fri, 9 Dec 2022, Martin Liška wrote:

> On 12/6/22 11:14, arthur.co...@embecosm.com wrote:
> > |We still need to write out a documentation section, but these READMEs will 
> > help in the meantime.|
> 
> Hello.
> 
> Just a quick comment: The Sphinx conversion didn't make it for all GCC 
> manuals. However, you can still use Sphinx for a newly created manual, 
> similarly to what libgccjit or Ada manuals do.

I would also encourage people using Sphinx for a newly created manual to 
consider setting up common build infrastructure for such manuals, possibly 
based on that used in the attempted Sphinx conversion.  It may be easier 
to get common infrastructure for such manuals into shape if it's initially 
only being used for one or two manuals - that is, if the addition of such 
infrastructure isn't done at the same time as converting any existing 
manuals to use Sphinx, or even converting any existing manuals using 
Sphinx to use such infrastructure.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] libgccjit: Allow comparing vector types

2022-12-12 Thread Antoni Boucher via Gcc-patches
Hi.
This fixes bug 108078.
Thanks for the review.
From 016d23eeab1536e2ce0607d422fe4bb42b55c2dc Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Fri, 24 Jun 2022 21:05:29 -0400
Subject: [PATCH] libgccjit: Allow comparing vector types

gcc/jit/ChangeLog:
	PR jit/108078
	* jit-recording.h: Add vector_type::is_same_type_as method

gcc/testsuite/ChangeLog:
	PR jit/108078
	* jit.dg/test-vector-types.cc: Add tests for vector type comparison

Co-authored-by: Guillaume Gomez 
---
 gcc/jit/jit-recording.h   |  9 +
 gcc/testsuite/jit.dg/test-vector-types.cc | 13 +
 2 files changed, 22 insertions(+)

diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index 5d7c7177cc3..4ec0fff4843 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -806,6 +806,15 @@ public:
 
   void replay_into (replayer *) final override;
 
+  virtual bool is_same_type_as (type *other)
+  {
+vector_type *other_vec_type = other->dyn_cast_vector_type ();
+if (other_vec_type == NULL)
+  return false;
+return get_num_units () == other_vec_type->get_num_units ()
+  && get_element_type () == other_vec_type->get_element_type ();
+  }
+
   vector_type *is_vector () final override { return this; }
 
 private:
diff --git a/gcc/testsuite/jit.dg/test-vector-types.cc b/gcc/testsuite/jit.dg/test-vector-types.cc
index 1f49be6b59f..5661d1b9eb4 100644
--- a/gcc/testsuite/jit.dg/test-vector-types.cc
+++ b/gcc/testsuite/jit.dg/test-vector-types.cc
@@ -105,6 +105,19 @@ create_code (gcc_jit_context *ctxt, void *user_data)
 		 v4f_type, GCC_JIT_BINARY_OP_MULT);
   create_vec_fn (ctxt, "jit_v4f_div",
 		 v4f_type, GCC_JIT_BINARY_OP_DIVIDE);
+
+  // Checking compatibility between types.
+  CHECK_VALUE(gcc_jit_compatible_types(v4si_type, v4ui_type), 0);
+  CHECK_VALUE(gcc_jit_compatible_types(v4si_type, v4f_type), 0);
+  CHECK_VALUE(gcc_jit_compatible_types(v4ui_type, v4f_type), 0);
+
+  gcc_jit_type *v4si_type2 = gcc_jit_type_get_vector (int_type, 4);
+  gcc_jit_type *v4ui_type2 = gcc_jit_type_get_vector (unsigned_type, 4);
+  gcc_jit_type *v4f_type2 = gcc_jit_type_get_vector (float_type, 4);
+
+  CHECK_VALUE(gcc_jit_compatible_types(v4si_type, v4si_type2), 1);
+  CHECK_VALUE(gcc_jit_compatible_types(v4ui_type, v4ui_type2), 1);
+  CHECK_VALUE(gcc_jit_compatible_types(v4f_type, v4f_type2), 1);
 }
 
 template 
-- 
2.26.2.7.g19db9cfb68.dirty



Ping^5: [PATCH V6] rs6000: Optimize cmp on rotated 16bits constant

2022-12-12 Thread Jiufu Guo via Gcc-patches
Hi,

I would like to ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600475.html

BR,
Jeff (Jiufu)

Jiufu Guo via Gcc-patches  writes:

> Hi,
>
> Gentle ping:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600475.html
>
> BR,
> Jeff(Jiufu)
>
> Jiufu Guo via Gcc-patches  writes:
>
>> Hi,
>>
>> Gentle ping this:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600475.html
>>
>> BR,
>> Jeff (Jiufu)
>>
>>
>> Jiufu Guo via Gcc-patches  writes:
>>
>>> Gentle ping:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600475.html
>>>
>>> BR,
>>> Jeff (Jiufu)
>>>
>>> Jiufu Guo via Gcc-patches  writes:
>>>
 Ping: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600475.html

 BR,
 Jeff(Jiufu)


 Jiufu Guo  writes:

> Hi,
>
> When checking eq/ne with a constant which has only 16bits, it can be
> optimized to check the rotated data.  By this, the constant building
> is optimized.
>
> As the example in PR103743:
> For "in == 0x8000LL", this patch generates:
> rotldi %r3,%r3,16
> cmpldi %cr0,%r3,32768
> instead:
> li %r9,-1
> rldicr %r9,%r9,0,0
> cmpd %cr0,%r3,%r9
>
> Compare with previous patchs:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600385.html
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600198.html
>
> This patch releases the condition on can_create_pseudo_p and adds
> clobbers to allow the splitter can be run both before and after RA.
>
> This is updated patch based on previous patch and comments:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600315.html
>
> This patch pass bootstrap and regtest on ppc64 and ppc64le.
> Is it ok for trunk?  Thanks for comments!
>
> BR,
> Jeff(Jiufu)
>
>
>   PR target/103743
>
> gcc/ChangeLog:
>
>   * config/rs6000/rs6000-protos.h (rotate_from_leading_zeros_const): New.
>   (compare_rotate_immediate_p): New.
>   * config/rs6000/rs6000.cc (rotate_from_leading_zeros_const): New
>   definition.
>   (compare_rotate_immediate_p): New definition.
>   * config/rs6000/rs6000.md (EQNE): New code_attr.
>   (*rotate_on_cmpdi): New define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/powerpc/pr103743.c: New test.
>   * gcc.target/powerpc/pr103743_1.c: New test.
>
> ---
>  gcc/config/rs6000/rs6000-protos.h |  2 +
>  gcc/config/rs6000/rs6000.cc   | 41 
>  gcc/config/rs6000/rs6000.md   | 62 +++-
>  gcc/testsuite/gcc.target/powerpc/pr103743.c   | 52 ++
>  gcc/testsuite/gcc.target/powerpc/pr103743_1.c | 95 +++
>  5 files changed, 251 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103743.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103743_1.c
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index b3c16e7448d..78847e6b3db 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -35,6 +35,8 @@ extern bool xxspltib_constant_p (rtx, machine_mode, int 
> *, int *);
>  extern int vspltis_shifted (rtx);
>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> +extern int rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT, int);
> +extern bool compare_rotate_immediate_p (unsigned HOST_WIDE_INT);
>  extern int num_insns_constant (rtx, machine_mode);
>  extern int small_data_operand (rtx, machine_mode);
>  extern bool mem_operand_gpr (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index df491bee2ea..a548db42660 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -14797,6 +14797,47 @@ rs6000_reverse_condition (machine_mode mode, 
> enum rtx_code code)
>  return reverse_condition (code);
>  }
>  
> +/* Check if C can be rotated from an immediate which starts (as 64bit 
> integer)
> +   with at least CLZ bits zero.
> +
> +   Return the number by which C can be rotated from the immediate.
> +   Return -1 if C can not be rotated as from.  */
> +
> +int
> +rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz)
> +{
> +  /* case a. 0..0xxx: already at least clz zeros.  */
> +  int lz = clz_hwi (c);
> +  if (lz >= clz)
> +return 0;
> +
> +  /* case b. 0..0xxx0..0: at least clz zeros.  */
> +  int tz = ctz_hwi (c);
> +  if (lz + tz >= clz)
> +return tz;
> +
> +  /* case c. xx10.0xx: rotate 'clz + 1' bits firstly, then check 
>>>

Re: [PATCH PING] build: add -Wconditionally-supported to strict_warn [PR64867]

2022-12-12 Thread Jason Merrill via Gcc-patches

On 12/6/22 08:26, Jason Merrill wrote:

Tested x86_64-pc-linux-gnu, OK for trunk?


Ping.


-- 8< --

The PR (which isn't resolved by this commit) pointed out to me that GCC
should build with -Wconditionally-supported to support bootstrapping with a
C++11 compiler that makes different choices.

PR c++/64867

gcc/ChangeLog:

* configure.ac (strict_warn): Add -Wconditionally-supported.
* configure: Regenerate.
---
  gcc/configure.ac | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/configure.ac b/gcc/configure.ac
index 7ca08726efa..12771fc292c 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -580,7 +580,7 @@ ACX_PROG_CC_WARNING_OPTS(
m4_quote(m4_do([-Wstrict-prototypes -Wmissing-prototypes ],
   [])), [c_loose_warn])
  ACX_PROG_CXX_WARNING_OPTS(
-   m4_quote(m4_do([-Wmissing-format-attribute ],
+   m4_quote(m4_do([-Wmissing-format-attribute ], 
[-Wconditionally-supported ],
   [-Woverloaded-virtual])), [strict_warn])
  ACX_PROG_CC_WARNING_OPTS(
m4_quote(m4_do([-Wold-style-definition -Wc++-compat])), [c_strict_warn])

base-commit: d19aa6af6634b1e97f38431ad091f3b3f12baf2f




Re: Extend fold_vec_perm to fold VEC_PERM_EXPR in VLA manner

2022-12-12 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 6 Dec 2022 at 21:00, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni via Gcc-patches  writes:
> > On Fri, 4 Nov 2022 at 14:00, Prathamesh Kulkarni
> >  wrote:
> >>
> >> On Mon, 31 Oct 2022 at 15:27, Richard Sandiford
> >>  wrote:
> >> >
> >> > Prathamesh Kulkarni  writes:
> >> > > On Wed, 26 Oct 2022 at 21:07, Richard Sandiford
> >> > >  wrote:
> >> > >>
> >> > >> Sorry for the slow response.  I wanted to find some time to think
> >> > >> about this a bit more.
> >> > >>
> >> > >> Prathamesh Kulkarni  writes:
> >> > >> > On Fri, 30 Sept 2022 at 21:38, Richard Sandiford
> >> > >> >  wrote:
> >> > >> >>
> >> > >> >> Richard Sandiford via Gcc-patches  writes:
> >> > >> >> > Prathamesh Kulkarni  writes:
> >> > >> >> >> Sorry to ask a silly question but in which case shall we select 
> >> > >> >> >> 2nd vector ?
> >> > >> >> >> For num_poly_int_coeffs == 2,
> >> > >> >> >> a1 /trunc n1 == (a1 + 0x) / (n1.coeffs[0] + n1.coeffs[1]*x)
> >> > >> >> >> If a1/trunc n1 succeeds,
> >> > >> >> >> 0 / n1.coeffs[1] == a1/n1.coeffs[0] == 0.
> >> > >> >> >> So, a1 has to be < n1.coeffs[0] ?
> >> > >> >> >
> >> > >> >> > Remember that a1 is itself a poly_int.  It's not necessarily a 
> >> > >> >> > constant.
> >> > >> >> >
> >> > >> >> > E.g. the TRN1 .D instruction maps to a VEC_PERM_EXPR with the 
> >> > >> >> > selector:
> >> > >> >> >
> >> > >> >> >   { 0, 2 + 2x, 1, 4 + 2x, 2, 6 + 2x, ... }
> >> > >> >>
> >> > >> >> Sorry, should have been:
> >> > >> >>
> >> > >> >>   { 0, 2 + 2x, 2, 4 + 2x, 4, 6 + 2x, ... }
> >> > >> > Hi Richard,
> >> > >> > Thanks for the clarifications, and sorry for late reply.
> >> > >> > I have attached POC patch that tries to implement the above 
> >> > >> > approach.
> >> > >> > Passes bootstrap+test on x86_64-linux-gnu and aarch64-linux-gnu for 
> >> > >> > VLS vectors.
> >> > >> >
> >> > >> > For VLA vectors, I have only done limited testing so far.
> >> > >> > It seems to pass couple of tests written in the patch for
> >> > >> > nelts_per_pattern == 3,
> >> > >> > and folds the following svld1rq test:
> >> > >> > int32x4_t v = {1, 2, 3, 4};
> >> > >> > return svld1rq_s32 (svptrue_b8 (), &v[0])
> >> > >> > into:
> >> > >> > return {1, 2, 3, 4, ...};
> >> > >> > I will try to bootstrap+test it on SVE machine to test further for 
> >> > >> > VLA folding.
> >> > >> >
> >> > >> > I have a couple of questions:
> >> > >> > 1] When mask selects elements from same vector but from different 
> >> > >> > patterns:
> >> > >> > For eg:
> >> > >> > arg0 = {1, 11, 2, 12, 3, 13, ...},
> >> > >> > arg1 = {21, 31, 22, 32, 23, 33, ...},
> >> > >> > mask = {0, 0, 0, 1, 0, 2, ... },
> >> > >> > All have npatterns = 2, nelts_per_pattern = 3.
> >> > >> >
> >> > >> > With above mask,
> >> > >> > Pattern {0, ...} selects arg0[0], ie {1, ...}
> >> > >> > Pattern {0, 1, 2, ...} selects arg0[0], arg0[1], arg0[2], ie {1, 
> >> > >> > 11, 2, ...}
> >> > >> > While arg0[0] and arg0[2] belong to same pattern, arg0[1] belongs 
> >> > >> > to different
> >> > >> > pattern in arg0.
> >> > >> > The result is:
> >> > >> > res = {1, 1, 1, 11, 1, 2, ...}
> >> > >> > In this case, res's 2nd pattern {1, 11, 2, ...} is encoded with:
> >> > >> > with a0 = 1, a1 = 11, S = -9.
> >> > >> > Is that expected tho ? It seems to create a new encoding which
> >> > >> > wasn't present in the input vector. For instance, the next elem in
> >> > >> > sequence would be -7,
> >> > >> > which is not present originally in arg0.
> >> > >>
> >> > >> Yeah, you're right, sorry.  Going back to:
> >> > >>
> >> > >> (2) The explicit encoding can be used to produce a sequence of N*Ex*Px
> >> > >> elements for any integer N.  This extended sequence can be 
> >> > >> reencoded
> >> > >> as having N*Px patterns, with Ex staying the same.
> >> > >>
> >> > >> I guess we need to pick an N for the selector such that each new
> >> > >> selector pattern (each one out of the N*Px patterns) selects from
> >> > >> the *same pattern* of the same data input.
> >> > >>
> >> > >> So if a particular pattern in the selector has a step S, and the data
> >> > >> input it selects from has Pi patterns, N*S must be a multiple of Pi.
> >> > >> N must be a multiple of least_common_multiple(S,Pi)/S.
> >> > >>
> >> > >> I think that means that the total number of patterns in the result
> >> > >> (Pr from previous messages) can safely be:
> >> > >>
> >> > >>   Ps * least_common_multiple(
> >> > >> least_common_multiple(S[1], P[input(1)]) / S[1],
> >> > >> ...
> >> > >> least_common_multiple(S[Ps], P[input(Ps)]) / S[Ps]
> >> > >>   )
> >> > >>
> >> > >> where:
> >> > >>
> >> > >>   Ps = the number of patterns in the selector
> >> > >>   S[I] = the step for selector pattern I (I being 1-based)
> >> > >>   input(I) = the data input selected by selector pattern I (I being 
> >> > >> 1-based)
> >> > >>   P[I] = the number of patterns in data input I
> >> > >>
> >> > >> That's getting quite complicated :-)  If we allow arbitrary P[...]
> >> > >> and S[...] then 

Re: [PATCH 2/3] btf: fix 'extern const void' variables [PR106773]

2022-12-12 Thread Indu Bhagat via Gcc-patches

On 12/12/22 12:59, David Faust wrote:



On 12/8/22 23:34, Indu Bhagat wrote:

Looks OK to me overall. Minor comments below.

Thanks

On 12/7/22 12:57, David Faust wrote:

The eBPF loader expects to find BTF_KIND_VAR records for references to
extern const void symbols. We were mistakenly identifing these as
unsupported types, and as a result skipping emitting VAR records for
them.

In addition, the internal DWARF representation from which BTF is
produced does not generate 'const' modifier DIEs for the void type,
which meant in BTF the 'const' qualifier was dropped for 'extern const
void' variables. This patch also adds support for generating a const
void type in BTF to correct emission for these variables.

PR target/106773

gcc/

* btfout.cc (btf_collect_datasec): Correct size of void entries.
(btf_dvd_emit_preprocess_cb): Do not skip emitting variables which
refer to void types.
(btf_init_postprocess): Create 'const void' type record if needed and
adjust variables to refer to it as appropriate.

gcc/testsuite/

* gcc.dg/debug/btf/btf-pr106773.c: New test.
---
   gcc/btfout.cc | 44 +--
   gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c | 25 +++
   2 files changed, 65 insertions(+), 4 deletions(-)
   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index a1c6266a7db..05f3a3f9b6e 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -354,6 +354,8 @@ btf_collect_datasec (ctf_container_ref ctfc)
 tree size = DECL_SIZE_UNIT (node->decl);
 if (tree_fits_uhwi_p (size))
info.size = tree_to_uhwi (size);
+  else if (VOID_TYPE_P (TREE_TYPE (node->decl)))
+   info.size = 1;
   
 /* Offset is left as 0 at compile time, to be filled in by loaders such

 as libbpf.  */
@@ -439,7 +441,7 @@ btf_dvd_emit_preprocess_cb (ctf_dvdef_ref *slot, 
ctf_container_ref arg_ctfc)
 ctf_dvdef_ref var = (ctf_dvdef_ref) * slot;
   
 /* Do not add variables which refer to unsupported types.  */

-  if (btf_removed_type_p (var->dvd_type))
+  if (!voids.contains (var->dvd_type) && btf_removed_type_p (var->dvd_type))
   return 1;
   
 arg_ctfc->ctfc_vars_list[num_vars_added] = var;

@@ -1073,15 +1075,49 @@ btf_init_postprocess (void)
   {
 ctf_container_ref tu_ctfc = ctf_get_tu_ctfc ();
   
-  size_t i;

-  size_t num_ctf_types = tu_ctfc->ctfc_types->elements ();
-
 holes.create (0);
 voids.create (0);
   
 num_types_added = 0;

 num_types_created = 0;
   
+  /* Workaround for 'const void' variables. These variables are sometimes used

+ in eBPF programs to address kernel symbols. DWARF does not generate const
+ qualifier on void type, so we would incorrectly emit these variables
+ without the const qualifier.
+ Unfortunately we need the TREE node to know it was const, and we need
+ to create the const modifier type (if needed) now, before making the types
+ list. So we can't avoid iterating with FOR_EACH_VARIABLE here, and then
+ again when creating the DATASEC entries.  */


"Dot, space, space, new sentence." in 3 places.



+  ctf_id_t constvoid_id = CTF_NULL_TYPEID;
+  varpool_node *var;
+  FOR_EACH_VARIABLE (var)
+{
+  if (!var->decl)
+   continue;
+
+  tree type = TREE_TYPE (var->decl);
+  if (type && VOID_TYPE_P (type) && TYPE_READONLY (type))
+   {
+ dw_die_ref die = lookup_decl_die (var->decl);
+ if (die == NULL)
+   continue;
+
+ ctf_dvdef_ref dvd = ctf_dvd_lookup (tu_ctfc, die);
+ if (dvd == NULL)
+   continue;
+
+ /* Create the 'const' modifier type for void.  */
+ if (constvoid_id == CTF_NULL_TYPEID)
+   constvoid_id = ctf_add_reftype (tu_ctfc, CTF_ADD_ROOT,
+   dvd->dvd_type, CTF_K_CONST, NULL);


No de-duplication of the const void type.  I assume libbpf will take
care of this eventually.


Hm, not sure I follow. Where is the duplication? The const void type is
only created once here, for the first such variable which needs it, and
reused for subsequent variables. And it does not already exist in the
CTF which we are translating into BTF.



You're right - you are reusing the const void type once generated for a 
CU for each usage. My bad - I didnt follow the code properly :)



In any case, yes libbpf can handle duplicated types. Though it would
still be good to minimize that where we can to not bloat the BTF info.




+ dvd->dvd_type = constvoid_id;
+   }
+}
+
+  size_t i;
+  size_t num_ctf_types = tu_ctfc->ctfc_types->elements ();
+
 if (num_ctf_types)
   {
 init_btf_id_map (num_ctf_types + 1);
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-pr106773.c
new file mode 100644
index 000..f90fa773a4b
--- /dev/null
+++ b/gcc/testsuite/g

Re: [PATCH 3/3] btf: correct generation for extern funcs [PR106773]

2022-12-12 Thread Indu Bhagat via Gcc-patches

On 12/12/22 12:31, David Faust wrote:



On 12/8/22 23:36, Indu Bhagat wrote:

On 12/7/22 12:57, David Faust wrote:

The eBPF loader expects to find entries for functions declared as extern
in the corresponding BTF_KIND_DATASEC record, but we were not generating
these entries.

This patch adds support for the 'extern' linkage of function types in
BTF, and creates entries for for them BTF_KIND_DATASEC records as needed.

PR target/106773

gcc/

* btfout.cc (get_section_name): New function.
(btf_collect_datasec): Use it here. Process functions, marking them
'extern' and generating DATASEC entries for them as appropriate. Move
creation of BTF_KIND_FUNC records to here...
(btf_dtd_emit_preprocess_cb): ... from here.

gcc/testsuite/

* gcc.dg/debug/btf/btf-datasec-2.c: New test.
* gcc.dg/debug/btf/btf-function-6.c: New test.

include/

* btf.h (struct btf_var_secinfo): Update comments with notes about
extern functions.
---
   gcc/btfout.cc | 129 --
   .../gcc.dg/debug/btf/btf-datasec-2.c  |  28 
   .../gcc.dg/debug/btf/btf-function-6.c |  19 +++
   include/btf.h |   9 +-
   4 files changed, 139 insertions(+), 46 deletions(-)
   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-datasec-2.c
   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-6.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 05f3a3f9b6e..d7ead377ec5 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -294,7 +294,35 @@ btf_datasec_push_entry (ctf_container_ref ctfc, const char 
*secname,
 ds.entries.safe_push (info);
   
 datasecs.safe_push (ds);

-  num_types_created++;
+}
+
+
+/* Return the section name, as of interest to btf_collect_datasec, for the
+   given symtab node. Note that this deliberately returns NULL for objects
+   which do not go in a section btf_collect_datasec cares about.  */


"Dot, space, space, new sentence."


+static const char *
+get_section_name (symtab_node *node)
+{
+  const char *section_name = node->get_section ();
+
+  if (section_name == NULL)
+{
+  switch (categorize_decl_for_section (node->decl, 0))
+   {
+   case SECCAT_BSS:
+ section_name = ".bss";
+ break;
+   case SECCAT_DATA:
+ section_name = ".data";
+ break;
+   case SECCAT_RODATA:
+ section_name = ".rodata";
+ break;
+   default:;
+   }
+}
+
+  return section_name;
   }
   
   /* Construct all BTF_KIND_DATASEC records for CTFC. One such record is created

@@ -305,7 +333,60 @@ btf_datasec_push_entry (ctf_container_ref ctfc, const char 
*secname,
   static void
   btf_collect_datasec (ctf_container_ref ctfc)
   {
-  /* See cgraph.h struct symtab_node, which varpool_node extends.  */
+  cgraph_node *func;
+  FOR_EACH_FUNCTION (func)
+{
+  dw_die_ref die = lookup_decl_die (func->decl);
+  if (die == NULL)
+   continue;
+
+  ctf_dtdef_ref dtd = ctf_dtd_lookup (ctfc, die);
+  if (dtd == NULL)
+   continue;
+
+  /* Functions actually get two types: a BTF_KIND_FUNC_PROTO, and
+also a BTF_KIND_FUNC. But the CTF container only allocates one
+type per function, which matches closely with BTF_KIND_FUNC_PROTO.
+For each such function, also allocate a BTF_KIND_FUNC entry.
+These will be output later.  */


"Dot, space, space, new sentence."


+  ctf_dtdef_ref func_dtd = ggc_cleared_alloc ();
+  func_dtd->dtd_data = dtd->dtd_data;
+  func_dtd->dtd_data.ctti_type = dtd->dtd_type;
+  func_dtd->linkage = dtd->linkage;
+  func_dtd->dtd_type = num_types_added + num_types_created;
+
+  /* Only the BTF_KIND_FUNC type actually references the name. The
+BTF_KIND_FUNC_PROTO is always anonymous.  */
+  dtd->dtd_data.ctti_name = 0;
+
+  vec_safe_push (funcs, func_dtd);
+  num_types_created++;
+
+  /* Mark any 'extern' funcs and add DATASEC entries for them.  */
+  if (DECL_EXTERNAL (func->decl))
+   {
+ func_dtd->linkage = BTF_LINKAGE_EXTERN;
+


What is the expected BTF when both decl and definition are present:

extern int extfunc(int x);
int extfunc (int x) {
int y = foo ();
return y;
}


Using clang implementation as the reference, a single FUNC record
for "extfunc" with "global" linkage:

$ cat extfuncdef.c
extern int extfunc (int x);
int extfunc (int x) {
   int y = foo ();
   return y;
}

$ clang -target bpf -c -g extfuncdef.c -o extfuncdef.o

$ /usr/sbin/bpftool btf dump file extfuncdef.o
[1] FUNC_PROTO '(anon)' ret_type_id=2 vlen=1
'(anon)' type_id=2
[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
[3] FUNC 'extfunc' type_id=1 linkage=global

With this patch we do the same in GCC.



OK. Thanks for confirming.




+ const char *section_name = get_section_name (func);
+ /* Note: get_section_name () returns NULL for fu

Re: [PATCH 1/3] Rework 128-bit complex multiply and divide, PR target/107299

2022-12-12 Thread Michael Meissner via Gcc-patches
On Mon, Dec 12, 2022 at 06:20:14PM +0800, Kewen.Lin wrote:
> Without or with patch #1, the below ICE in libgcc exists, the ICE should have
> nothing to do with the special handling for building_libgcc in patch #1.  I
> think patch #2 which makes _Float128 and __float128 use the same internal
> type fixes that ICE.
> 
> I still don't get the point why we need the special handling for 
> building_libgcc,
> I also tested on top of patch #1 and #2 w/ and w/o the special handling for
> building_libgcc, both bootstrapped and regress-tested.
> 
> Could you have a double check?

As long as patch #2 and #3 are installed, we don't need the special handling
for building_libgcc.  Good catch.

I will send out a replacement patch for it.

> Since your patch #2 (and #3) fixes ICE and some exposed problems, and 
> _Float128
> is to use the same internal type as __float128, types with 
> attribute((mode(TF)))
> and attribute((mode(TC))) should be correct, I assume that this patch is just
> to make the types explicit be with _Float128 (for better readability and
> maintainance), but not for any correctness issues.

Yes, the patch is mainly for clarity.  The history is the libgcc support went
in before _Float128 went in, and I never went back to use those types when we
could use them.

With _Float128, we can just use _Complex _Float128 and not
bother with trying to get the right KC/TC for the attribute mode stuff.

However, if patches 1-3 aren't put in, just applying the patch to use _Float128
and _Complex _Float128 would fix the immediate problem (of not building GCC on
systems with IEEE 128-bit long double).  However, it is a band-aid that just
works around the problem of building __mulkc3 and __divkc3.  It doesn't fix the
other problems between __float128 and _Float128 that show up in some places
that I would like to get fixed.

So I haven't submitted the patch, because I think it is more important to get
the other issues fixed.

> > Now, this patch fixes the specific problem of not being able to build libgcc
> > (along with patch #1 of the series).  But other things show the differences
> > from time time because we are using different internal types and the middle 
> > end
> > doesn't know that these types are really the same bits.
> > 
> > It is better long term (IMHO) if we have the two types (__float128 and
> > _Float128) use the same internal type (which is what is done in patches #2 
> > and
> > #3).  This fixes the other issues that show up, such as creating signaling 
> > NaNs
> > for one internal type, and converting it to the other internal type, loses 
> > that
> > the NaN is signalling.
> > 
> 
> I see, nice!
> 
> BR,
> Kewen

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH 1/3] btf: add 'extern' linkage for variables [PR106773]

2022-12-12 Thread Indu Bhagat via Gcc-patches

On 12/12/22 12:47, David Faust wrote:



On 12/8/22 22:55, Indu Bhagat wrote:

Hi David,

On 12/7/22 12:57, David Faust wrote:

Add support for the 'extern' linkage value for BTF_KIND_VAR records,
which is used for variables declared as extern in the source file.

PR target/106773

gcc/

* btfout.cc (BTF_LINKAGE_STATIC): New define.
(BTF_LINKAGE_GLOBAL): Likewise.
(BTF_LINKAGE_EXTERN): Likewise.
(btf_collect_datasec): Mark extern variables as such.
(btf_asm_varent): Accomodate 'extern' linkage.

gcc/testsuite/

* gcc.dg/debug/btf/btf-variables-4.c: New test.

include/

* btf.h (struct btf_var): Update comment to note 'extern' linkage.
---
   gcc/btfout.cc |  9 ++-
   .../gcc.dg/debug/btf/btf-variables-4.c| 24 +++
   include/btf.h |  2 +-
   3 files changed, 33 insertions(+), 2 deletions(-)
   create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-variables-4.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index aef9fd70a28..a1c6266a7db 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -66,6 +66,10 @@ static char btf_info_section_label[MAX_BTF_LABEL_BYTES];
   
   #define BTF_INVALID_TYPEID 0x
   
+#define BTF_LINKAGE_STATIC 0

+#define BTF_LINKAGE_GLOBAL 1
+#define BTF_LINKAGE_EXTERN 2
+


I was about to suggest to rename these to use the same name as used in
the kernel btf.h. What is used there is:
  BTF_VAR_STATIC = 0,
  BTF_VAR_GLOBAL_ALLOCATED = 1,
  BTF_VAR_GLOBAL_EXTERN = 2,

But after looking at the Patch 3/3, I see you reuse these definitions
for functions as well. I just find the names confusing on the first look
- "BTF_LINKAGE_STATIC".

Naming aside, what do you think about adding the defines to
include/btf.h instead ?


Actually, I forgot these are defined (separately for both VARs and FUNCs)
in the kernel uapi/linux/btf.h. It would probably be best to mirror that
approach and use a separate enum for each, in include/btf.h. WDYT?



Yes, mirroring in include/btf.h sounds good.




   /* Mapping of CTF variables to the IDs they will be assigned when they are
  converted to BTF_KIND_VAR type records. Strictly accounts for the index
  from the start of the variable type entries, does not include the number
@@ -314,6 +318,9 @@ btf_collect_datasec (ctf_container_ref ctfc)
continue;
   
 const char *section_name = node->get_section ();

+  /* Mark extern variables.  */
+  if (DECL_EXTERNAL (node->decl))
+   dvd->dvd_visibility = BTF_LINKAGE_EXTERN;
   


This made me think about the following case.

extern const char a[];
const char a[] = "foo";

What is the expected BTF for this? Since BTF can differentiate between
the non-defining extern variable declaration, I expected to see two
variables with different "linkage". At this time I see, two variables
with global linkage but different types:

  .long   0xe00   # btv_info
  .long   0x4 # btv_type
  .long   0x1 # btv_linkage
  .long   0x1f# btv_name
  .long   0xe00   # btv_info
  .long   0x7 # btv_type
  .long   0x1 # btv_linkage
  .long   0x60# btt_name



The BTF documentation in the kernel does not clarify this case.
Going off the implementation in clang as a reference, it looks like
only one VAR record is expected, with 'global' linkage:

$ cat extdef.c
extern const char a[];
const char a[] = "foo";

$ clang -target bpf -c -g extdef.c -o extdef.o

$ /usr/sbin/bpftool btf dump file extdef.o
[1] CONST '(anon)' type_id=2
[2] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED
[3] ARRAY '(anon)' type_id=1 index_type_id=4 nr_elems=4
[4] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
[5] VAR 'a' type_id=3, linkage=global
[6] DATASEC '.rodata' size=0 vlen=1
type_id=5 offset=0 size=4 (VAR 'a')

In GCC we have two records since we have two DIEs for "a" in the
DWARF. One has type "const char[4]" and the other has type
"const char[]", so the BTF records point to two different types
as well.

I guess we should find a way in BTF to identify this and
emit only the defining definition as clang does.




CTF had a similar issue earlier. See PR105089.
https://gcc.gnu.org/PR105089


 if (section_name == NULL)
{
@@ -676,7 +683,7 @@ btf_asm_varent (ctf_dvdef_ref var)
 dw2_asm_output_data (4, var->dvd_name_offset, "btv_name");
 dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_VAR, 0, 0), "btv_info");
 dw2_asm_output_data (4, get_btf_id (var->dvd_type), "btv_type");
-  dw2_asm_output_data (4, (var->dvd_visibility ? 1 : 0), "btv_linkage");
+  dw2_asm_output_data (4, var->dvd_visibility, "btv_linkage");
   }
   
   /* Asm'out a member description following a BTF_KIND_STRUCT or

diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-variables-4.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-variables-

[PATCH V2] Rework 128-bit complex multiply and divide, PR target/107299

2022-12-12 Thread Michael Meissner via Gcc-patches
In the patch I previously submitted:

| Date: Tue, 1 Nov 2022 22:40:43 -0400
| Subject: [PATCH 1/3] Rework 128-bit complex multiply and divide, PR 
target/107299
| Message-ID: 

Kewen.Lin questioned whether we needed to disable the special handling of the
IEEE 128-bit complex multiply/divide if we are building libgcc.  I looked at
it, and we don't need to disable doing the special handling when building
libgcc.  But in order for it to work, patches #2 and #3 need to be applied.

This patch is a replacement patch for that previous patch.

This function reworks how the complex multiply and divide built-in functions are
done.  Previously we created built-in declarations for doing long double complex
multiply and divide when long double is IEEE 128-bit.  The old code also did not
support __ibm128 complex multiply and divide if long double is IEEE 128-bit.

In terms of history, I wrote the original code just as I was starting to test
GCC on systems where IEEE 128-bit long double was the default.  At the time, we
had not yet started mangling the built-in function names as a way to bridge
going from a system with 128-bit IBM long double to 128-bin IEEE long double.

The original code depends on there only being two 128-bit types invovled.  With
the next patch in this series, this assumption will no longer be true.  When
long double is IEEE 128-bit, there will be 2 IEEE 128-bit types (one for the
explicit __float128/_Float128 type and one for long double).

The problem is we cannot create two separate built-in functions that resolve to
the same name.  This is a requirement of add_builtin_function and the C front
end.  That means for the 3 possible modes (IFmode, KFmode, and TFmode), you can
only use 2 of them.

This code does not create the built-in declaration with the changed name.
Instead, it uses the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the name
before it is written out to the assembler file like it now does for all of the
other long double built-in functions.

When I wrote these patches, I discovered that __ibm128 complex multiply and
divide had originally not been supported if long double is IEEE 128-bit as it
would generate calls to __mulic3 and __divic3.  I added tests in the testsuite
to verify that the correct name (i.e. __multc3 and __divtc3) is used in this
case.

I tested all 3 patchs for PR target/107299 on:

1)  LE Power10 using --with-cpu=power10 --with-long-double-format=ieee
2)  LE Power10 using --with-cpu=power10 --with-long-double-format=ibm
3)  LE Power9  using --with-cpu=power9  --with-long-double-format=ibm
4)  BE Power8  using --with-cpu=power8  --with-long-double-format=ibm

Once all 3 patches have been applied, we can once again build GCC when long
double is IEEE 128-bit.  There were no other regressions with these patches.
Can I check these patches into the trunk?

2022-12-13   Michael Meissner  

gcc/

PR target/107299
* config/rs6000/rs6000.cc (create_complex_muldiv): Delete.
(init_float128_ieee): Delete code to switch complex multiply and divide
for long double.
(complex_multiply_builtin_code): New helper function.
(complex_divide_builtin_code): Likewise.
(rs6000_mangle_decl_assembler_name): Add support for mangling the name
of complex 128-bit multiply and divide built-in functions.

gcc/testsuite/

PR target/107299
* gcc.target/powerpc/divic3-1.c: New test.
* gcc.target/powerpc/divic3-2.c: Likewise.
* gcc.target/powerpc/mulic3-1.c: Likewise.
* gcc.target/powerpc/mulic3-2.c: Likewise.
---
 gcc/config/rs6000/rs6000.cc | 109 +++-
 gcc/testsuite/gcc.target/powerpc/divic3-1.c |  18 
 gcc/testsuite/gcc.target/powerpc/divic3-2.c |  17 +++
 gcc/testsuite/gcc.target/powerpc/mulic3-1.c |  18 
 gcc/testsuite/gcc.target/powerpc/mulic3-2.c |  17 +++
 5 files changed, 132 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/divic3-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/divic3-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mulic3-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mulic3-2.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 70a3ca801fe..b5a5ecbf51a 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -11120,26 +11120,6 @@ init_float128_ibm (machine_mode mode)
 }
 }
 
-/* Create a decl for either complex long double multiply or complex long double
-   divide when long double is IEEE 128-bit floating point.  We can't use
-   __multc3 and __divtc3 because the original long double using IBM extended
-   double used those names.  The complex multiply/divide functions are encoded
-   as builtin functions with a complex result and 4 scalar inputs.  */
-
-static void
-create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
-{
-  tree fndecl = add_builtin_function (name, fntype, 

Re: [PATCH 1/3] Rework 128-bit complex multiply and divide, PR target/107299

2022-12-12 Thread Michael Meissner via Gcc-patches
I have submitted a new replacement patch for this patch:

| Date: Tue, 1 Nov 2022 22:40:43 -0400
| Subject: [PATCH 1/3] Rework 128-bit complex multiply and divide, PR 
target/107299
| Message-ID: 
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608368.html

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[RFC]RISC-V: Support RV64-ILP32

2022-12-12 Thread shihua
From: Liao Shihua 

  This patch support rv64 insn in ilp32 ABI. It was inspired by aarch64 
both 
   support 64-bit and 32-bit ABI with the same set of instructions.


gcc/ChangeLog:

* config.gcc:  Implememt ilp32* with rv64*.
* config/riscv/riscv.cc (riscv_option_override): Remove the constraint 
between RV64 and ILP32.
* config/riscv/riscv.h (TARGET_ILP32): Define TARGET_ILP32 with 
riscv_abi.
(POINTER_SIZE):POINTER_SIZE will change with TARGET_ILP32.
(Pmode):Likewise.
* config/riscv/riscv.md: Convert split mode with Pmode and change mode 
form Xmode to Pmode in stack_tie.

---
 gcc/config.gcc| 3 +++
 gcc/config/riscv/riscv.cc | 4 
 gcc/config/riscv/riscv.h  | 8 ++--
 gcc/config/riscv/riscv.md | 8 ++--
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c5064dd3766..069293a6e19 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4748,6 +4748,9 @@ case "${target}" in
ilp32,rv32* | ilp32e,rv32e* \
| ilp32f,rv32*f* | ilp32f,rv32g* \
| ilp32d,rv32*d* | ilp32d,rv32g* \
+   | ilp32f,rv64*f* | ilp32f,rv64g* \
+   | ilp32d,rv64*d* | ilp32d,rv64g* \
+   | ilp32,rv64* \
| lp64,rv64* \
| lp64f,rv64*f* | lp64f,rv64g* \
| lp64d,rv64*d* | lp64d,rv64g*)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ee756aab694..03f313e2b28 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5026,10 +5026,6 @@ riscv_option_override (void)
   if (TARGET_RVE && riscv_abi != ABI_ILP32E)
 error ("rv32e requires ilp32e ABI");
 
-  /* We do not yet support ILP32 on RV64.  */
-  if (BITS_PER_WORD != POINTER_SIZE)
-error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
-
   /* Validate -mpreferred-stack-boundary= value.  */
   riscv_stack_boundary = ABI_STACK_BOUNDARY;
   if (riscv_preferred_stack_boundary_arg)
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 8a4d2cf7f85..63aece89878 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -79,6 +79,10 @@ extern const char *riscv_default_mtune (int argc, const char 
**argv);
 #define TARGET_64BIT   (__riscv_xlen == 64)
 #endif /* IN_LIBGCC2 */
 
+#ifndef TARGET_ILP32
+#define TARGET_ILP32   (riscv_abi <= ABI_ILP32D)
+#endif /*TARGET_ILP32*/
+
 #ifdef HAVE_AS_MISA_SPEC
 #define ASM_MISA_SPEC "%{misa-spec=*}"
 #else
@@ -167,7 +171,7 @@ ASM_MISA_SPEC
 #define SHORT_TYPE_SIZE 16
 #define INT_TYPE_SIZE 32
 #define LONG_LONG_TYPE_SIZE 64
-#define POINTER_SIZE (riscv_abi >= ABI_LP64 ? 64 : 32)
+#define POINTER_SIZE   (TARGET_ILP32 ? 32 : 64)
 #define LONG_TYPE_SIZE POINTER_SIZE
 
 #define FLOAT_TYPE_SIZE 32
@@ -729,7 +733,7 @@ typedef struct {
After generation of rtl, the compiler makes no further distinction
between pointers and any other objects of this machine mode.  */
 
-#define Pmode word_mode
+#define Pmode (TARGET_ILP32 ? SImode : DImode)
 
 /* Give call MEMs SImode since it is the "most permissive" mode
for both 32-bit and 64-bit targets.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b3c5bce842a..34034aec8c0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2559,6 +2559,10 @@
   "reload_completed"
   [(const_int 0)]
 {
+  if (GET_MODE (operands[0]) != Pmode)
+operands[0] = convert_to_mode (Pmode, operands[0], 0);
+  if (GET_MODE (operands[1]) != Pmode)
+operands[1] = convert_to_mode (Pmode, operands[1], 0);
   riscv_set_return_address (operands[0], operands[1]);
   DONE;
 })
@@ -2759,8 +2763,8 @@
 
 (define_insn "stack_tie"
   [(set (mem:BLK (scratch))
-   (unspec:BLK [(match_operand:X 0 "register_operand" "r")
-(match_operand:X 1 "register_operand" "r")]
+   (unspec:BLK [(match_operand:P 0 "register_operand" "r")
+(match_operand:P 1 "register_operand" "r")]
UNSPEC_TIE))]
   ""
   ""
-- 
2.38.0.windows.1