[PATCH 0/1 V3] RISC-V: Support Zmmul extension

2022-07-11 Thread shihua
From: LiaoShihua 

Zmmul extension is Multiply only extension for RISC-V.It implements the 
multiplication subset of the M extension. 
The encodings are identical to those of the corresponding M-extension 
instructions.
When You both use M extension add Zmmul extension, it will warning "-mdiv 
cannot use when the ZMMUL extension is present"

LiaoShihua (1):
  RISC-V: Support Zmmul extension

 gcc/common/config/riscv/riscv-common.cc  |  3 +++
 gcc/config/riscv/riscv-c.cc  |  4 ++--
 gcc/config/riscv/riscv-opts.h|  3 +++
 gcc/config/riscv/riscv.cc|  4 +++-
 gcc/config/riscv/riscv.md| 28 
 gcc/config/riscv/riscv.opt   |  3 +++
 gcc/testsuite/gcc.target/riscv/zmmul-1.c | 20 +
 gcc/testsuite/gcc.target/riscv/zmmul-2.c | 20 +
 8 files changed, 68 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-2.c

-- 
2.31.1.windows.1



[PATCH 1/1 V3] RISC-V: Support Zmmul extension

2022-07-11 Thread shihua
From: LiaoShihua 

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc:
* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins):
* config/riscv/riscv-opts.h (MASK_ZMMUL):
(TARGET_ZMMUL):
* config/riscv/riscv.cc (riscv_option_override):
* config/riscv/riscv.md:
* config/riscv/riscv.opt:

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zmmul-1.c: New test.
* gcc.target/riscv/zmmul-2.c: New test.

---
 gcc/common/config/riscv/riscv-common.cc  |  3 +++
 gcc/config/riscv/riscv-c.cc  |  4 ++--
 gcc/config/riscv/riscv-opts.h|  3 +++
 gcc/config/riscv/riscv.cc|  4 +++-
 gcc/config/riscv/riscv.md| 28 
 gcc/config/riscv/riscv.opt   |  3 +++
 gcc/testsuite/gcc.target/riscv/zmmul-1.c | 20 +
 gcc/testsuite/gcc.target/riscv/zmmul-2.c | 20 +
 8 files changed, 68 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-2.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0e5be2ce105..a4539067403 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -193,6 +193,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zvl32768b", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvl65536b", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
@@ -1148,6 +1150,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zvl32768b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL32768B},
   {"zvl65536b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL65536B},
 
+  {"zmmul", &gcc_options::x_riscv_zmmul_subext, MASK_ZMMUL},
 
   {NULL, NULL, 0}
 };
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index eb7ef09297e..fb52f69c44c 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -47,11 +47,11 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   if (TARGET_ATOMIC)
 builtin_define ("__riscv_atomic");
 
-  if (TARGET_MUL)
+  if (TARGET_MUL || TARGET_ZMMUL)
 builtin_define ("__riscv_mul");
   if (TARGET_DIV)
 builtin_define ("__riscv_div");
-  if (TARGET_DIV && TARGET_MUL)
+  if (!TARGET_ZMMUL && TARGET_DIV && TARGET_MUL)
 builtin_define ("__riscv_muldiv");
 
   builtin_define_with_int_value ("__riscv_xlen", UNITS_PER_WORD * 8);
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 1e153b3a6e7..55d9fa49782 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -153,6 +153,9 @@ enum stack_protector_guard {
 #define TARGET_ZICBOM ((riscv_zicmo_subext & MASK_ZICBOM) != 0)
 #define TARGET_ZICBOP ((riscv_zicmo_subext & MASK_ZICBOP) != 0)
 
+#define MASK_ZMMUL  (1 << 0)
+#define TARGET_ZMMUL((riscv_zmmul_subext & MASK_ZMMUL) != 0)
+
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
popcount to caclulate the minimal VLEN.  */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2e83ca07394..f11941e1653 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4998,8 +4998,10 @@ riscv_option_override (void)
 
   /* The presence of the M extension implies that division instructions
  are present, so include them unless explicitly disabled.  */
-  if (TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
+  if (!TARGET_ZMMUL && TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
 target_flags |= MASK_DIV;
+  else if(TARGET_ZMMUL && TARGET_MUL)
+warning (0, "%<-mdiv%> cannot use when the % extension is 
present");
   else if (!TARGET_MUL && TARGET_DIV)
 error ("%<-mdiv%> requires %<-march%> to subsume the % extension");
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 308b64dd30d..d4e171464ea 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -763,7 +763,7 @@
   [(set (match_operand:SI  0 "register_operand" "=r")
(mult:SI (match_operand:SI 1 "register_operand" " r")
 (match_operand:SI 2 "register_operand" " r")))]
-  "TARGET_MUL"
+  "TARGET_ZMMUL || TARGET_MUL"
   { return TARGET_64BIT ? "mulw\t%0,%1,%2" : "mul\t%0,%1,%2"; }
   [(set_attr "type" "imul")
(set_attr "mode" "SI")])
@@ -772,7 +772,7 @@
   [(set (match_operand:DI  0 "register_operand" "=r")
(mult:DI (match_operand:DI 1 "register_operand" " r")
 (match_operand:DI 2 "register_operand" " r")))]
-  "TARGET_MUL && TARGET_64BIT"
+  "TARGET_ZMMUL || TARGET_MUL && TARGET_64BIT"
   "mul\t%0,%1,%2"
   [(set_attr "type" "imul")
(set_attr "mode" "DI")])
@@ -782,7 +782,7 @@
(mult:GPR (match_operand:GPR 1 "register_operand" 

Re: [PATCH] Move reload_completed and other rtl.h globals to crtl structure.

2022-07-11 Thread Richard Biener via Gcc-patches
On Sun, 10 Jul 2022, Roger Sayle wrote:

> 
> This patch builds upon Richard Biener's suggestion of avoiding global
> variables to track state/identify which passes have already been run.
> In the early middle-end, the tree-ssa passes use the curr_properties
> field in cfun to track this.  This patch uses a new rtl_pass_progress
> int field in crtl to do something similar.

Why not simply add PROP_rtl_... and use the existing curr_properties
for this?  RTL passes are also passes and this has the advantage
you can add things like reload_completed to the passes properties_set
field hand have the flag setting handled by the pass manager as it was
intended?

Otherwise I of course welcome the change - since I suggested it I'd
like somebody else to ack it as well of course.

Thanks,
Richard.

> This patch allows the global variables lra_in_progress, reload_in_progress,
> reload_completed, epilogue_completed and regstack_completed to be removed
> from rtl.h and implemented as bits within the new crtl->rtl_pass_progress.
> I've also taken the liberty of adding a new combine_completed bit at the
> same time [to respond the Segher's comment it's easy to change this to
> combine1_completed and combine2_completed if we ever perform multiple
> combine passes (or multiple reload/regstack passes)].  At the same time,
> I've also refactored bb_reorder_complete into the same new field;
> interestingly bb_reorder_complete was already a bool in crtl.
>
> One very minor advantage of this implementation/refactoring is that the
> predicate "can_create_pseudo_p ()" which is semantically defined to be
> !reload_in_progress && !reload_completed, can now be performed very
> efficiently as effectively the test (progress & 12) == 0, i.e. a single
> test instruction on x86.
> 
> For consistency, I've also moved cse_not_expected (the last remaining
> global variable in rtl.h) into crtl, as its own bool field.
> 
> The vast majority of this patch is then churn to handle these changes.
> Thanks to macros, most code is unaffected, assuming it treats those
> global variables as r-values, though some source files required/may
> require tweaks as these "variables" are now defined in emit-rtl.h
> instead of rtl.h.
> 
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Might this clean-up be acceptable in stage 1,
> given the possible temporary disruption transitioning some backends?
> I'll start checking various backends myself with cross-compilers, but if
> Jeff Law could spin this patch on his build farm, that would help
> identify targets that need attention.
> 
> 2022-07-10  Roger Sayle  
> 
> gcc/ChangeLog
> * bb-reorder.cc (reorder_basic_blocks): bb_reorder_complete is
> now a bit in crtl->rtl_pass_progress.
> * cfgrtl.cc (rtl_split_edge): Likewise.
> (fixup_partitions): Likewise.
> (verify_hot_cold_block_grouping): Likewise.
> (cfg_layout_initialize): Likewise.
>  * combine.cc (rest_of_handle_combine): Set combine_completed
> bit in crtl->rtl_pass_progress.
> * cse.cc (rest_of_handle_cse): cse_not_expected is now a field
> in crtl.
> (rest_of_handle_cse2): Likewise.
> (rest_of_handle_cse_after_global_opts): Likewise.
> * df-problems.cc: Include emit-rtl.h to access RTL pass progress
> variables.
> 
> * emit-rtl.h (PROGRESS_reload_completed): New bit masks.
> (rtl_data::rtl_pass_progress): New integer field to track progress.
> (rtl_data::bb_reorder_complete): Delete, no part of
> rtl_pass_progress.
> (rtl_data::cse_not_expected): New bool field, previously a global
> variable.
> (crtl_pass_progress): New convience macro.
> (combine_completed): New macro.
> (lra_in_progress): New macro replacing global variable.
> (reload_in_progress): Likewise.
> (reload_completed): Likewise.
> (bb_reorder_complete): New macro replacing bool field in crtl.
> (epilogue_completed): New macro replacing global variable.
> (regstack_completed): Likewise.
> (can_create_pseudo_p): Move from rtl.h and update definition.
> 
> * explow.cc (memory_address_addr_space): cse_not_expected is now
> a field in crtl.
> (use_anchored_address): Likewise.
> * final.c (rest_of_clean_state): Reset crtl->rtl_pass_progress
> to zero.
> * function.cc (prepare_function_start): cse_not_expected is now
> a field in crtl.
> (thread_prologue_and_epilogue_insns): epilogue_completed is now
> a bit in crtl->rtl_pass_progress.
> * ifcvt.cc (noce_try_cmove_arith): cse_not_expected is now a
> field in crtl.
> * lra-eliminations.cc (init_elim_table): lra_in_progress is now
> a bit in crtl->rtl_pass_progress.
> * lra.cc (lra_in_progress): Delete gl

Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Rui Ueyama via Gcc-patches
On Fri, Jul 8, 2022 at 8:41 PM Alexander Monakov  wrote:
>
>
> On Fri, 8 Jul 2022, Martin Liška wrote:
>
> > Hi.
> >
> > All right, there's updated version of the patch that reflects the following 
> > suggestions:
> >
> > 1) strings are used for version identification
> > 2) thread-safe API version (1) is not used if target does not support 
> > locking via pthreads
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >
> > Ready to be installed?
>
> Note that mold side will need to be adjusted, because it did not really
> implement the proposed contract. Maybe we should be more clear how the
> linker is supposed to implement this? Preliminary mold patch does this:
>
> static PluginLinkerAPIVersion
> get_api_version(const char *plugin_identifier,
> unsigned plugin_version,
> PluginLinkerAPIVersion minimal_api_supported,
> PluginLinkerAPIVersion maximal_api_supported,
> const char **linker_identifier,
> unsigned *linker_version) {
>   assert(maximal_api_supported >= LAPI_V1);
>   *linker_identifier = "mold";
>   *linker_version = get_mold_version();
>   is_gcc_linker_api_v1 = true;
>   return LAPI_V1;
> }
>
> but ignoring min_api_supported is wrong, and assuming max_api_supported > 0
> is also wrong. It really should check how given [min; max] range intersects
> with its own range of supported versions.

Currently only one version is defined which is LAPI_V1. I don't think
LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
value. No ordering should be defined between a defined value and an
unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
LAPI_V0.


[PATCH] tree-optimization/106228 - fix vect_setup_realignment virtual SSA handling

2022-07-11 Thread Richard Biener via Gcc-patches
The following adds missing assignment of a virtual use operand to a
created load to vect_setup_realignment which shows as bootstrap
failure on powerpc64-linux and extra testsuite fails for targets
when misaligned loads are not supported or not optimal.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

PR tree-optimization/106228
* tree-vect-data-refs.cc (vect_setup_realignment): Properly
set a VUSE operand on the emitted load.
---
 gcc/tree-vect-data-refs.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d20a10a1524..53e52cb58cb 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5780,6 +5780,13 @@ vect_setup_realignment (vec_info *vinfo, stmt_vec_info 
stmt_info,
   if (loop_for_initial_load)
 pe = loop_preheader_edge (loop_for_initial_load);
 
+  tree vuse;
+  gphi *vphi = get_virtual_phi (loop_for_initial_load->header);
+  if (vphi)
+vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
+  else
+vuse = gimple_vuse (gsi_stmt (*gsi));
+
   /* 3. For the case of the optimized realignment, create the first vector
   load at the loop preheader.  */
 
@@ -5813,6 +5820,7 @@ vect_setup_realignment (vec_info *vinfo, stmt_vec_info 
stmt_info,
   new_stmt = gimple_build_assign (vec_dest, data_ref);
   new_temp = make_ssa_name (vec_dest, new_stmt);
   gimple_assign_set_lhs (new_stmt, new_temp);
+  gimple_set_vuse (new_stmt, vuse);
   if (pe)
 {
   new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
-- 
2.35.3


Re: [PATCH] Add condition coverage profiling

2022-07-11 Thread Jørgen Kvalsvik via Gcc-patches
On 08/07/2022 15:45, Sebastian Huber wrote:
> Hello Jørgen,
> 
> some time passed. It would be nice if you could give a status update. I am 
> quite
> interested in your work.
> 
> Best regards,
>     Sebastian
> 

I'm sorry for the late updates. There have have been a lot of issues (with the
patch) to sort out and some other immediate priorities getting in the way. I
will resubmit the updated patch now.


Re: [PATCH] Control flow redundancy hardening

2022-07-11 Thread Richard Biener via Gcc-patches
On Fri, Jul 8, 2022 at 7:00 PM Alexandre Oliva  wrote:
>
> On Jul  8, 2022, Richard Biener  wrote:
>
> > I'm possibly missing the importance of 'redundancy' in -fharden-control-flow
>
> I took "Control Flow Redundancy" as a term of the art and never
> questioned it.  I think the "redundancy" has to do with the fact that
> control flow is generally affected by tests and conditionals, and the
> checks that an expected path was seemingly taken is redundant with
> those.
>
> > but how can you, from a set of visited blocks local to a function,
> > determine whether the control flow through the function is "expected"
>
> Hmm, maybe the definition should be in the negated form: what the check
> catches is *unexpected* execution flows, e.g. when a block that
> shouldn't have been reached (because none of its predecessors was)
> somehow was.  This unexpected circumstance indicates some kind of fault
> or attack, which is what IIUC this check is about.
>
> Whether the fault was that the hardware took a wrong turn because it was
> power deprived, or some software exploit returned to an artifact at the
> end of a function to get it to serve an alternate purpose, the check at
> the end of the function would catch the unexpected execution of a block
> that couldn't be reached under normal circumstances, and flag the error
> before further damage occurs.
>
> > Can you elaborate on what kind of "derailed" control flow this catches
> > (example?) and what cases it does not?
>
> As in the comments for the pass: for each visited block, check that at
> least one predecessor and at least one successor were also visited.

I see.  So for all of the above being a bit more elaborate in the
documentation might be nice.  Still 'harden' and 'redundancy'
in -fharden-control-flow-redundancy sound redundant, depending
on the target audience -fharden-control-flow or -fcontrol-flow-redundancy
might be a good fit.

> > I'm also curious as of how this compares to hardware
> > mitigations like x86 indirect branch tracking and shadow stack
>
> I'm not expert in the field, but my understanding is that these are
> complementary.
>
> Indirect branch tracking constrains the set of available artifacts one
> might indirectly branch to, but if you reach one of them, you'd be no
> wiser that something fishy was going on without checking that you got
> there from some of the predecessor blocks.  (we don't really check
> precisely that, nor do we check at that precise time, but we check at
> the end of the function that at least one of the predecessor blocks was
> run.)  Constraining the available indirect branch targets helps avoid
> bypassing the code that sets the bit corresponding to that block, which
> might enable an attacker to use an artifact without detection., if
> there's no subsequent block that would be inexplicably reached.
>
> Shadow stacks avoid corruption of return addresses, so you're less
> likely to reach an unexpected block by means of buffer overruns that
> corrupt the stack and overwrite the return address.  Other means to land
> in the middle of a function, such as corrupting memory or logical units
> through power deprivation remain, and this pass helps guard against
> those too.
>
> > and how this relates to the LLVM control flow hardening (ISTR such
> > thing exists).
>
> I've never heard of it.  I've just tried to learn about it, but I
> couldn't find anything pertinent.
>
> Are you by any chance thinking of
> https://clang.llvm.org/docs/ControlFlowIntegrity.html
> ?

Yeah, I probably thought of this ...

> This appears to be entirely unrelated: the control flow nodes it's
> concerned with are functions/methods/subprograms in a program, rather
> than basic blocks within a function.
>
>
> Thanks a lot for these questions.  They're going to help me be better
> prepared for a presentation about various hardening features (*) that
> I've submitted and am preparing for the upcoming Cauldron.

Heh, I was about to suggest that!

Richard.

> (*) 
> https://docs.adacore.com/live/wave/gnat_rm/html/gnat_rm/gnat_rm/security_hardening_features.html
>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about 


Re: [PATCH] Allocate general register(memory/immediate) for 16/32/64-bit vector bit_op patterns.

2022-07-11 Thread Uros Bizjak via Gcc-patches
On Mon, Jul 11, 2022 at 3:15 AM liuhongt  wrote:
>
> And split it to GPR-version instruction after reload.
>
> This will enable below optimization for 16/32/64-bit vector bit_op
>
> -   movd(%rdi), %xmm0
> -   movd(%rsi), %xmm1
> -   pand%xmm1, %xmm0
> -   movd%xmm0, (%rdi)
> +   movl(%rsi), %eax
> +   andl%eax, (%rdi)
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

The patch will create many interunit moves (xmm <-> gpr) for anything
but the most simple logic sequences, because operations with
memory/immediate will be forced into GPR registers, while reg/reg
operations will remain in XMM registers.

I tried to introduce GPR registers to MMX logic insns in the past and
observed the above behavior, but perhaps RA evolved in the mean time
to handle different register sets better (especially under register
pressure). However, I would advise to be careful with this
functionality.

Perhaps this problem should be attacked in stages. First, please
introduce GPR registers to MMX logic instructions (similar to how
VI_16_32 mode instructions are handled). After RA effects will be
analysed, only then memory/immediate handling should be added. Also,
please don't forget to handle ANDNOT insn - TARGET_BMI slightly
complicates this part, but this is also solved with VI_16_32 mode
instructions.

Uros.

>
> gcc/ChangeLog:
>
> PR target/106038
> * config/i386/mmx.md (3): Expand
> with (clobber (reg:CC flags_reg)) under TARGET_64BIT
> (mmx_code>3): Ditto.
> (*mmx_3_1): New define_insn, add post_reload
> splitter after it.
> (*3): New define_insn, also add post_reload
> splitter after it.
> (mmxinsnmode): New mode attribute.
> (VI_16_32_64): New mode iterator.
> (*mov_imm): Refactor with mmxinsnmode.
> * config/i386/predicates.md
> (nonimmediate_or_x86_64_vector_cst): New predicate.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr106038-1.c: New test.
> * gcc.target/i386/pr106038-2.c: New test.
> * gcc.target/i386/pr106038-3.c: New test.
> ---
>  gcc/config/i386/mmx.md | 131 +++--
>  gcc/config/i386/predicates.md  |   4 +
>  gcc/testsuite/gcc.target/i386/pr106038-1.c |  61 ++
>  gcc/testsuite/gcc.target/i386/pr106038-2.c |  35 ++
>  gcc/testsuite/gcc.target/i386/pr106038-3.c |  17 +++
>  5 files changed, 213 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-3.c
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 3294c1e6274..85b06abea27 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -75,6 +75,11 @@ (define_mode_iterator V_16_32_64
>  (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
>  (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
>
> +(define_mode_iterator VI_16_32_64
> +   [V2QI V4QI V2HI
> +(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
> +(V2SI "TARGET_64BIT")])
> +
>  ;; V2S* modes
>  (define_mode_iterator V2FI [V2SF V2SI])
>
> @@ -86,6 +91,14 @@ (define_mode_attr mmxvecsize
>[(V8QI "b") (V4QI "b") (V2QI "b")
> (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
>
> +;; Mapping to same size integral mode.
> +(define_mode_attr mmxinsnmode
> +  [(V8QI "DI") (V4QI "SI") (V2QI "HI")
> +   (V4HI "DI") (V2HI "SI")
> +   (V2SI "DI")
> +   (V4HF "DI") (V2HF "SI")
> +   (V2SF "DI")])
> +
>  (define_mode_attr mmxdoublemode
>[(V8QI "V8HI") (V4HI "V4SI")])
>
> @@ -350,22 +363,7 @@ (define_insn_and_split "*mov_imm"
>HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
> mode);
>operands[1] = GEN_INT (val);
> -  machine_mode mode;
> -  switch (GET_MODE_SIZE (mode))
> -{
> -case 2:
> -  mode = HImode;
> -  break;
> -case 4:
> -  mode = SImode;
> -  break;
> -case 8:
> -  mode = DImode;
> -  break;
> -default:
> -  gcc_unreachable ();
> -}
> -  operands[0] = lowpart_subreg (mode, operands[0], mode);
> +  operands[0] = lowpart_subreg (mode, operands[0], mode);
>  })
>
>  ;; For TARGET_64BIT we always round up to 8 bytes.
> @@ -2948,14 +2946,28 @@ (define_expand "mmx_3"
>   (match_operand:MMXMODEI 1 "register_mmxmem_operand")
>   (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
>"TARGET_MMX || TARGET_MMX_WITH_SSE"
> -  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
> +{
> +  ix86_fixup_binary_operands_no_copy (, mode, operands);
> +  if (TARGET_64BIT)
> +  {
> +ix86_expand_binary_operator (, mode, operands);
> +DONE;
> +  }
> +})
>
>  (define_expand "3"
>[(set (match_operand:MMXMODEI 0 "register_operand")
> (any_logic:MMXMODEI
> 

Re: Modula-2: merge followup (brief update on the progress of the new linking implementation)

2022-07-11 Thread Rainer Orth
Hi Gaius,

> A very brief update to say that I've merged the new linking
> implementation back onto the devel/modula-2 branch,

unfortunately, that branch doesn't bootstrap for me anywere:

* On both x86_64-pc-linux-gnu and i386-pc-solaris2.11:

libtool: compile:  /var/gcc/modula-2/11.4-gcc-modula-2/./gcc/gm2 
-B/var/gcc/modula-2/11.4-gcc-modula-2/./gcc/ -c -g -O2 -g -O2 -I. 
-I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs-min 
-I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs -fno-exceptions 
-fno-m2-plugin 
/vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/M2RTS.mod
  -fPIC -DPIC -o .libs/M2RTS.o
/vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/M2RTS.mod:29:1:
 error: In implementation module ‘M2RTS’: module does not export procedure 
which is a necessary component of the runtime system, hint check the path and 
library/language variant
   29 | IMPORT libc, SYSTEM ;
  | ^~
make[5]: *** [Makefile:746: M2RTS.lo] Error 1

  and several more.

* On sparc-sun-solaris2.11:

/bin/ksh /vol/gcc/src/hg/master/modula-2/gcc/m2/tools-src/makeSystem -fpim \
 /vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs/SYSTEM.def \
 /vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs/SYSTEM.mod \
 -I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs \
 "/var/gcc/modula-2/11.4-gcc-modula-2/./gcc/gm2 
-B/var/gcc/modula-2/11.4-gcc-modula-2/./gcc/ -fno-checking" 
/var/gcc/modula-2/11.4-gcc-modula-2/gcc/m2/gm2-libs/SYSTEM.def
gm2: internal compiler error: Segmentation Fault signal terminated program 
cc1gm2
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
See  for instructions.
SYSTEM module creates type: LOC
SYSTEM module creates type: WORD
SYSTEM module creates type: BYTE
SYSTEM module creates type: ADDRESS
SYSTEM module creates type: INTEGER8
SYSTEM module creates type: INTEGER16
SYSTEM module creates type: INTEGER32
SYSTEM module creates type: INTEGER64
SYSTEM module creates type: CARDINAL8
SYSTEM module creates type: CARDINAL16
SYSTEM module creates type: CARDINAL32
SYSTEM module creates type: CARDINAL64
SYSTEM module creates type: WORD16
SYSTEM module creates type: WORD32
SYSTEM module creates type: WORD64
SYSTEM module creates type: BITSET8
SYSTEM module creates type: BITSET16
SYSTEM module creates type: BITSET32
SYSTEM module creates type: REAL32
SYSTEM module creates type: REAL64
SYSTEM module creates type: REAL128
SYSTEM module creates type: COMPLEX32
SYSTEM module creates type: COMPLEX64
SYSTEM module creates type: COMPLEX128
SYSTEM module creates type: CSIZE_T
SYSTEM module creates type: CSSIZE_T
gm2: internal compiler error: Segmentation Fault signal terminated program 
cc1gm2
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
See  for instructions.
make[3]: *** [/vol/gcc/src/hg/master/modula-2/gcc/m2/Make-lang.in:1237: 
/var/gcc/modula-2/11.4-gcc-modula-2/gcc/m2/gm2-libs/SYSTEM.def] Error 4
make[3]: Leaving directory '/var/gcc/modula-2/11.4-gcc-modula-2/gcc'
make[2]: *** [Makefile:5055: all-stage2-gcc] Error 2

cc1gm2 -quiet -mcpu=v9 -fpim -fno-scaffold-main -fno-scaffold-dynamic 
-fno-scaffold-static -fno-m2-plugin -fdump-system-exports -c 
-I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs 
-I/usr/local/lib/gcc/sparc-sun-solaris2.11/13.0.0/m2/m2pim 
/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs/SYSTEM.mod -o SYSTEM.s

Thread 2 received signal SIGSEGV, Segmentation fault.
[Switching to Thread 1 (LWP 1)]
0xffbfe364 in ?? ()
(gdb) bt
#0  0xffbfe364 in ?? ()
#1  0x0064be24 in m2expr_BuildBinarySetDo ()
#2  0x006a5634 in CodeBinarySetShift(m2expr_BuildSetProcedure_p, DoProcedure_p, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int) ()
#3  0x006ae0e8 in M2GenGCC_ConvertQuadsToTree ()
#4  0x006ccc14 in M2Scope_ForeachScopeBlockDo ()
#5  0x006a2014 in M2Code_CodeBlock ()
#6  0x006ca324 in Lists_ForeachItemInListDo ()
#7  0x006936a0 in SymbolTable_ForeachProcedureDo ()
#8  0x006a2194 in M2Code_CodeBlock ()
#9  0x006a2608 in M2Code_Code ()
#10 0x00684214 in M2Comp_compile ()
#11 0x00627b5c in gm2_langhook_parse_file() ()
#12 0x00d8c968 in compile_file() ()
#13 0x00d905fc in toplev::main(int, char**) ()
#14 0x01a092dc in main ()

  Seems like PR modula2/101392 raised its ugly head again, after it had
  been gone for a while.  I'm currently trying sparcv9-sun-solaris2.11
  instead to see how that fares.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


RE: [PATCH] Move reload_completed and other rtl.h globals to crtl structure.

2022-07-11 Thread Roger Sayle
On 11 July 2022 08:20, Richard Biener wrote:
> On Sun, 10 Jul 2022, Roger Sayle wrote:
> 
> > This patch builds upon Richard Biener's suggestion of avoiding global
> > variables to track state/identify which passes have already been run.
> > In the early middle-end, the tree-ssa passes use the curr_properties
> > field in cfun to track this.  This patch uses a new rtl_pass_progress
> > int field in crtl to do something similar.
> 
> Why not simply add PROP_rtl_... and use the existing curr_properties for
this?
> RTL passes are also passes and this has the advantage you can add things
like
> reload_completed to the passes properties_set field hand have the flag
setting
> handled by the pass manager as it was intended?
> 

Great question, and I did initially consider simply adding more RTL fields
to
curr_properties.  My hesitation was from the comments/documentation that
the curr_properties field is used by the pass manager as a way to track
(and verify) the properties/invariants that are required, provided and
destroyed
by each pass.  This semantically makes sense for properties such as accurate
data flow, ssa form, cfg_layout, nonzero_bits etc, where hypothetically the
pass manager can dynamically schedule a pass/analysis to ensure the next
pass
has the pre-requisite information it needs.

This seems semantically slightly different from tracking time/progress,
where
we really want something more like DEBUG_COUNTER that simply provides
the "tick-tock" of a pass clock.  Alas, the "pass number", as used in the
suffix
of dump-files (where 302 currently means reload) isn't particularly useful
as
these change/evolve continually.

Perhaps the most obvious indication of this (subtle) difference is the
curr_properties field (PROP_rtl_split_insns) which tracks whether
instructions
have been split, where at a finer granularity rtl_pass_progress may wish to
distinguish split1 (after combine before reload), split2 (after reload
before
peephole2) and split3 (after peephole2).  It's conceptually not a simple
property, whether all insns have been split or not, as in practice this is
more subtle with backends choosing which instructions get split at which
"times".

There's also the concern that we've a large number of passes (currently
62 RTL passes), and only a finite number of bits (in curr_properties), so
having two integers reduces the risk of running out of bits and needing
to use a "wider" data structure.

To be honest, I just didn't want to hijack curr_properties to abuse it for a

use that didn't quite match the original intention, without checking with
you and the other maintainers first.  If the above reasoning isn't
convincing,
I can try spinning an alternate patch using curr_properties (but I'd expect
even more churn as backend source files would now need to #include
tree-passes.h and function.h to get reload_completed).  And of course,
a volunteer is welcome to contribute that re-refactoring after this one.

I've no strong feelings either way.  It was an almost arbitrary engineering
decision (but hopefully the above explains the balance of my reasoning).

Roger
--




Re: [PATCH] d: Move DSO registry support code from compiler to drtstuff in library (PR100062)

2022-07-11 Thread Rainer Orth
Hi Iain,

> Currently the DSO support for D runtime is generated by the compiler in
> every object, when really it is only required once per shared object.
>
> This patch moves that support logic from the compiler itself to the
> library as part of the drtstuff code.  The object files drtbegin.o and
> drtend.o are now always linked in.
>
> Bootstrapped and tested on x86_64-linux-gnu/-m32/-mx32, with no
> observable regressions.
>
> @Rainer, as you provided the original, would be good to validate this is
> fine for Solaris too.

I did, with mixed success: initially, the patch broke bootstrap on both
i386-pc-solaris2.11 and sparc-sun-solaris2.11 linking libgdruntime.so:

Text relocation remainsreferenced
against symbol  offset  in file
__start_minfo   0x2 gcc/drtbegin.o
_D8drtstuff7dsoSlotPv   0x7 gcc/drtbegin.o
_D8drtstuff7dsoDataS3gcc8sections6common15CompilerDSOData 0x1e  
gcc/drtbegin.o
_D8drtstuff7dsoDataS3gcc8sections6common15CompilerDSOData 0x28  
gcc/drtbegin.o
__stop_minfo0x2cgcc/drtbegin.o
_D8drtstuff7dsoDataS3gcc8sections6common15CompilerDSOData 0x34  
gcc/drtbegin.o
_D8drtstuff7dsoDataS3gcc8sections6common15CompilerDSOData 0x39  
gcc/drtbegin.o
_D8drtstuff7dsoDataS3gcc8sections6common15CompilerDSOData 0x7   
gcc/drtbegin.o
_d_dso_registry 0x3egcc/drtbegin.o
_d_dso_registry 0xc gcc/drtbegin.o
ld: fatal: relocations remain against allocatable but non-writable sections
collect2: error: ld returned 1 exit status
make[5]: *** [Makefile:1943: libgdruntime.la] Error 1

Solaris ld defaults to -z text, and gcc is still non-pic/pie by
default.  The attached patch fixes the issue by building
drtbegin.o/drtend.o as libtool objects.

There are a couple of caveats, though:

* It directs gdc to libdruntime/gcc/.libs instead of libdruntime/gcc.
  I've no idea what happens when configuring with --disable-shared,
  building for targets without PIC support, or on Windows where that
  directory is called _libs instead (I've found no variable to use
  here).

  OTOH, testsuite_flags.in already hardcodes .libs, so this is a
  preexisting problem.

* I haven't checked if the PIC drt*.o files are actually installed,
  otherwise gdc wouldn't be able to create shared objects on Solaris.

* This comment is misleading now that those files are used unconditionally:

# Provide __start_minfo, __stop_minfo if linker doesn't.
DRTSTUFF = gcc/drtbegin.o gcc/drtend.o

At least with this incremental patch, I've been able to successfully
bootstrap and test trunk on i386-pc-solaris2.11 (Solaris 11.3 which
lacks ld support for __start_/__stop_diff -rup dist/libphobos/libdruntime/Makefile.am local/libphobos/libdruntime/Makefile.am
--- dist/libphobos/libdruntime/Makefile.am	2022-07-11 09:40:43.798158000 +0200
+++ local/libphobos/libdruntime/Makefile.am	2022-07-10 22:04:18.941416000 +0200
@@ -104,16 +104,16 @@ if DRUNTIME_CPU_S390
 endif
 
 # Provide __start_minfo, __stop_minfo if linker doesn't.
-DRTSTUFF = gcc/drtbegin.o gcc/drtend.o
+DRTSTUFF = gcc/drtbegin.lo gcc/drtend.lo
 
 toolexeclib_DATA = $(DRTSTUFF)
 
-gcc/drtbegin.o: gcc/drtstuff.d
-	$(GDC) -fno-druntime -fversion=DRT_BEGIN $(GDCFLAGS) $(MULTIFLAGS) \
+gcc/drtbegin.lo: gcc/drtstuff.d
+	$(LTDCOMPILE) -fno-druntime -fversion=DRT_BEGIN $(GDCFLAGS) $(MULTIFLAGS) \
 		$(D_EXTRA_DFLAGS) -c -o $@ $<
 
-gcc/drtend.o: gcc/drtstuff.d
-	$(GDC) -fno-druntime -fversion=DRT_END $(GDCFLAGS) $(MULTIFLAGS) \
+gcc/drtend.lo: gcc/drtstuff.d
+	$(LTDCOMPILE) -fno-druntime -fversion=DRT_END $(GDCFLAGS) $(MULTIFLAGS) \
 		$(D_EXTRA_DFLAGS) -c -o $@ $<
 
 # Generated by configure
@@ -128,7 +128,7 @@ ALL_DRUNTIME_SOURCES = $(DRUNTIME_DSOURC
 toolexeclib_LTLIBRARIES = libgdruntime.la
 libgdruntime_la_SOURCES = $(ALL_DRUNTIME_SOURCES)
 libgdruntime_la_LIBTOOLFLAGS =
-libgdruntime_la_LDFLAGS = -Wc,-nophoboslib,-dstartfiles,-B../src,-Bgcc \
+libgdruntime_la_LDFLAGS = -Wc,-nophoboslib,-dstartfiles,-B../src,-Bgcc/.libs \
 -version-info $(libtool_VERSION)
 libgdruntime_la_LIBADD = $(LIBATOMIC) $(LIBBACKTRACE)
 libgdruntime_la_DEPENDENCIES = $(DRTSTUFF)
diff -rup dist/libphobos/src/Makefile.am local/libphobos/src/Makefile.am
--- dist/libphobos/src/Makefile.am	2022-04-22 10:35:48.16065 +0200
+++ local/libphobos/src/Makefile.am	2022-07-10 22:04:32.081171000 +0200
@@ -44,7 +44,7 @@ toolexeclib_DATA = libgphobos.spec
 toolexeclib_LTLIBRARIES = libgphobos.la
 libgphobos_la_SOURCES = $(ALL_PHOBOS_SOURCES)
 libgphobos_la_LIBTOOLFLAGS =
-libgphobos_la_LDFLAGS = -Wc,-nophoboslib,-dstartfiles,-B../libdruntime/gcc \
+libgphobos_la_LDFLAGS = -Wc,-nophoboslib,-dstartfiles,-B../libdruntime/gcc/.libs \
 -version-info $(libtool_VERSION)
 if ENABLE_LIBDRUNTIME_ONLY
 libgphobos_la_LIBADD = ../libdruntime/libgdruntime_convenience.la
diff -rup dist/libphobos/testsuite/testsuite_flags.in local/libp

Re: [PATCH 1/1 V3] RISC-V: Support Zmmul extension

2022-07-11 Thread Kito Cheng via Gcc-patches
On Mon, Jul 11, 2022 at 3:15 PM  wrote:
>
> From: LiaoShihua 
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc:
> * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins):
> * config/riscv/riscv-opts.h (MASK_ZMMUL):
> (TARGET_ZMMUL):
> * config/riscv/riscv.cc (riscv_option_override):
> * config/riscv/riscv.md:
> * config/riscv/riscv.opt:
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zmmul-1.c: New test.
> * gcc.target/riscv/zmmul-2.c: New test.
>
> ---
>  gcc/common/config/riscv/riscv-common.cc  |  3 +++
>  gcc/config/riscv/riscv-c.cc  |  4 ++--
>  gcc/config/riscv/riscv-opts.h|  3 +++
>  gcc/config/riscv/riscv.cc|  4 +++-
>  gcc/config/riscv/riscv.md| 28 
>  gcc/config/riscv/riscv.opt   |  3 +++
>  gcc/testsuite/gcc.target/riscv/zmmul-1.c | 20 +
>  gcc/testsuite/gcc.target/riscv/zmmul-2.c | 20 +
>  8 files changed, 68 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zmmul-2.c
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index 0e5be2ce105..a4539067403 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -193,6 +193,8 @@ static const struct riscv_ext_version 
> riscv_ext_version_table[] =
>{"zvl32768b", ISA_SPEC_CLASS_NONE, 1, 0},
>{"zvl65536b", ISA_SPEC_CLASS_NONE, 1, 0},
>
> +  {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
> +
>/* Terminate the list.  */
>{NULL, ISA_SPEC_CLASS_NONE, 0, 0}
>  };
> @@ -1148,6 +1150,7 @@ static const riscv_ext_flag_table_t 
> riscv_ext_flag_table[] =
>{"zvl32768b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL32768B},
>{"zvl65536b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL65536B},
>
> +  {"zmmul", &gcc_options::x_riscv_zmmul_subext, MASK_ZMMUL},
>
>{NULL, NULL, 0}
>  };
> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> index eb7ef09297e..fb52f69c44c 100644
> --- a/gcc/config/riscv/riscv-c.cc
> +++ b/gcc/config/riscv/riscv-c.cc
> @@ -47,11 +47,11 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>if (TARGET_ATOMIC)
>  builtin_define ("__riscv_atomic");
>
> -  if (TARGET_MUL)
> +  if (TARGET_MUL || TARGET_ZMMUL)
>  builtin_define ("__riscv_mul");
>if (TARGET_DIV)
>  builtin_define ("__riscv_div");
> -  if (TARGET_DIV && TARGET_MUL)
> +  if (!TARGET_ZMMUL && TARGET_DIV && TARGET_MUL)

This should checked in riscv_option_override, so no need to check
TARGET_ZMMUL again here

>  builtin_define ("__riscv_muldiv");
>
>builtin_define_with_int_value ("__riscv_xlen", UNITS_PER_WORD * 8);
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index 1e153b3a6e7..55d9fa49782 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -153,6 +153,9 @@ enum stack_protector_guard {
>  #define TARGET_ZICBOM ((riscv_zicmo_subext & MASK_ZICBOM) != 0)
>  #define TARGET_ZICBOP ((riscv_zicmo_subext & MASK_ZICBOP) != 0)
>
> +#define MASK_ZMMUL  (1 << 0)
> +#define TARGET_ZMMUL((riscv_zmmul_subext & MASK_ZMMUL) != 0)
> +
>  /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
> set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
> popcount to caclulate the minimal VLEN.  */
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 2e83ca07394..f11941e1653 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -4998,8 +4998,10 @@ riscv_option_override (void)
>
>/* The presence of the M extension implies that division instructions
>   are present, so include them unless explicitly disabled.  */
> -  if (TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
> +  if (!TARGET_ZMMUL && TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
>  target_flags |= MASK_DIV;
> +  else if(TARGET_ZMMUL && TARGET_MUL)

m and zmmul are not incompatible

> +warning (0, "%<-mdiv%> cannot use when the % extension is 
> present");

Move it to a standalone check e.g. if (TARGET_DIV && TARGET_ZMMUL &&
!TARGET_MUL).

>else if (!TARGET_MUL && TARGET_DIV)
>  error ("%<-mdiv%> requires %<-march%> to subsume the % extension");
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 308b64dd30d..d4e171464ea 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -763,7 +763,7 @@
>[(set (match_operand:SI  0 "register_operand" "=r")
> (mult:SI (match_operand:SI 1 "register_operand" " r")
>  (match_operand:SI 2 "register_operand" " r")))]
> -  "TARGET_MUL"
> +  "TARGET_ZMMUL || TARGET_MUL"
>{ return TARGET_64BIT ? "mulw\t%0,%1,%2" : "mul\t%0,%1,%2"; }
>[(set_attr "type" "imul")
> (set_a

Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Alexander Monakov via Gcc-patches
On Mon, 11 Jul 2022, Rui Ueyama wrote:

> > but ignoring min_api_supported is wrong, and assuming max_api_supported > 0
> > is also wrong. It really should check how given [min; max] range intersects
> > with its own range of supported versions.
> 
> Currently only one version is defined which is LAPI_V1. I don't think
> LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
> value. No ordering should be defined between a defined value and an
> unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
> LAPI_V0.

You still cannot rely on API guarantees of LAPI_V1 when the plugin does not
advertise it (thread safety of claim_file in this particular case).

And you still should check the intersection of supported API ranges
and give a sane diagnostic when min_api_supported advertised by the plugin
exceeds LAPI_V1 (though, granted, the plugin could error out as well in this
case).

Alexander


Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases (was: [PATCH] c: Fix location for _Pragma tokens [PR97498])

2022-07-11 Thread Thomas Schwinge
Hi!

On 2022-07-10T16:51:11-0400, Lewis Hyatt via Gcc-patches 
 wrote:
> On Sat, Jul 9, 2022 at 11:59 PM Jeff Law via Gcc-patches
>  wrote:
>> On 7/9/2022 2:52 PM, Lewis Hyatt via Gcc-patches wrote:
>> > PR97498 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97498) is another PR
>> > related to the fact that imprecise locations for _Pragma result in
>> > counterintuitive behavior for GCC diagnostic pragmas

>> > I think the main source of problems for all remaining issues is that we use
>> > the global input_location for deciding when/if a diagnostic should apply. I
>> > think it should be eventually doable to eliminate this, and rather properly
>> > resolve the token locations to the place they need to be
>> I've long wanted to see our dependency on input_location be diminished
>> with the goal of making it go away completely.
> [...]

> Then I will plan to work on
> eliminating input_location from c-pragma.cc as a longer term goal.

Great; I too am looking forward to that.  There, and then elsewhere,
everywhere.  :-)

>> > The rest of [patch] is just tweaking a couple tests which were sensitive 
>> > to the
>> > location being output. In all these cases, the new locations seem more
>> > informative to me than the old ones.

ACK, thanks.

On top of that, I've just pushed to master branch
commit 06b2a2abe26554c6f9365676683d67368cbba206
"Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases", see
attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 06b2a2abe26554c6f9365676683d67368cbba206 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Mon, 11 Jul 2022 09:33:19 +0200
Subject: [PATCH] Enhance '_Pragma' diagnostics verification in OMP C/C++ test
 cases

Follow-up to recent commit 0587cef3d7962a8b0f44779589ba2920dd3d71e5
"c: Fix location for _Pragma tokens [PR97498]".

	gcc/testsuite/
	* c-c++-common/gomp/pragma-3.c: Enhance '_Pragma' diagnostics
	verification.
	* c-c++-common/gomp/pragma-5.c: Likewise.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Enhance
	'_Pragma' diagnostics verification.
---
 gcc/testsuite/c-c++-common/gomp/pragma-3.c| 8 +---
 gcc/testsuite/c-c++-common/gomp/pragma-5.c| 8 +---
 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c | 8 +---
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/gomp/pragma-3.c b/gcc/testsuite/c-c++-common/gomp/pragma-3.c
index ae18e9b8886..3e1b2111c3d 100644
--- a/gcc/testsuite/c-c++-common/gomp/pragma-3.c
+++ b/gcc/testsuite/c-c++-common/gomp/pragma-3.c
@@ -2,13 +2,15 @@
 /* PR preprocessor/103165  */
 
 #define inner(...) #__VA_ARGS__ ; _Pragma("omp error severity(warning) message (\"Test\") at(compilation)") /* { dg-line inner_location } */
-#define outer(...) inner(__VA_ARGS__)
+#define outer(...) inner(__VA_ARGS__) /* { dg-line outer_location } */
 
 void
 f (void)
 {
-  const char *str = outer(inner(1,2));
-  /* { dg-warning "'pragma omp error' encountered: Test" "inner expansion" { target *-*-* } inner_location } */
+  const char *str = outer(inner(1,2)); /* { dg-line str_location } */
+  /* { dg-warning "35:'pragma omp error' encountered: Test" "" { target *-*-* } inner_location }
+ { dg-note "20:in expansion of macro 'inner'" "" { target *-*-* } outer_location }
+ { dg-note "21:in expansion of macro 'outer'" "" { target *-*-* } str_location } */
 }
 
 #if 0
diff --git a/gcc/testsuite/c-c++-common/gomp/pragma-5.c b/gcc/testsuite/c-c++-common/gomp/pragma-5.c
index 8124f701502..173c25e803a 100644
--- a/gcc/testsuite/c-c++-common/gomp/pragma-5.c
+++ b/gcc/testsuite/c-c++-common/gomp/pragma-5.c
@@ -2,13 +2,15 @@
 /* PR preprocessor/103165  */
 
 #define inner(...) #__VA_ARGS__ ; _Pragma   (	"   omp		error severity   (warning)	message (\"Test\") at(compilation)" ) /* { dg-line inner_location } */
-#define outer(...) inner(__VA_ARGS__)
+#define outer(...) inner(__VA_ARGS__) /* { dg-line outer_location } */
 
 void
 f (void)
 {
-  const char *str = outer(inner(1,2));
-  /* { dg-warning "'pragma omp error' encountered: Test" "inner expansion" { target *-*-* } inner_location } */
+  const char *str = outer(inner(1,2)); /* { dg-line str_location } */
+  /* { dg-warning "35:'pragma omp error' encountered: Test" "" { target *-*-* } inner_location }
+ { dg-note "20:in expansion of macro 'inner'" "" { target *-*-* } outer_location }
+ { dg-note "21:in expansion of macro 'outer'" "" { target *-*-* } str_location } */
 }
 
 #if 0
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
index 16aa0dd4ac1..72094609f0f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
+++ b/libgomp/testsuite/libgomp.

RE: [PATCH] Move reload_completed and other rtl.h globals to crtl structure.

2022-07-11 Thread Richard Biener via Gcc-patches
On Mon, 11 Jul 2022, Roger Sayle wrote:

> On 11 July 2022 08:20, Richard Biener wrote:
> > On Sun, 10 Jul 2022, Roger Sayle wrote:
> > 
> > > This patch builds upon Richard Biener's suggestion of avoiding global
> > > variables to track state/identify which passes have already been run.
> > > In the early middle-end, the tree-ssa passes use the curr_properties
> > > field in cfun to track this.  This patch uses a new rtl_pass_progress
> > > int field in crtl to do something similar.
> > 
> > Why not simply add PROP_rtl_... and use the existing curr_properties for
> this?
> > RTL passes are also passes and this has the advantage you can add things
> like
> > reload_completed to the passes properties_set field hand have the flag
> setting
> > handled by the pass manager as it was intended?
> > 
> 
> Great question, and I did initially consider simply adding more RTL fields
> to
> curr_properties.  My hesitation was from the comments/documentation that
> the curr_properties field is used by the pass manager as a way to track
> (and verify) the properties/invariants that are required, provided and
> destroyed
> by each pass.  This semantically makes sense for properties such as accurate
> data flow, ssa form, cfg_layout, nonzero_bits etc, where hypothetically the
> pass manager can dynamically schedule a pass/analysis to ensure the next
> pass
> has the pre-requisite information it needs.

Yeah, that might have been some of the design inspiring bits but
actually the pass manager does nothing of this sorts instead it
only verifies the statically scheduled producers/consumers match
expectations ...

We also track in these bits how the IL evolves from high to low
GIMPLE, to CFG/SSA, to after the point where complex or vector ops
are lowered, etc.  To me whether we've assigned hard regs (after-reload)
is quite similar, the after_combine would be similar to
PROP_gimple_opt_math.  You've already found PROP_rtl_split_insns.

> This seems semantically slightly different from tracking time/progress,
> where
> we really want something more like DEBUG_COUNTER that simply provides
> the "tick-tock" of a pass clock.  Alas, the "pass number", as used in the
> suffix
> of dump-files (where 302 currently means reload) isn't particularly useful
> as
> these change/evolve continually.
> 
> Perhaps the most obvious indication of this (subtle) difference is the
> curr_properties field (PROP_rtl_split_insns) which tracks whether
> instructions
> have been split, where at a finer granularity rtl_pass_progress may wish to
> distinguish split1 (after combine before reload), split2 (after reload
> before
> peephole2) and split3 (after peephole2).  It's conceptually not a simple
> property, whether all insns have been split or not, as in practice this is
> more subtle with backends choosing which instructions get split at which
> "times".

I'm not sure it's good to allow such fine-grained control since it
makes the pass pipeline x N-target.md a quite fragile setup.  You
could argue you want such flag on the actual RTL insn
(was-split-from-insn-X) even ...

> There's also the concern that we've a large number of passes (currently
> 62 RTL passes), and only a finite number of bits (in curr_properties), so
> having two integers reduces the risk of running out of bits and needing
> to use a "wider" data structure.

As said whether an individual pass has run or not is not what this
bitmask is for nor is it what we should do.  We should provide a more
abstract thing to test for and I doubt we can make one for each of the
62 RTL passes ;)

> To be honest, I just didn't want to hijack curr_properties to abuse it for a
> 
> use that didn't quite match the original intention, without checking with
> you and the other maintainers first.  If the above reasoning isn't
> convincing,
> I can try spinning an alternate patch using curr_properties (but I'd expect
> even more churn as backend source files would now need to #include
> tree-passes.h and function.h to get reload_completed).  And of course,
> a volunteer is welcome to contribute that re-refactoring after this one.
>
> I've no strong feelings either way.  It was an almost arbitrary engineering
> decision (but hopefully the above explains the balance of my reasoning).

I definitely lean towards using curr_properties and new PROP_rtl_...,
but if anybody else has opposite views I won't insist.

+  /* Track progress of RTL passes, reload_completed etc.  */
+  int rtl_pass_progress;

btw, this suggests that we don't need a bitmask but instead can define
a strong order, making the tests with a relational compare?  The
reason we do not have this with PROP_ is twofold, one, we're mixing
with flags that come and go away, second, some of the "order" stuff
might depend on the optimization level since we have separate
pipelines for -O0 and -O1 (and -Og) and whether for example complex
or vectors are lowered might happen in different order (I didn't
actually check whether that's the case right now).  It 

Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Richard Biener via Gcc-patches
On Mon, Jul 11, 2022 at 11:16 AM Alexander Monakov  wrote:
>
> On Mon, 11 Jul 2022, Rui Ueyama wrote:
>
> > > but ignoring min_api_supported is wrong, and assuming max_api_supported > > > > 0
> > > is also wrong. It really should check how given [min; max] range 
> > > intersects
> > > with its own range of supported versions.
> >
> > Currently only one version is defined which is LAPI_V1. I don't think
> > LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
> > value. No ordering should be defined between a defined value and an
> > unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
> > LAPI_V0.
>
> You still cannot rely on API guarantees of LAPI_V1 when the plugin does not
> advertise it (thread safety of claim_file in this particular case).

So with LAPI_UNSPECIFIED all the plugin gets is the linker name and version.
Clarifying the documentation on LAPI_UNSPECIFIED might be nice, also
what the expectation is on the linker when the plugin returns
LAPI_UNSPECIFIED when it speficied minimal_api_supported == V1.
"minimal_api_supported == LAPI_UNSPECIFIED" does not make much
sense if using Ruis reading of this value?

> And you still should check the intersection of supported API ranges
> and give a sane diagnostic when min_api_supported advertised by the plugin
> exceeds LAPI_V1 (though, granted, the plugin could error out as well in this
> case).
>
> Alexander


Re: [PATCH] Introduce hardbool attribute for C

2022-07-11 Thread Richard Biener via Gcc-patches
On Fri, Jul 8, 2022 at 5:29 PM Alexandre Oliva  wrote:
>
> On Jul  8, 2022, Richard Biener  wrote:
>
> > Does this follow some other compilers / language?
>
> It is analogous to Ada's booleans with representation clauses and
> runtime validation checking at use points.
>
> > Is such feature used in existing code?
>
> Not that I know.  The attribute name was my choice.
>
> That said, we have already delivered the experimental implementation to
> the customer who requested it (GCC was in stage3, thus the delayed
> submission), so by now they may already have some code using it.
>
> > Why is it useful to allow arbitrary values for true/false?
>
> Increasing the hamming distance between legitimate values is desirable
> to catch hardware-based attacks, but booleans are probably the only
> builtin type that has room for that.
>
> > Why is the default 0 and ~0 rather than 0 and 1 as for _Bool?
>
> My understanding is that the goal is to maximize the hamming distance
> between the legitimate values, so as to increase the sensibility to
> errors.
>
> There was no requirement for defaults to be these values, however.  The
> examples often used 0x5a and 0xa5, but those seemed too arbitrary to be
> defaults.  I found ~1 and 1 to be too nasty, so I went for 0 and ~0,
> that are still recognizable as false and true values, respectively,
> though I'm not sure whether this is advantageous.
>
> >> +@smallexample
> >> +typedef char __attribute__ ((__hardbool__ (0x5a))) hbool;
> >> +hbool first = 0;   /* False, stored as (char)0x5a.  */
> >> +hbool second = !first; /* True, stored as ~(char)0x5a.  */
> > hbool thrid;
>
> > what's the initial value of 'third'?
>
> If it's an automatic variable, it's uninitialized, as expected for C.
> It might by chance happen to hold one of the legitimate values, but odds
> are it doesn't, and if so, accessing it will trap.
>
> If it's a static-storage variable, it will be zero-initialized as
> expected, but the zero will be mapped to the representation for false.
>
> > The documentation should probably be explicit about this case.
>
> Agreed, thanks, will do.
>
> > If the underlying representation is an Enum, why not have
> > hardened_enum instead?
>
> In Ada, Booleans are enumerator types with various conventions and
> builtin operations, with or without a representation clause, that sets
> the representation values for the enumerators.  Other enumerations
> aren't subject to such conventions as automatic conversions between
> Boolean types with different representations, that enable all of them to
> be used interchangeably (source-wise) in logical expressions.
>
> It would nevertheless be feasible to implement hardened enumerator
> types, that, like Ada, perform runtime validation checking that the
> stored value corresponds to one of the enumerators.  This would not fit
> some common use cases of enumerator types, e.g. those in which
> enumerators define bits or masks, and different enumerators are combined
> into a single variable.  This was not the feature that we were asked to
> implement, though.
>
> > A hardened _Bool might want to have a special NaT value as well I
> > guess?
>
> That might sound appealing, but ISTM that it would instead break the
> symmetry of the maximal hamming distance between the representation
> values for true and false.  OTOH, NaB, if so desired, would be just any
> other value; the challenge would be to get such a value stored in a
> variable, given that actual booleans can only hold true (nonzero) or
> false (zero), and neither would convert to NaB.

Thanks for all the detailed answers - I do see limited use of this
feature (the method and robustness only works for _Bool, not other
types which is rather limiting), but it's sound.

Note the actual change should be reviewed by frontend maintainers.

I suppose a C++ hardbool would be implemented as a (standard library)
class instead.

Thanks,
Richard.

>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about 


[PATCH] Add condition coverage profiling

2022-07-11 Thread Jørgen Kvalsvik via Gcc-patches
This patch adds support in gcc+gcov for modified condition/decision
coverage (MC/DC) with the -fprofile-conditions flag. MC/DC is a type of
test/code coverage and it is particularly important in the avation and
automotive industries for safety-critical applications. MC/DC it is
required for or recommended by:

* DO-178C for the most critical software (Level A) in avionics
* IEC 61508 for SIL 4
* ISO 26262-6 for ASIL D

>From the SQLite webpage:

Two methods of measuring test coverage were described above:
"statement" and "branch" coverage. There are many other test
coverage metrics besides these two. Another popular metric is
"Modified Condition/Decision Coverage" or MC/DC. Wikipedia defines
MC/DC as follows:

* Each decision tries every possible outcome.
* Each condition in a decision takes on every possible outcome.
* Each entry and exit point is invoked.
* Each condition in a decision is shown to independently affect
  the outcome of the decision.

In the C programming language where && and || are "short-circuit"
operators, MC/DC and branch coverage are very nearly the same thing.
The primary difference is in boolean vector tests. One can test for
any of several bits in bit-vector and still obtain 100% branch test
coverage even though the second element of MC/DC - the requirement
that each condition in a decision take on every possible outcome -
might not be satisfied.

https://sqlite.org/testing.html#mcdc

Wahlen, Heimdahl, and De Silva "Efficient Test Coverage Measurement for
MC/DC" describes an algorithm for adding instrumentation by carrying
over information from the AST, but my algorithm analyses the the control
flow graph to instrument for coverage. This has the benefit of being
programming language independent and faithful to compiler decisions
and transformations, although I have only tested it on constructs in C
and C++, see testsuite/gcc.misc-tests and testsuite/g++.dg.

Like Wahlen et al this implementation records coverage in fixed-size
bitsets which gcov knows how to interpret. This is very fast, but
introduces a limit on the number of terms in a single boolean
expression, the number of bits in a gcov_unsigned_type (which is
typedef'd to uint64_t), so for most practical purposes this would be
acceptable. This limitation is in the implementation and not the
algorithm, so support for more conditions can be added by also
introducing arbitrary-sized bitsets.

For space overhead, the instrumentation needs two accumulators
(gcov_unsigned_type) per condition in the program which will be written
to the gcov file. In addition, every function gets a pair of local
accumulators, but these accmulators are reused between conditions in the
same function.

For time overhead, there is a zeroing of the local accumulators for
every condition and one or two bitwise operation on every edge taken in
the an expression.

In action it looks pretty similar to the branch coverage. The -g short
opt carries no significance, but was chosen because it was an available
option with the upper-case free too.

gcov --conditions:

3:   17:void fn (int a, int b, int c, int d) {
3:   18:if ((a && (b || c)) && d)
conditions covered 3/8
condition  0 not covered (true)
condition  0 not covered (false)
condition  1 not covered (true)
condition  2 not covered (true)
condition  3 not covered (true)
1:   19:x = 1;
-:   20:else
2:   21:x = 2;
3:   22:}

gcov --conditions --json-format:

"conditions": [
{
"not_covered_false": [
0
],
"count": 8,
"covered": 3,
"not_covered_true": [
0,
1,
2,
3
]
}
],

Some expressions, mostly those without else-blocks, are effectively
"rewritten" in the CFG construction making the algorithm unable to
distinguish them:

and.c:

if (a && b && c)
x = 1;

ifs.c:

if (a)
if (b)
if (c)
x = 1;

gcc will build the same graph for both these programs, and gcov will
report boths as 3-term expressions. It is vital that it is not
interpreted the other way around (which is consistent with the shape of
the graph) because otherwise the masking would be wrong for the and.c
program which is a more severe error. While surprising, users would
probably expect some minor rewriting of semantically-identical
expressions.

and.c.gcov:
#:2:if (a && b && c)
conditions covered 6/6
#:3:x = 1;

ifs.c.gcov:
#:2:if (a)
#:3:if (b)
#:4:if (c)
#:5:x = 1;
conditions covered 6/6

Adding else clauses alters the program (ifs.c can have 3 elses, and.c
only 1) and coverage becomes less surprising

ifs.c.gcov:
#:2:

[PATCH] tree-optimization/106228 - fixup last change

2022-07-11 Thread Richard Biener via Gcc-patches
The following fixes the last commit to honor the case we are not
vectorizing a loop.

Bootstrapped on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/106228
* tree-vect-data-refs.cc (vect_setup_realignment): Adjust
VUSE compute for the non-loop case.
---
 gcc/tree-vect-data-refs.cc | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 53e52cb58cb..609cacc4971 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5777,14 +5777,14 @@ vect_setup_realignment (vec_info *vinfo, stmt_vec_info 
stmt_info,
   if (at_loop)
 *at_loop = loop_for_initial_load;
 
+  tree vuse = NULL_TREE;
   if (loop_for_initial_load)
-pe = loop_preheader_edge (loop_for_initial_load);
-
-  tree vuse;
-  gphi *vphi = get_virtual_phi (loop_for_initial_load->header);
-  if (vphi)
-vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
-  else
+{
+  pe = loop_preheader_edge (loop_for_initial_load);
+  if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
+   vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
+}
+  if (!vuse)
 vuse = gimple_vuse (gsi_stmt (*gsi));
 
   /* 3. For the case of the optimized realignment, create the first vector
-- 
2.35.3


Re: [PATCH] c++: Add __reference_con{struc, ver}ts_from_temporary [PR104477]

2022-07-11 Thread Jonathan Wakely via Gcc-patches
On Fri, 8 Jul 2022 at 18:41, Marek Polacek wrote:
> The patch also adds the relevant class and variable templates to 
> .


+  template
+struct reference_constructs_from_temp
orary
+: public __bool_constant<__reference_constructs_from_temporary(_Tp, _Up)>

This can use bool_constant, as that was added in C++17.
__bool_constant is the internal equivalent for pre-C++17.

The library parts are fine with that change, thanks for implementing this!



Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Martin Liška
On 7/11/22 11:55, Richard Biener wrote:
> On Mon, Jul 11, 2022 at 11:16 AM Alexander Monakov  wrote:
>>
>> On Mon, 11 Jul 2022, Rui Ueyama wrote:
>>
 but ignoring min_api_supported is wrong, and assuming max_api_supported > 0
 is also wrong. It really should check how given [min; max] range intersects
 with its own range of supported versions.
>>>
>>> Currently only one version is defined which is LAPI_V1. I don't think
>>> LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
>>> value. No ordering should be defined between a defined value and an
>>> unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
>>> LAPI_V0.
>>
>> You still cannot rely on API guarantees of LAPI_V1 when the plugin does not
>> advertise it (thread safety of claim_file in this particular case).
> 

Hi.

All right, I think we should rename LAPI_UNSPECIFIED to LAPI_V0 in order
to support minimal_api_supported == LAPI_V0.

> So with LAPI_UNSPECIFIED all the plugin gets is the linker name and version.
> Clarifying the documentation on LAPI_UNSPECIFIED might be nice, also
> what the expectation is on the linker when the plugin returns
> LAPI_UNSPECIFIED when it speficied minimal_api_supported == V1.

I've clarified that linker should return a value that is in range
[minimal_api_supported, maximal_api_supported] and added an abort
if it's not the case.

Having that, mold should respect if maximal_api_supported == LAPI_V0 is returned
by a plug-in (happens now as we miss locking for some targets).

Martin

> "minimal_api_supported == LAPI_UNSPECIFIED" does not make much
> sense if using Ruis reading of this value?
> 
>> And you still should check the intersection of supported API ranges
>> and give a sane diagnostic when min_api_supported advertised by the plugin
>> exceeds LAPI_V1 (though, granted, the plugin could error out as well in this
>> case).
>>
>> Alexander
From 7905b58c062774deffcd3d5adfa893e17bcc0f26 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 16 May 2022 14:01:52 +0200
Subject: [PATCH] lto-plugin: implement LDPT_GET_API_VERSION

include/ChangeLog:

	* plugin-api.h (enum linker_api_version): New enum.
	(ld_plugin_get_api_version): New.
	(enum ld_plugin_tag): Add LDPT_GET_API_VERSION.
	(struct ld_plugin_tv): Add tv_get_api_version.

lto-plugin/ChangeLog:

	* lto-plugin.c (negotiate_api_version): New.
	(onload): Negotiate API version.
	* Makefile.am: Add -DBASE_VERSION.
	* Makefile.in: Regenerate.
---
 include/plugin-api.h| 33 +
 lto-plugin/Makefile.am  |  2 +-
 lto-plugin/Makefile.in  |  2 +-
 lto-plugin/lto-plugin.c | 47 +
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/include/plugin-api.h b/include/plugin-api.h
index 8aebe2ff267..e4ed1d139bb 100644
--- a/include/plugin-api.h
+++ b/include/plugin-api.h
@@ -483,6 +483,37 @@ enum ld_plugin_level
   LDPL_FATAL
 };
 
+/* Contract between a plug-in and a linker.  */
+
+enum linker_api_version
+{
+   /* The linker/plugin do not implement any of the API levels below, the API
+   is determined solely via the transfer vector.  */
+   LAPI_V0,
+
+   /* API level v1.  The linker provides get_symbols_v3, add_symbols_v2,
+  the plugin will use that and not any lower versions.
+  claim_file is thread-safe on the plugin side and
+  add_symbols on the linker side.  */
+   LAPI_V1
+};
+
+/* The linker's interface for API version negotiation.  A plug-in calls
+  the function (with its IDENTIFIER and VERSION), plus minimal and maximal
+  version of linker_api_version is provided.  Linker then returns selected
+  API version and provides its IDENTIFIER and VERSION.  The returned value
+  by linker must be in range [MINIMAL_API_SUPPORTED, MAXIMAL_API_SUPPORTED].
+  Identifier pointers remain valid as long as the plugin is loaded.  */
+
+typedef
+enum linker_api_version
+(*ld_plugin_get_api_version) (const char *plugin_identifier,
+			  const char *plugin_version,
+			  enum linker_api_version minimal_api_supported,
+			  enum linker_api_version maximal_api_supported,
+			  const char **linker_identifier,
+			  const char **linker_version);
+
 /* Values for the tv_tag field of the transfer vector.  */
 
 enum ld_plugin_tag
@@ -521,6 +552,7 @@ enum ld_plugin_tag
   LDPT_REGISTER_NEW_INPUT_HOOK,
   LDPT_GET_WRAP_SYMBOLS,
   LDPT_ADD_SYMBOLS_V2,
+  LDPT_GET_API_VERSION,
 };
 
 /* The plugin transfer vector.  */
@@ -556,6 +588,7 @@ struct ld_plugin_tv
 ld_plugin_get_input_section_size tv_get_input_section_size;
 ld_plugin_register_new_input tv_register_new_input;
 ld_plugin_get_wrap_symbols tv_get_wrap_symbols;
+ld_plugin_get_api_version tv_get_api_version;
   } tv_u;
 };
 
diff --git a/lto-plugin/Makefile.am b/lto-plugin/Makefile.am
index 81362eafc36..482946e4dd5 100644
--- a/lto-plugin/Makefile.am
+++ b/lto-plugin/Makefile.am
@@ -8,7 +8,7 @@ target_noncanonical := @target_noncanonical@
 libexecsubdir := $(libexe

Re: Modula-2: merge followup (brief update on the progress of the new linking implementation)

2022-07-11 Thread Rainer Orth
Hi Gaius,

>> A very brief update to say that I've merged the new linking
>> implementation back onto the devel/modula-2 branch,
>
> unfortunately, that branch doesn't bootstrap for me anywere:
[...]
> * On sparc-sun-solaris2.11:
[...]
>   Seems like PR modula2/101392 raised its ugly head again, after it had
>   been gone for a while.  I'm currently trying sparcv9-sun-solaris2.11
>   instead to see how that fares.

as expacted the Solaris/sparcv9 bootstrap got into building the target
libs, then failing in the same way as on Solaris/x86 and Linux/x86_64.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Move reload_completed and other rtl.h globals to crtl structure.

2022-07-11 Thread Richard Sandiford via Gcc-patches
I know it'll seem like make-work, but could you put the combine flag
in a separate follow-on patch?  Reorganising the existing flags
(very welcome!) and adding new ones seem like different things.

TBH I'm a bit suspicious of the combine flag.  What fundamental
property holds true after combine that doesn't hold before it,
or vice versa?  There are other passes that do combine-like things,
such as fwprop and postreload.

The reason for asking is that the RA flags -- before RA, during RA,
after RA -- describe a clear lowering process.  Before RA pseudos
are freely used (and preferred where valid), and insn validity depends
only on C conditions and predicates.  After RA pseudos aren't allowed
and insn validity also depends on constraints.  During RA is a
half-way house.

cse_not_expected always felt like a bit of a hack, but I think the
principle was that, when cse_not_expected is true, you shouldn't
force complex operations to be split into simpler ones (with the
intention of promoting reuse of the simpler operations).  So the
purpose of the flag can be described in terms of what the .md file
should do, rather than being tied to the behaviour of a specific pass.

Sorry for being awkward.  It's just that...

"Roger Sayle"  writes:
> On 11 July 2022 08:20, Richard Biener wrote:
>> On Sun, 10 Jul 2022, Roger Sayle wrote:
>> 
>> > This patch builds upon Richard Biener's suggestion of avoiding global
>> > variables to track state/identify which passes have already been run.
>> > In the early middle-end, the tree-ssa passes use the curr_properties
>> > field in cfun to track this.  This patch uses a new rtl_pass_progress
>> > int field in crtl to do something similar.
>> 
>> Why not simply add PROP_rtl_... and use the existing curr_properties for
> this?
>> RTL passes are also passes and this has the advantage you can add things
> like
>> reload_completed to the passes properties_set field hand have the flag
> setting
>> handled by the pass manager as it was intended?
>> 
>
> Great question, and I did initially consider simply adding more RTL fields
> to
> curr_properties.  My hesitation was from the comments/documentation that
> the curr_properties field is used by the pass manager as a way to track
> (and verify) the properties/invariants that are required, provided and
> destroyed
> by each pass.  This semantically makes sense for properties such as accurate
> data flow, ssa form, cfg_layout, nonzero_bits etc, where hypothetically the
> pass manager can dynamically schedule a pass/analysis to ensure the next
> pass
> has the pre-requisite information it needs.
>
> This seems semantically slightly different from tracking time/progress,
> where
> we really want something more like DEBUG_COUNTER that simply provides
> the "tick-tock" of a pass clock.  Alas, the "pass number", as used in the
> suffix
> of dump-files (where 302 currently means reload) isn't particularly useful
> as
> these change/evolve continually.
>
> Perhaps the most obvious indication of this (subtle) difference is the
> curr_properties field (PROP_rtl_split_insns) which tracks whether
> instructions
> have been split, where at a finer granularity rtl_pass_progress may wish to
> distinguish split1 (after combine before reload), split2 (after reload
> before
> peephole2) and split3 (after peephole2).  It's conceptually not a simple
> property, whether all insns have been split or not, as in practice this is
> more subtle with backends choosing which instructions get split at which
> "times".

...this made it seem like adding the combine flag was a slippery slope
towards adding flags for potentially any pass.  I think we should avoid
exposing this (3-split) level of granularity if we can help it, since it
would make it harder to rejig the pipeline in future.

Where possible, it would be good to give target-independent code more
information about what the target needs, rather than give the target
more information about the inner workings of target-independent code.

That's also a reason why I agree with Richard that it would be better
to use property flags.  That might help to enforce the principle that
the flags should describe properties of the current IL, rather than
a list of what has and hasn't happened so far.

Thanks,
Richard

> There's also the concern that we've a large number of passes (currently
> 62 RTL passes), and only a finite number of bits (in curr_properties), so
> having two integers reduces the risk of running out of bits and needing
> to use a "wider" data structure.
>
> To be honest, I just didn't want to hijack curr_properties to abuse it for a
>
> use that didn't quite match the original intention, without checking with
> you and the other maintainers first.  If the above reasoning isn't
> convincing,
> I can try spinning an alternate patch using curr_properties (but I'd expect
> even more churn as backend source files would now need to #include
> tree-passes.h and function.h to get reload_completed).  And of course,
> a vo

Re: [PATCH] [RFC]Support vectorization for Complex type.

2022-07-11 Thread Richard Biener via Gcc-patches
On Mon, Jul 11, 2022 at 5:44 AM liuhongt  wrote:
>
> The patch only handles load/store(including ctor/permutation, except
> gather/scatter) for complex type, other operations don't needs to be
> handled since they will be lowered by pass cplxlower.(MASK_LOAD is not
> supported for complex type, so no need to handle either).

(*)

> Instead of support vector(2) _Complex double, this patch takes vector(4)
> double as vector type of _Complex double. Since vectorizer originally
> takes TYPE_VECTOR_SUBPARTS as nunits which is not true for complex
> type, the patch handles nunits/ncopies/vf specially for complex type.

For the limited set above(*) can you explain what's "special" about
vector(2) _Complex
vs. vector(4) double, thus why we need to have STMT_VINFO_COMPLEX_P at all?

I wonder to what extent your handling can be extended to support re-vectorizing
(with a higher VF for example) already vectorized code?  The vectorizer giving
up on vector(2) double looks quite obviously similar to it giving up
on _Complex double ...
It would be a shame to not use the same underlying mechanism for dealing with
both, where for the vector case obviously vector(4) would be supported as well.

In principle _Complex double operations should be two SLP lanes but it seems you
are handling them with classical interleaving as well?

Thanks,
Richard.

> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Also test the patch for SPEC2017 and find there's complex type vectorization
> in 510/549(but no performance impact).
>
> Any comments?
>
> gcc/ChangeLog:
>
> PR tree-optimization/106010
> * tree-vect-data-refs.cc (vect_get_data_access_cost):
> Pass complex_p to vect_get_num_copies to avoid ICE.
> (vect_analyze_data_refs): Support vectorization for Complex
> type with vector scalar types.
> * tree-vect-loop.cc (vect_determine_vf_for_stmt_1): VF should
> be half of TYPE_VECTOR_SUBPARTS when complex_p.
> * tree-vect-slp.cc (vect_record_max_nunits): nunits should be
> half of TYPE_VECTOR_SUBPARTS when complex_p.
> (vect_optimize_slp): Support permutation for complex type.
> (vect_slp_analyze_node_operations_1): Double nunits in
> vect_get_num_vectors to get right SLP_TREE_NUMBER_OF_VEC_STMTS
> when complex_p.
> (vect_slp_analyze_node_operations): Ditto.
> (vect_create_constant_vectors): Support CTOR for complex type.
> (vect_transform_slp_perm_load): Support permutation for
> complex type.
> * tree-vect-stmts.cc (vect_init_vector): Support complex type.
> (vect_get_vec_defs_for_operand): Get vector type for
> complex type.
> (vectorizable_store): Get right ncopies/nunits for complex
> type, also return false when complex_p and
> !TYPE_VECTOR_SUBPARTS.is_constant ().
> (vectorizable_load): Ditto.
> (vect_get_vector_types_for_stmt): Get vector type for complex type.
> * tree-vectorizer.h (STMT_VINFO_COMPLEX_P): New macro.
> (vect_get_num_copies): New overload.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr106010-1a.c: New test.
> * gcc.target/i386/pr106010-1b.c: New test.
> * gcc.target/i386/pr106010-1c.c: New test.
> * gcc.target/i386/pr106010-2a.c: New test.
> * gcc.target/i386/pr106010-2b.c: New test.
> * gcc.target/i386/pr106010-2c.c: New test.
> * gcc.target/i386/pr106010-3a.c: New test.
> * gcc.target/i386/pr106010-3b.c: New test.
> * gcc.target/i386/pr106010-3c.c: New test.
> * gcc.target/i386/pr106010-4a.c: New test.
> * gcc.target/i386/pr106010-4b.c: New test.
> * gcc.target/i386/pr106010-4c.c: New test.
> * gcc.target/i386/pr106010-5a.c: New test.
> * gcc.target/i386/pr106010-5b.c: New test.
> * gcc.target/i386/pr106010-5c.c: New test.
> * gcc.target/i386/pr106010-6a.c: New test.
> * gcc.target/i386/pr106010-6b.c: New test.
> * gcc.target/i386/pr106010-6c.c: New test.
> * gcc.target/i386/pr106010-7a.c: New test.
> * gcc.target/i386/pr106010-7b.c: New test.
> * gcc.target/i386/pr106010-7c.c: New test.
> * gcc.target/i386/pr106010-8a.c: New test.
> * gcc.target/i386/pr106010-8b.c: New test.
> * gcc.target/i386/pr106010-8c.c: New test.
> ---
>  gcc/testsuite/gcc.target/i386/pr106010-1a.c |  58 +++
>  gcc/testsuite/gcc.target/i386/pr106010-1b.c |  63 +++
>  gcc/testsuite/gcc.target/i386/pr106010-1c.c |  41 +
>  gcc/testsuite/gcc.target/i386/pr106010-2a.c |  82 +
>  gcc/testsuite/gcc.target/i386/pr106010-2b.c |  62 +++
>  gcc/testsuite/gcc.target/i386/pr106010-2c.c |  47 ++
>  gcc/testsuite/gcc.target/i386/pr106010-3a.c |  80 +
>  gcc/testsuite/gcc.target/i386/pr106010-3b.c | 126 ++
>  gcc/testsuite/gcc.target/i386/pr106010-3c.c |  69 
>  gcc/testsuite/gcc.tar

[PATCH] More update-ssa speedup

2022-07-11 Thread Richard Biener via Gcc-patches
When working on a smaller region like a loop version copy the main
time spent is now dominance fast query recompute which does a full
function DFS walk.  The dominance queries within the region of
interest should be O(log n) without fast queries and we should do
on the order of O(n) of them which overall means reasonable
complexity.

For the artificial testcase I'm looking at this shaves off
considerable time again.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-into-ssa.cc (update_ssa): Do not forcefully
re-compute dominance fast queries for TODO_update_ssa_no_phi.
---
 gcc/tree-into-ssa.cc | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc
index be71b629f97..d13fb720b37 100644
--- a/gcc/tree-into-ssa.cc
+++ b/gcc/tree-into-ssa.cc
@@ -3451,11 +3451,13 @@ update_ssa (unsigned update_flags)
 phis_to_rewrite.create (last_basic_block_for_fn (cfun) + 1);
   blocks_to_update = BITMAP_ALLOC (NULL);
 
-  /* Ensure that the dominance information is up-to-date.  */
-  calculate_dominance_info (CDI_DOMINATORS);
-
   insert_phi_p = (update_flags != TODO_update_ssa_no_phi);
 
+  /* Ensure that the dominance information is up-to-date and when we
+ are going to compute dominance frontiers fast queries are possible.  */
+  if (insert_phi_p || dom_info_state (CDI_DOMINATORS) == DOM_NONE)
+calculate_dominance_info (CDI_DOMINATORS);
+
   /* If there are names defined in the replacement table, prepare
  definition and use sites for all the names in NEW_SSA_NAMES and
  OLD_SSA_NAMES.  */
-- 
2.35.3


Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Rui Ueyama via Gcc-patches
I updated my patch to support the proposed API:
https://github.com/rui314/mold/commit/22bbfa9bba9beeaf40b76481d175939ee2c62ec8

Martin,

I think you want to apply this patch. Currently, your API always
passes LAPI_V0 as the maximum API version.

diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
index e9afd2fb76d..c97bda9de91 100644
--- a/lto-plugin/lto-plugin.c
+++ b/lto-plugin/lto-plugin.c
@@ -1441,15 +1441,15 @@ negotiate_api_version (void)
   const char *linker_version;

   enum linker_api_version supported_api = LAPI_V0;
 #if HAVE_PTHREAD_LOCKING
   supported_api = LAPI_V1;
 #endif

-  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V0,
+  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V1,
 supported_api, &linker_identifier,
&linker_version);
   if (api_version > supported_api)
 {
   fprintf (stderr, "requested an unsupported API version (%d)\n",
api_version);
   abort ();
 }

On Mon, Jul 11, 2022 at 6:51 PM Martin Liška  wrote:
>
> On 7/11/22 11:55, Richard Biener wrote:
> > On Mon, Jul 11, 2022 at 11:16 AM Alexander Monakov  
> > wrote:
> >>
> >> On Mon, 11 Jul 2022, Rui Ueyama wrote:
> >>
>  but ignoring min_api_supported is wrong, and assuming max_api_supported 
>  > 0
>  is also wrong. It really should check how given [min; max] range 
>  intersects
>  with its own range of supported versions.
> >>>
> >>> Currently only one version is defined which is LAPI_V1. I don't think
> >>> LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
> >>> value. No ordering should be defined between a defined value and an
> >>> unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
> >>> LAPI_V0.
> >>
> >> You still cannot rely on API guarantees of LAPI_V1 when the plugin does not
> >> advertise it (thread safety of claim_file in this particular case).
> >
>
> Hi.
>
> All right, I think we should rename LAPI_UNSPECIFIED to LAPI_V0 in order
> to support minimal_api_supported == LAPI_V0.
>
> > So with LAPI_UNSPECIFIED all the plugin gets is the linker name and version.
> > Clarifying the documentation on LAPI_UNSPECIFIED might be nice, also
> > what the expectation is on the linker when the plugin returns
> > LAPI_UNSPECIFIED when it speficied minimal_api_supported == V1.
>
> I've clarified that linker should return a value that is in range
> [minimal_api_supported, maximal_api_supported] and added an abort
> if it's not the case.
>
> Having that, mold should respect if maximal_api_supported == LAPI_V0 is 
> returned
> by a plug-in (happens now as we miss locking for some targets).
>
> Martin
>
> > "minimal_api_supported == LAPI_UNSPECIFIED" does not make much
> > sense if using Ruis reading of this value?
> >
> >> And you still should check the intersection of supported API ranges
> >> and give a sane diagnostic when min_api_supported advertised by the plugin
> >> exceeds LAPI_V1 (though, granted, the plugin could error out as well in 
> >> this
> >> case).
> >>
> >> Alexander


Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Alexander Monakov via Gcc-patches


On Mon, 11 Jul 2022, Rui Ueyama wrote:

> I updated my patch to support the proposed API:
> https://github.com/rui314/mold/commit/22bbfa9bba9beeaf40b76481d175939ee2c62ec8

This still seems to ignore the thread safety aspect.

Alexander


Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Martin Liška
On 7/11/22 14:24, Rui Ueyama wrote:
> I updated my patch to support the proposed API:
> https://github.com/rui314/mold/commit/22bbfa9bba9beeaf40b76481d175939ee2c62ec8
> 
> Martin,
> 
> I think you want to apply this patch. Currently, your API always
> passes LAPI_V0 as the maximum API version.

Are you sure?

The function signature is:

(*ld_plugin_get_api_version) (const char *plugin_identifier,
 const char *plugin_version,
 enum linker_api_version minimal_api_supported,
 enum linker_api_version maximal_api_supported,
...

Which means the plug-in always set minimal as LAPI_V0 and maximal
LAPI_V0/V1. That seems correct to me.

Martin


> 
> diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
> index e9afd2fb76d..c97bda9de91 100644
> --- a/lto-plugin/lto-plugin.c
> +++ b/lto-plugin/lto-plugin.c
> @@ -1441,15 +1441,15 @@ negotiate_api_version (void)
>const char *linker_version;
> 
>enum linker_api_version supported_api = LAPI_V0;
>  #if HAVE_PTHREAD_LOCKING
>supported_api = LAPI_V1;
>  #endif
> 
> -  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V0,
> +  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V1,
>  supported_api, &linker_identifier,
> &linker_version);
>if (api_version > supported_api)
>  {
>fprintf (stderr, "requested an unsupported API version (%d)\n",
> api_version);
>abort ();
>  }
> 
> On Mon, Jul 11, 2022 at 6:51 PM Martin Liška  wrote:
>>
>> On 7/11/22 11:55, Richard Biener wrote:
>>> On Mon, Jul 11, 2022 at 11:16 AM Alexander Monakov  
>>> wrote:

 On Mon, 11 Jul 2022, Rui Ueyama wrote:

>> but ignoring min_api_supported is wrong, and assuming max_api_supported 
>> > 0
>> is also wrong. It really should check how given [min; max] range 
>> intersects
>> with its own range of supported versions.
>
> Currently only one version is defined which is LAPI_V1. I don't think
> LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
> value. No ordering should be defined between a defined value and an
> unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
> LAPI_V0.

 You still cannot rely on API guarantees of LAPI_V1 when the plugin does not
 advertise it (thread safety of claim_file in this particular case).
>>>
>>
>> Hi.
>>
>> All right, I think we should rename LAPI_UNSPECIFIED to LAPI_V0 in order
>> to support minimal_api_supported == LAPI_V0.
>>
>>> So with LAPI_UNSPECIFIED all the plugin gets is the linker name and version.
>>> Clarifying the documentation on LAPI_UNSPECIFIED might be nice, also
>>> what the expectation is on the linker when the plugin returns
>>> LAPI_UNSPECIFIED when it speficied minimal_api_supported == V1.
>>
>> I've clarified that linker should return a value that is in range
>> [minimal_api_supported, maximal_api_supported] and added an abort
>> if it's not the case.
>>
>> Having that, mold should respect if maximal_api_supported == LAPI_V0 is 
>> returned
>> by a plug-in (happens now as we miss locking for some targets).
>>
>> Martin
>>
>>> "minimal_api_supported == LAPI_UNSPECIFIED" does not make much
>>> sense if using Ruis reading of this value?
>>>
 And you still should check the intersection of supported API ranges
 and give a sane diagnostic when min_api_supported advertised by the plugin
 exceeds LAPI_V1 (though, granted, the plugin could error out as well in 
 this
 case).

 Alexander



[PATCH] c++: dependence of constrained memfn from current inst [PR105842]

2022-07-11 Thread Patrick Palka via Gcc-patches
Here we incorrectly deem the calls to func1, func2 and tmpl2 as
ambiguous ahead of time ultimately because we mishandle dependence
of a constrained member function from the current instantiation.

In type_dependent_expression_p, we consider the dependence of a
TEMPLATE_DECL's constraints (via uses_outer_template_parms), but
neglect to do the same for a FUNCTION_DECL such as func1.

And in satisfy_declaration_constraints, we give up if _any_ template
argument is dependent, but for non-dependent member functions from
the current instantiation such as func2 and tmpl2, we can and must
check constraints as long as the innermost arguments aren't dependent.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk/12?

PR c++/105842

gcc/cp/ChangeLog:

* constraint.cc (satisfy_declaration_constraints): Refine
early exit test for argument dependence.
* cp-tree.h (uses_outer_template_parms_in_constraints): Declare.
* pt.cc (template_class_depth): Handle TI_TEMPLATE being a
FIELD_DECL.
(usse_outer_template_parms): Factor out constraint dependence
check to ...
(uses_outer_template_parms_in_constraints): ... here.
(type_dependent_expression_p): Use it for FUNCTION_DECL.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-memtmpl6.C: New test.
---
 gcc/cp/constraint.cc  | 20 +++
 gcc/cp/cp-tree.h  |  1 +
 gcc/cp/pt.cc  | 34 ---
 .../g++.dg/cpp2a/concepts-memtmpl6.C  | 34 +++
 4 files changed, 79 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl6.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 591155cee22..99b97d24eae 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3176,9 +3176,13 @@ satisfy_declaration_constraints (tree t, sat_info info)
args = regen_args;
 }
 
-  /* If any arguments depend on template parameters, we can't
- check constraints. Pretend they're satisfied for now.  */
-  if (uses_template_parms (args))
+  /* If the innermost arguments are dependent, or if the outer arguments
+ are dependent and are needed by the constraints, we can't check
+ satisfaction yet so pretend they're satisfied for now.  */
+  if (uses_template_parms (args)
+  && (TMPL_ARGS_DEPTH (args) == 1
+ || uses_template_parms (INNERMOST_TEMPLATE_ARGS (args))
+ || uses_outer_template_parms_in_constraints (t)))
 return boolean_true_node;
 
   /* Get the normalized constraints.  */
@@ -3240,9 +3244,13 @@ satisfy_declaration_constraints (tree t, tree args, 
sat_info info)
   else
 args = add_outermost_template_args (t, args);
 
-  /* If any arguments depend on template parameters, we can't
- check constraints. Pretend they're satisfied for now.  */
-  if (uses_template_parms (args))
+  /* If the innermost arguments are dependent, or if the outer arguments
+ are dependent and are needed by the constraints, we can't check
+ satisfaction yet so pretend they're satisfied for now.  */
+  if (uses_template_parms (args)
+  && (TMPL_ARGS_DEPTH (args) == 1
+ || uses_template_parms (INNERMOST_TEMPLATE_ARGS (args))
+ || uses_outer_template_parms_in_constraints (t)))
 return boolean_true_node;
 
   tree result = boolean_true_node;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 2fde4f83b41..bec98aa2ac3 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7297,6 +7297,7 @@ extern tree lookup_template_function  (tree, 
tree);
 extern tree lookup_template_variable   (tree, tree);
 extern bool uses_template_parms(tree);
 extern bool uses_template_parms_level  (tree, int);
+extern bool uses_outer_template_parms_in_constraints (tree);
 extern bool in_template_function   (void);
 extern bool need_generic_capture   (void);
 extern tree instantiate_class_template (tree);
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 59ee50c152d..de5d3a5cd78 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -391,7 +391,9 @@ template_class_depth (tree type)
 {
   tree tinfo = get_template_info (type);
 
-  if (tinfo && PRIMARY_TEMPLATE_P (TI_TEMPLATE (tinfo))
+  if (tinfo
+ && TREE_CODE (TI_TEMPLATE (tinfo)) == TEMPLATE_DECL
+ && PRIMARY_TEMPLATE_P (TI_TEMPLATE (tinfo))
  && uses_template_parms (INNERMOST_TEMPLATE_ARGS (TI_ARGS (tinfo
++depth;
 
@@ -11011,7 +11013,7 @@ uses_template_parms_level (tree t, int level)
 /* Returns true if the signature of DECL depends on any template parameter from
its enclosing class.  */
 
-bool
+static bool
 uses_outer_template_parms (tree decl)
 {
   int depth = template_class_depth (CP_DECL_CONTEXT (decl));
@@ -11042,11 +11044,27 @@ uses_outer_template_parms (tree decl)
return true;
}
 }
+  if (us

Re: [pushed 1/2] Convert label_text to C++11 move semantics

2022-07-11 Thread Martin Liška
On 7/7/22 22:00, David Malcolm via Gcc-patches wrote:
> |libcpp's class label_text stores a char * for a string and a flag saying 
> whether it owns the buffer. I added this class before we could use C++11, and 
> so to avoid lots of copying it required an explicit call to 
> label_text::maybe_free to potentially free the buffer. Now that we can use 
> C++11, this patch removes label_text::maybe_free in favor of doing the 
> cleanup in the destructor, and using C++ move semantics to avoid any copying. 
> This allows lots of messy cleanup code to be eliminated in favor of implicit 
> destruction (mostly in the analyzer). No functional change intended.|

Hi.

I've just noticed Clang complains about the change:

/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/libcpp/include/line-map.h:1876:12:
 warning: moving a temporary object prevents copy elision [-Wpessimizing-move]
/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/libcpp/include/line-map.h:1882:12:
 warning: moving a temporary object prevents copy elision [-Wpessimizing-move]

Cheers,
Martin


[COMMITTED] c-family: Fix option check in handle_pragma_diagnostic [PR106252]

2022-07-11 Thread Lewis Hyatt via Gcc-patches
Hello-

PR106252 notes an error found by the address sanitizer from r13-1544. This was
my mistake during refactoring of handle_pragma_diagnostic(), the check
for the option arg was moved earlier than it should be. Committed the obvious
fix putting this back where it was before.

-Lewis
c-family: Fix option check in handle_pragma_diagnostic [PR106252]

In r13-1544, handle_pragma_diagnostic was refactored to support processing
early pragmas. During that process the part looking up option arguments was
inadvertenly moved too early, prior to checking the option was valid, causing
PR106252. Fixed by moving the check back where it goes.

gcc/c-family/ChangeLog:

PR preprocessor/106252
* c-pragma.cc (handle_pragma_diagnostic_impl): Don't look up the
option argument prior to verifying the option was found.

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index 62bce2ed0f5..789719e6e6a 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1009,10 +1009,6 @@ handle_pragma_diagnostic_impl ()
   if (early && !c_option_is_from_cpp_diagnostics (option_index))
 return;
 
-  const char *arg = NULL;
-  if (cl_options[option_index].flags & CL_JOINED)
-arg = data.option_str + 1 + cl_options[option_index].opt_len;
-
   if (option_index == OPT_SPECIAL_unknown)
 {
   if (want_diagnostics)
@@ -1052,6 +1048,10 @@ handle_pragma_diagnostic_impl ()
   return;
 }
 
+  const char *arg = NULL;
+  if (cl_options[option_index].flags & CL_JOINED)
+arg = data.option_str + 1 + cl_options[option_index].opt_len;
+
   struct cl_option_handlers handlers;
   set_default_handlers (&handlers, NULL);
   /* FIXME: input_location isn't the best location here, but it is


[committed] vect: Restore optab_vector argument [PR106250]

2022-07-11 Thread Richard Sandiford via Gcc-patches
In g:76c3041b856cb0 I'd removed a "C ? optab_vector : optab_mixed_sign"
argument from a call to directly_supported_p, thinking that the argument
only existed because of the condition (which I was removing).  But the
difference between the scalar and vector forms matters for shifts,
so we do still need the argument.

Tested on aarch64-linux-gnu and pushed as obvious.

Richard


gcc/
PR tree-optimization/106250
* tree-vect-loop.cc (vectorizable_reduction): Reinstate final
argument to directly_supported_p.
---
 gcc/testsuite/gcc.dg/vect/pr106250.c | 17 +
 gcc/tree-vect-loop.cc|  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr106250.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr106250.c 
b/gcc/testsuite/gcc.dg/vect/pr106250.c
new file mode 100644
index 000..7f25f551869
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr106250.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+int
+foo (unsigned long int x, int y, int z)
+{
+  int ret = 0;
+
+  while (y < 1)
+{
+  x *= 2;
+  ret = x == z;
+  z = y;
+  ++y;
+}
+
+  return ret;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 3a70c15b593..2257b29a652 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7369,7 +7369,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 dot-products.  */
   machine_mode vec_mode = TYPE_MODE (vectype_in);
   if (!lane_reduc_code_p
- && !directly_supported_p (op.code, vectype_in))
+ && !directly_supported_p (op.code, vectype_in, optab_vector))
 {
   if (dump_enabled_p ())
 dump_printf (MSG_NOTE, "op not supported by target.\n");
-- 
2.25.1



Re: Modula-2: merge followup (brief update on the progress of the new linking implementation)

2022-07-11 Thread Gaius Mulley via Gcc-patches
Rainer Orth  writes:

> Hi Gaius,
>
>> A very brief update to say that I've merged the new linking
>> implementation back onto the devel/modula-2 branch,
>
> unfortunately, that branch doesn't bootstrap for me anywere:
>
> * On both x86_64-pc-linux-gnu and i386-pc-solaris2.11:
>
> libtool: compile:  /var/gcc/modula-2/11.4-gcc-modula-2/./gcc/gm2 
> -B/var/gcc/modula-2/11.4-gcc-modula-2/./gcc/ -c -g -O2 -g -O2 -I. 
> -I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs-min 
> -I/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs -fno-exceptions 
> -fno-m2-plugin 
> /vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/M2RTS.mod
>   -fPIC -DPIC -o .libs/M2RTS.o
> /vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/M2RTS.mod:29:1:
>  error: In implementation module ‘M2RTS’: module does not export procedure 
> which is a necessary component of the runtime system, hint check the path and 
> library/language variant
>29 | IMPORT libc, SYSTEM ;
>   | ^~
> make[5]: *** [Makefile:746: M2RTS.lo] Error 1

Hi Rainer,

thanks for the report - the first bug report (above) is now fixed on
devel/modula-2.

Thanks for the gdb output for PR modula2/101392 (still outstanding)

regards,
Gaius


Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Alexander Monakov via Gcc-patches
On Mon, 11 Jul 2022, Martin Liška wrote:

> I've clarified that linker should return a value that is in range
> [minimal_api_supported, maximal_api_supported] and added an abort
> if it's not the case.

I noticed that we are placing a trap for C++ consumers such as mold
by passing min/max_api_supported as enum values. Unlike C, C++ disallows
out-of-range enum values, so when mold does

enum PluginLinkerAPIVersion {
  LAPI_V0 = 0,
  LAPI_V1,
};

get_api_version(const char *plugin_identifier,
unsigned plugin_version,
PluginLinkerAPIVersion minimal_api_supported,
PluginLinkerAPIVersion maximal_api_supported,
const char **linker_identifier,
const char **linker_version) {

checks such as 'min_api_supported > LAPI_V1' can be optimized out. Also,
if a future tool passes LAPI_V2, it will trigger Clang's UBSan (GCC
-fsanitize-undefined=enum instruments loads but not retrieval of function
arguments).

I'd suggest to fix this on both sides by changing the arguments to plain
integer types.

Alexander


PING: [PATCH v5] tree-optimization/101186 - extend FRE with "equivalence map" for condition prediction

2022-07-11 Thread Di Zhao OS via Gcc-patches
Updated the patch in the attachment, so it can apply.

Thanks,
Di Zhao

> -Original Message-
> From: Di Zhao OS
> Sent: Sunday, May 29, 2022 11:59 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Biener 
> Subject: [PATCH v5] tree-optimization/101186 - extend FRE with "equivalence
> map" for condition prediction
> 
> Hi, attached is a new version of the patch. The changes are:
> - Skip using temporary equivalences for floating-point values, because
> folding expressions can generate incorrect values. For example,
> operations on 0.0 and -0.0 may have different results.
> - Avoid inserting duplicated back-refs from value-number to predicates.
> - Disable fre in testsuite/g++.dg/pr83541.C .
> 
> Summary of the previous versions:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-December/587346.html
> 
> Is the patch still considered?
> 
> Thanks,
> Di Zhao
> 
> ---
> 
> Extend FRE with temporary equivalences.
> 
> 2022-05-29  Di Zhao  
> 
> gcc/ChangeLog:
> PR tree-optimization/101186
> * tree-ssa-sccvn.c (VN_INFO): remove assertions (there could be a
> predicate already).
> (dominated_by_p_w_unex): Moved upward.
> (vn_nary_op_get_predicated_value): Moved upward.
> (is_vn_valid_at_bb): Check if vn_pval is valid at BB.
> (lookup_equiv_head): Lookup the "equivalence head" of given node.
> (lookup_equiv_heads): Lookup the "equivalence head"s of given nodes.
> (vn_tracking_edge): Extracted utility function.
> (init_vn_nary_op_from_stmt): Insert and lookup by "equivalence head"s.
> (vn_nary_op_insert_into): Insert new value at the front.
> (vn_nary_op_insert_pieces_predicated_1): Insert as predicated values
> from pieces.
> (fold_const_from_equiv_heads): Fold N-ary expression of equiv-heads.
> (push_new_nary_ref): Insert a back-reference to vn_nary_op_t.
> (val_equiv_insert): Record temporary equivalence.
> (vn_nary_op_insert_pieces_predicated): Record equivalences instead of
> some predicates; insert back-refs.
> (record_equiv_from_prev_phi_1): Record temporary equivalences
> generated
> by PHI nodes.
> (record_equiv_from_prev_phi): Given an outgoing edge of a conditional
> expression taken, record equivalences generated by PHI nodes.
> (visit_nary_op): Add lookup previous results of N-ary operations by
> equivalences.
> (insert_related_predicates_on_edge): Some predicates can be computed
> from equivalences, no need to insert them.
> (process_bb): Add lookup predicated values by equivalences.
> (struct unwind_state): Unwind state of back-refs to vn_nary_op_t.
> (do_unwind): Unwind the back-refs to vn_nary_op_t.
> (do_rpo_vn): Update back-reference unwind state.
> * tree-ssa-sccvn.h (struct nary_ref): hold a lists of references to
> the
> nary map entries.
> 
> gcc/testsuite/ChangeLog:
> 
> * g++.dg/pr83541.C: Disable fre.
> * gcc.dg/tree-ssa/pr68619-2.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-1.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-2.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-3.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-5.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-7.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-8.c: Disable fre.
> * gcc.dg/tree-ssa/pr71947-9.c: Disable fre.
> * gcc.dg/tree-ssa/vrp03.c: Disable fre.
> * gcc.dg/tree-ssa/ssa-fre-100.c: New test.
> * gcc.dg/tree-ssa/ssa-fre-101.c: New test.
> * gcc.dg/tree-ssa/ssa-fre-102.c: New test.
> * gcc.dg/tree-ssa/ssa-pre-34.c: New test.


v5-tree-optimization-101186.patch
Description: v5-tree-optimization-101186.patch


Re: [PATCH v2] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

2022-07-11 Thread Segher Boessenkool
Hi!

On Mon, Jul 11, 2022 at 10:13:41AM +0800, HAO CHEN GUI wrote:
> I did a biset for the problem. After commit "commit 8d2d39587: combine: Do 
> not combine
> moves from hard registers", the case fails. The root cause is it can't 
> combine from the
> hard registers and has to use subreg which causes its high part to be 
> undefined. Thus,
> there is an additional "AND" generated.
> 
> Before the commit
> Trying 2 -> 7:
> 2: r125:DI=%3:DI
>   REG_DEAD %3:DI
> 7: r128:SI=r125:DI#0 0>>0x1f
>   REG_DEAD r125:DI
> Successfully matched this instruction:
> (set (reg:SI 128 [ x ])
> (lshiftrt:SI (reg:SI 3 3 [ x ])
> (const_int 31 [0x1f])))
> allowing combination of insns 2 and 7
> 
> After the commit
> Trying 20 -> 7:
>20: r125:DI=r132:DI
>   REG_DEAD r132:DI
> 7: r128:SI=r125:DI#0 0>>0x1f
>   REG_DEAD r125:DI
> Failed to match this instruction:
> (set (subreg:DI (reg:SI 128 [ x ]) 0)
> (zero_extract:DI (reg:DI 132)
> (const_int 32 [0x20])
> (const_int 1 [0x1])))
> Successfully matched this instruction:
> (set (subreg:DI (reg:SI 128 [ x ]) 0)
> (and:DI (lshiftrt:DI (reg:DI 132)
> (const_int 31 [0x1f]))
> (const_int 4294967295 [0x])))
> allowing combination of insns 20 and 7
> 
> The problem should be fixed in another case? Please advice.

You should not change the expected counts to what is currently
generated.  What is currently generated is sub-optimal.  It all starts
with those zero_extracts, which are always bad for us -- it is a harder
to manipulate representation of a limited subset of more basic
operations we *do* have.  And combine and simplify can handle the more
general and simpler formulation just fine.

Ideally combine would not try to use *_extract at all if this is not
used in the machine description (compare to rotatert for example, a
similarly redundant operation).  But it currently needs it as
intermediate form, untangling this all is quite a bit of work.

These testcases (all the rl* ones) should have a big fat comment
explaining what the expected, wanted code is.

This was easier to do originally, when I actually tested all 65536
possibly combinations, because the expected counts were more "regular"
numbers.  But this is too slow to test in normal testsuite runs :-)

It is wrong to pretend the current state makes the wanted code, these
testcases are meant to show exactly when we make suboptimal machine
code :-)


Segher


[PATCH] libsanitizer: cherry-pick 9cf13067cb5088626ba7 from upstream

2022-07-11 Thread Martin Liška
I'm going to push the following cherry-pick which fixes libasan
build with top-of-tree glibc.

Martin

9cf13067cb5088626ba7ee1ec4c42ec59c7995a0 [sanitizer] Remove #include 
 to resolve fsconfig_command/mount_attr conflict with glibc 2.36
---
 .../sanitizer_platform_limits_posix.cpp| 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp 
b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
index 8ed3e92d270..97fd07acf9d 100644
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -73,7 +73,9 @@
 #include 
 #include 
 #include 
+#if SANITIZER_ANDROID
 #include 
+#endif
 #include 
 #include 
 #include 
@@ -869,10 +871,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT;
   unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT;
 #endif
-  unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS;
-  unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION;
-  unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS;
-  unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION;
+  unsigned IOCTL_FS_IOC_GETFLAGS = _IOR('f', 1, long);
+  unsigned IOCTL_FS_IOC_GETVERSION = _IOR('v', 1, long);
+  unsigned IOCTL_FS_IOC_SETFLAGS = _IOW('f', 2, long);
+  unsigned IOCTL_FS_IOC_SETVERSION = _IOW('v', 2, long);
   unsigned IOCTL_GIO_CMAP = GIO_CMAP;
   unsigned IOCTL_GIO_FONT = GIO_FONT;
   unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP;
-- 
2.36.1



Re: [PATCH] Move reload_completed and other rtl.h globals to crtl structure.

2022-07-11 Thread Jeff Law via Gcc-patches




On 7/10/2022 12:19 PM, Roger Sayle wrote:

This patch builds upon Richard Biener's suggestion of avoiding global
variables to track state/identify which passes have already been run.
In the early middle-end, the tree-ssa passes use the curr_properties
field in cfun to track this.  This patch uses a new rtl_pass_progress
int field in crtl to do something similar.

This patch allows the global variables lra_in_progress, reload_in_progress,
reload_completed, epilogue_completed and regstack_completed to be removed
from rtl.h and implemented as bits within the new crtl->rtl_pass_progress.
I've also taken the liberty of adding a new combine_completed bit at the
same time [to respond the Segher's comment it's easy to change this to
combine1_completed and combine2_completed if we ever perform multiple
combine passes (or multiple reload/regstack passes)].  At the same time,
I've also refactored bb_reorder_complete into the same new field;
interestingly bb_reorder_complete was already a bool in crtl.

One very minor advantage of this implementation/refactoring is that the
predicate "can_create_pseudo_p ()" which is semantically defined to be
!reload_in_progress && !reload_completed, can now be performed very
efficiently as effectively the test (progress & 12) == 0, i.e. a single
test instruction on x86.

For consistency, I've also moved cse_not_expected (the last remaining
global variable in rtl.h) into crtl, as its own bool field.

The vast majority of this patch is then churn to handle these changes.
Thanks to macros, most code is unaffected, assuming it treats those
global variables as r-values, though some source files required/may
require tweaks as these "variables" are now defined in emit-rtl.h
instead of rtl.h.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Might this clean-up be acceptable in stage 1,
given the possible temporary disruption transitioning some backends?
I'll start checking various backends myself with cross-compilers, but if
Jeff Law could spin this patch on his build farm, that would help
identify targets that need attention.


2022-07-10  Roger Sayle  

gcc/ChangeLog
 * bb-reorder.cc (reorder_basic_blocks): bb_reorder_complete is
 now a bit in crtl->rtl_pass_progress.
 * cfgrtl.cc (rtl_split_edge): Likewise.
 (fixup_partitions): Likewise.
 (verify_hot_cold_block_grouping): Likewise.
 (cfg_layout_initialize): Likewise.
  * combine.cc (rest_of_handle_combine): Set combine_completed
 bit in crtl->rtl_pass_progress.
 * cse.cc (rest_of_handle_cse): cse_not_expected is now a field
 in crtl.
 (rest_of_handle_cse2): Likewise.
 (rest_of_handle_cse_after_global_opts): Likewise.
 * df-problems.cc: Include emit-rtl.h to access RTL pass progress
 variables.

 * emit-rtl.h (PROGRESS_reload_completed): New bit masks.
 (rtl_data::rtl_pass_progress): New integer field to track progress.
 (rtl_data::bb_reorder_complete): Delete, no part of
rtl_pass_progress.
 (rtl_data::cse_not_expected): New bool field, previously a global
 variable.
 (crtl_pass_progress): New convience macro.
 (combine_completed): New macro.
 (lra_in_progress): New macro replacing global variable.
 (reload_in_progress): Likewise.
 (reload_completed): Likewise.
 (bb_reorder_complete): New macro replacing bool field in crtl.
 (epilogue_completed): New macro replacing global variable.
 (regstack_completed): Likewise.
 (can_create_pseudo_p): Move from rtl.h and update definition.

 * explow.cc (memory_address_addr_space): cse_not_expected is now
 a field in crtl.
 (use_anchored_address): Likewise.
 * final.c (rest_of_clean_state): Reset crtl->rtl_pass_progress
 to zero.
 * function.cc (prepare_function_start): cse_not_expected is now
 a field in crtl.
 (thread_prologue_and_epilogue_insns): epilogue_completed is now
 a bit in crtl->rtl_pass_progress.
 * ifcvt.cc (noce_try_cmove_arith): cse_not_expected is now a
 field in crtl.
 * lra-eliminations.cc (init_elim_table): lra_in_progress is now
 a bit in crtl->rtl_pass_progress.
 * lra.cc (lra_in_progress): Delete global variable.
 (lra): lra_in_progress and reload_completed are now bits in
 crtl->rtl_pass_progress.
 * modulo-sched.cc (sms_schedule): reload_completed is now a bit
 in crtl->rtl_pass_progress.
 * passes.cc (skip_pass): reload_completed and epilogue_completed
 are now bits in crtl->rtl_pass_progress.
 * recog.cc (reload_completed): Delete global variable.
 (epilogue_completed): Likewise.
 * reg-stack.cc (regstack_completed): Likewise.
 (rest_of_handle_stack_re

Re: Modula-2: merge followup (brief update on the progress of the new linking implementation)

2022-07-11 Thread Rainer Orth
Hi Gaius,

> thanks for the report - the first bug report (above) is now fixed on
> devel/modula-2.

thanks, that got me closer, but not quite there yet:

* First, I get:

/vol/gcc/src/hg/master/modula-2/gcc/m2/gm2-libs-min/SYSTEM.mod:29:1: error: In 
implementation module 'SYSTEM': module 'M2RTS' does not export procedure 
'RequestDependant'
/vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/M2RTS.mod:29:1:
 error: In implementation module 'M2RTS': module 'M2RTS' does not export 
procedure 'RegisterModule'
/vol/gcc/src/hg/master/modula-2/libgm2/libm2min/../../gcc/m2/gm2-libs-min/SYSTEM.mod:29:1:
 error: In implementation module 'SYSTEM': module 'M2RTS' does not export 
procedure 'RequestDependant'

  Fixed by adding dummy declarations/definitions as in the attached patch.

* Then, I run into

make[9]: Entering directory 
'/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/amd64/libgm2/libm2pim'
Makefile:913: warning: ignoring prerequisites on suffix rule definition
make[9]: *** No rule to make target 'UnixArgs.c', needed by 
'libm2pim_la-UnixArgs.lo'.
make[9]: *** No rule to make target 'Selective.c', needed by 
'libm2pim_la-Selective.lo'.
make[9]: *** No rule to make target 'sckt.c', needed by 'libm2pim_la-sckt.lo'.
make[9]: *** No rule to make target 'errno.c', needed by 'libm2pim_la-errno.lo'.
make[9]: *** No rule to make target 'dtoa.c', needed by 'libm2pim_la-dtoa.lo'.
make[9]: *** No rule to make target 'ldtoa.c', needed by 'libm2pim_la-ldtoa.lo'.
make[9]: *** No rule to make target 'termios.c', needed by 
'libm2pim_la-termios.lo'.
make[9]: *** No rule to make target 'SysExceptions.c', needed by 
'libm2pim_la-SysExceptions.lo'.
make[9]: Target 'all-am' not remade because of errors.
make[9]: Leaving directory 
'/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/amd64/libgm2/libm2pim'

  It turns out none of the generated auto* files had been regenerated in
  tree; fixed by running autoconf; autoheader; automake.

* Next, on Solaris only:

/vol/gcc/src/hg/master/modula-2/libgm2/libm2pim/dtoa.cc: In function 'int 
dtoa_calcmaxsig(char*, int)':
/vol/gcc/src/hg/master/modula-2/libgm2/libm2pim/dtoa.cc:139:7: error: 'index' 
was not declared in this scope; did you mean 'index_t'?
  139 |   e = index (p, 'E');
  |   ^
  |   index_t
/vol/gcc/src/hg/master/modula-2/libgm2/libm2pim/dtoa.cc: In function 'int 
dtoa_calcdecimal(char*, int, int)':
/vol/gcc/src/hg/master/modula-2/libgm2/libm2pim/dtoa.cc:171:7: error: 'index' 
was not declared in this scope; did you mean 'index_t'?
  171 |   e = index (p, 'E');
  |   ^
  |   index_t

  While index is declared in , the declaration is guarded by
  !_XPG7 || __EXTENSIONS__, but g++ defines _XPG7/_XOPEN_SOURCE=700 on
  Solaris.  I think it's better to just use the standard strchr instead,
  as the attached patch does.

* While this lets the build finish on all of i386-pc-solaris2.11,
  sparcv9-sun-solaris2.11, and x86_64-pc-linux-gnu, I get thousands of
  testsuite failures, all of the same kind:

Undefined   first referenced
 symbol in file
RTco_signal 
/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/./libgm2/libm2pim/.libs/libm2pim.so
RTco_select 
/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/./libgm2/libm2pim/.libs/libm2pim.so
RTco_initSemaphore  
/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/./libgm2/libm2pim/.libs/libm2pim.so
RTco_wait   
/var/gcc/modula-2/11.4-gcc-modula-2/i386-pc-solaris2.11/./libgm2/libm2pim/.libs/libm2pim.so
ld: fatal: symbol referencing errors
collect2: error: ld returned 1 exit status
compiler exited with status 1
FAIL: gm2/exceptions/run/pass/libexcept.mod compilation,  -g

  I haven't yet tried to fix those.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


diff --git a/gcc/m2/gm2-libs-min/M2RTS.def b/gcc/m2/gm2-libs-min/M2RTS.def
--- a/gcc/m2/gm2-libs-min/M2RTS.def
+++ b/gcc/m2/gm2-libs-min/M2RTS.def
@@ -31,11 +31,17 @@ DEFINITION MODULE M2RTS ;
 
 FROM SYSTEM IMPORT ADDRESS ;
 
+TYPE
+   ArgCVEnvP = PROCEDURE (INTEGER, ADDRESS, ADDRESS) ;
 
 (*
all these procedures do nothing except satisfy the linker.
 *)
 
+PROCEDURE RegisterModule (name: ADDRESS;
+  init, fini:  ArgCVEnvP;
+  dependencies: PROC) ;
+PROCEDURE RequestDependant (modulename, dependantmodule: ADDRESS) ;
 PROCEDURE ExecuteTerminationProcedures ;
 PROCEDURE ExecuteInitialProcedures ;
 PROCEDURE HALT ;
diff --git a/gcc/m2/gm2-libs-min/M2RTS.mod b/gcc/m2/gm2-libs-min/M2RTS.mod
--- a/gcc/m2/gm2-libs-min/M2RTS.mod
+++ b/gcc/m2/gm2-libs-min/M2RTS.mod
@@ -70,4 +70,10 @@ BEGIN
 END DeconstructModules ;
 
 
+PROCEDURE RegisterModule (name: ADDRESS;
+  init, fini:  Arg

Re: [PATCH] btf: emit linkage information in BTF_KIND_FUNC entries

2022-07-11 Thread Indu Bhagat via Gcc-patches

On 7/8/22 11:30 AM, Jose E. Marchesi via Gcc-patches wrote:



The kernel bpftool expects BTF_KIND_FUNC entries in BTF to include an
annotation reflecting the linkage of functions (static, global).  For
whatever reason they (ab)use the `vlen' field of the BTF_KIND_FUNC entry
instead of adding a variable-part to the record like it is done with
other entry kinds.



For BTF Variables, we have the linkage information in the output section 
as "btv_linkage".  To propagate that information from DWARF to BTF, we 
have the dvd_visibility in struct ctf_dvdef (in ctfc.h). Now that the 
linkage information is needed for the BTF_KIND_FUNC entries, what do you 
think about - adding something like dtd_visibility to ctf_dtdef.


Updating the BTF format documentation will be useful 
https://www.kernel.org/doc/Documentation/bpf/btf.rst. Let's see what can 
be done for that...


Also, adding some testcases with the current patch will be great.

I have created PR debug/106263 "BTF_KIND_FUNC type does not encode 
linkage" to track this.




This patch makes GCC to include this linkage info in BTF_KIND_FUNC
entries.

Tested in bpf-unknown-none target.

gcc/ChangeLog:

* ctfc.h (struct ctf_itype): Add field ctti_linkage.
* ctfc.cc (ctf_add_function): Set ctti_linkage.
* dwarf2ctf.cc (gen_ctf_function_type): Pass a linkage for
function types and subprograms.
* btfout.cc (btf_asm_func_type): Emit linkage information for the
function.
---
  gcc/btfout.cc| 3 ++-
  gcc/ctfc.cc  | 3 ++-
  gcc/ctfc.h   | 3 ++-
  gcc/dwarf2ctf.cc | 4 +++-
  4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 31af50521da..417d87cf519 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -740,7 +740,8 @@ static void
  btf_asm_func_type (ctf_dtdef_ref dtd)
  {
dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
-  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0, 0), "btt_info");
+  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0,
+ dtd->dtd_data.ctti_linkage), 
"btt_info");
dw2_asm_output_data (4, get_btf_id (dtd->dtd_data.ctti_type), "btt_type");
  }
  
diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc

index f24e7bff948..ad7f8bb8e86 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -777,7 +777,7 @@ ctf_add_function_arg (ctf_container_ref ctfc, dw_die_ref 
func,
  ctf_id_t
  ctf_add_function (ctf_container_ref ctfc, uint32_t flag, const char * name,
  const ctf_funcinfo_t * ctc, dw_die_ref die,
- bool from_global_func)
+ bool from_global_func, int linkage)
  {
ctf_dtdef_ref dtd;
ctf_id_t type;
@@ -792,6 +792,7 @@ ctf_add_function (ctf_container_ref ctfc, uint32_t flag, 
const char * name,
  
dtd->from_global_func = from_global_func;

dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_FUNCTION, flag, vlen);
+  dtd->dtd_data.ctti_linkage = linkage;
/* Caller must make sure CTF types for ctc->ctc_return are already added.  
*/
dtd->dtd_data.ctti_type = (uint32_t) ctc->ctc_return;
/* Caller must make sure CTF types for function arguments are already added
diff --git a/gcc/ctfc.h b/gcc/ctfc.h
index 001e544ef08..273997a2302 100644
--- a/gcc/ctfc.h
+++ b/gcc/ctfc.h
@@ -116,6 +116,7 @@ typedef struct GTY (()) ctf_itype
} _u;
uint32_t ctti_lsizehi;  /* High 32 bits of type size in bytes.  */
uint32_t ctti_lsizelo;  /* Low 32 bits of type size in bytes.  */
+  uint32_t ctti_linkage;   /* Linkage info for function types.  */
  } ctf_itype_t;
  
  #define ctti_size _u._size

@@ -423,7 +424,7 @@ extern ctf_id_t ctf_add_forward (ctf_container_ref, 
uint32_t, const char *,
  extern ctf_id_t ctf_add_typedef (ctf_container_ref, uint32_t, const char *,
 ctf_id_t, dw_die_ref);
  extern ctf_id_t ctf_add_function (ctf_container_ref, uint32_t, const char *,
- const ctf_funcinfo_t *, dw_die_ref, bool);
+ const ctf_funcinfo_t *, dw_die_ref, bool, 
int);
  extern ctf_id_t ctf_add_sou (ctf_container_ref, uint32_t, const char *,
 uint32_t, size_t, dw_die_ref);
  
diff --git a/gcc/dwarf2ctf.cc b/gcc/dwarf2ctf.cc

index a6329ab6ee4..39714c2 100644
--- a/gcc/dwarf2ctf.cc
+++ b/gcc/dwarf2ctf.cc
@@ -644,6 +644,7 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
  
ctf_funcinfo_t func_info;

uint32_t num_args = 0;
+  int linkage = get_AT_flag (function, DW_AT_external);
  
ctf_id_t return_type_id;

ctf_id_t function_type_id;
@@ -687,7 +688,8 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
   function_name,
   (const ctf_funcinfo_t *)&func_info,
   function,
-  from_global_func);
+

[PATCH] preprocessor: Set input_location to the most recently seen token

2022-07-11 Thread Lewis Hyatt via Gcc-patches
Hello-

As discussed here:
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598136.html

Here is another short patch that improves "#pragma GCC diagnostic" handling.
Longer term, it will be desirable to make the handling of this pragma
independent of the global input_location. But in the meantime, some glitches
like this one can be readily addressed by making input_location point to
something better. In this case, input_location during preprocessing (-E or
-save-temps) is made to point to the most recently seen token rather than the
beginning of the file. To the best of my knowledge, nothing else besides
"#pragma GCC diagnostic" handling can observe input_location during
token streaming, so this is expected not to have any other
repercussions. Bootstrap + regtest does look clean on x86-64 Linux.

By the way, the new testcase fails when compiled with C++, but it's not
because of pragma handling, it's rather because the C++ frontend changes the
location on the warning to the wrong place. Once done_lexing has been set to
true, it changes the location of all warnings to input_location, however
that's not correct when the location is the cached location of a macro
definition; the original location is preferable. I will file a separate PR
about that, and have xfailed that testcase for now, since I am not quite there
with grokking the reason it behaves this way, and anyway it's not related to
this 1-line fix for gcc -E.

Please let me know how it looks? Thanks!

-Lewis
[PATCH] preprocessor: Set input_location to the most recently seen token

When preprocessing with -E and -save-temps, input_location points always to the
first character of the current file. This was previously irrelevant because
nothing was called during the token streaming process that would inspect
input_location. But since r13-1544, "#pragma GCC diagnostic" is supported in
preprocess-only mode, and that pragma relies on input_location to decide if a
given source code location is subject to a diagnostic or not. Most diagnostics
work fine anyway, because they are handled as soon as they are seen and so
everything is still seen in the expected order even though all the diagnostic
pragmas are treated as if they applied at the start of the file. One example
that doesn't work correctly is the new testcase, since here the warning is not
triggered until the end of the file and so it is necessary to track the location
properly.

Fixed by setting input_location to point to each token as it is being
streamed, similar to how C++ mode sets it.

gcc/c-family/ChangeLog:

* c-ppoutput.cc (token_streamer::stream): Update input_location
prior to streaming each token.

gcc/testsuite/ChangeLog:

* c-c++-common/pragma-diag-14.c: New test.
* c-c++-common/pragma-diag-15.c: New test.

diff --git a/gcc/c-family/c-ppoutput.cc b/gcc/c-family/c-ppoutput.cc
index cd38c969ea0..98081ccfbb0 100644
--- a/gcc/c-family/c-ppoutput.cc
+++ b/gcc/c-family/c-ppoutput.cc
@@ -210,6 +210,10 @@ void
 token_streamer::stream (cpp_reader *pfile, const cpp_token *token,
location_t loc)
 {
+  /* Keep input_location up to date, since it is needed for processing early
+ pragmas such as #pragma GCC diagnostic.  */
+  input_location = loc;
+
   if (token->type == CPP_PADDING)
 {
   avoid_paste = true;
diff --git a/gcc/testsuite/c-c++-common/pragma-diag-14.c 
b/gcc/testsuite/c-c++-common/pragma-diag-14.c
new file mode 100644
index 000..618e7e1ef27
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pragma-diag-14.c
@@ -0,0 +1,9 @@
+/* { dg-do preprocess } */
+/* { dg-additional-options "-Wunused-macros" } */
+
+/* In the past, the pragma has erroneously disabled the warning because the
+   location was not tracked properly with -E or -save-temps; check that it 
works
+   now.  */
+
+#define X /* { dg-warning "-:-Wunused-macros" } */
+#pragma GCC diagnostic ignored "-Wunused-macros"
diff --git a/gcc/testsuite/c-c++-common/pragma-diag-15.c 
b/gcc/testsuite/c-c++-common/pragma-diag-15.c
new file mode 100644
index 000..d8076b4f93a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pragma-diag-15.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wunused-macros" } */
+
+/* In the past, the pragma has erroneously disabled the warning because the
+   location was not tracked properly with -E or -save-temps; check that it 
works
+   now.
+
+   This test currently fails for C++ but it's not because of the pragma, it's
+   because the location of the macro definition is incorrectly set.  This is a
+   separate issue, will resolve it in a later patch.  */
+
+#define X /* { dg-warning "-:-Wunused-macros" {} { xfail c++ } } */
+#pragma GCC diagnostic ignored "-Wunused-macros"


[PATCH 1/2] Add gcc/make-unique.h

2022-07-11 Thread David Malcolm via Gcc-patches
On Fri, 2022-07-08 at 22:16 +0100, Jonathan Wakely wrote:
> On Fri, 8 Jul 2022 at 21:47, David Malcolm via Gcc 
> wrote:
> > 
> > std::unique_ptr is C++11, and I'd like to use it in the
> > gcc/analyzer
> > subdirectory, at least.  The following patch eliminates a bunch of
> > "takes ownership" comments and manual "delete" invocations in favor
> > of simply using std::unique_ptr.
> > 
> > The problem is that the patch makes use of std::make_unique, but
> > that
> > was added in C++14.
> > 
> > I've heard that it's reasonably easy to reimplement
> > std::make_unique,
> > but I'm not sure that my C++11 skills are up to it.
> 
> You know we have an implementation of std::make_unique in GCC, with a
> GCC-compatible licence that you can look at, right? :-)
> 
> But it's not really necessary. There are only two reasons to prefer
> make_unique over just allocating an object with new and constructing
> a
> unique_ptr from it:
> 
> 1) avoid a "naked" new in your code (some coding styles like this,
> but
> it's not really important as long as the 'delete' is managed
> automatically by unique_ptr).
> 
> 2) exception-safety when allocating multiple objects as args to a
> function, see https://herbsutter.com/gotw/_102/ for details.
> Irrelevant for GCC, because we build without exceptions.

[moving from gcc to gcc-patches mailing list]

Also, I *think* it's a lot less typing, since I can write just:

  std::make_unique (args)

rather than

  std::unique_ptr (new 
name_of_type_which_could_be_long (args));

> 
> 
> 
> > Is there:
> > (a) an easy way to implement a std::make_unique replacement
> > (e.g. in system.h? what to call it?), or
> 
> If you don't care about using it to create unique_ptr arrays,
> it's trivial:
> 
>   template
> inline typename std::enable_if::value,
> std::unique_ptr>::type
> make_unique(Args&&... args)
> { return std::unique_ptr(new T(std::forward(args)...));
> }
> 
> To add the overload that works for arrays is a little trickier.

Thanks!

I tried adding it to gcc/system.h, but anything that uses it needs to
have std::unique_ptr declared, which meant forcibly including 
from gcc/system.h

So instead, here's a patch that adds a new gcc/make-unique.h header,
containing just the template decl above (in the root namespace, rather
than std::, which saves a bit more typing).

I've successfully bootstrapped®ression-tested a version of my earlier
analyzer patch that uses this patch (see patch 2 of the kit, which has
lots of usage examples).

OK for trunk?

Dave



This patch adds gcc/make-unique.h, containing a minimal C++11
implementation of make_unique (std::make_unique is C++14).

gcc/ChangeLog:
* make-unique.h: New file.

Signed-off-by: David Malcolm 
---
 gcc/make-unique.h | 41 +
 1 file changed, 41 insertions(+)
 create mode 100644 gcc/make-unique.h

diff --git a/gcc/make-unique.h b/gcc/make-unique.h
new file mode 100644
index 000..c99c5328545
--- /dev/null
+++ b/gcc/make-unique.h
@@ -0,0 +1,41 @@
+/* Minimal implementation of make_unique for C++11 compatibility.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#ifndef GCC_MAKE_UNIQUE
+#define GCC_MAKE_UNIQUE
+
+/* This header uses std::unique_ptr, but  can't be directly
+   included due to issues with macros.  Hence it must be included from
+   system.h by defining INCLUDE_MEMORY in any source file using it.  */
+
+#ifndef INCLUDE_MEMORY
+# error "You must define INCLUDE_MEMORY before including system.h to use 
make-unique.h"
+#endif
+
+/* Minimal implementation of make_unique for C++11 compatibility
+   (std::make_unique is C++14).  */
+
+template
+inline typename std::enable_if::value, 
std::unique_ptr>::type
+make_unique(Args&&... args)
+{
+  return std::unique_ptr (new T (std::forward (args)...));
+}
+
+#endif /* ! GCC_MAKE_UNIQUE */
-- 
2.26.3



[PATCH 2/2] analyzer: use std::unique_ptr for pending_diagnostic/note

2022-07-11 Thread David Malcolm via Gcc-patches
This patch uses the new gcc/make-unique.h header for C++11
compatibility.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

I can self-approve this, but it's dependent on the patch to add
gcc/make-unique.h.

gcc/analyzer/ChangeLog:
* call-info.cc: Add define of INCLUDE_MEMORY.
* checker-path.cc: Likewise.
* constraint-manager.cc: Likewise.
* diagnostic-manager.cc: Likewise.
(saved_diagnostic::saved_diagnostic): Use std::unique_ptr for
param d and field m_d.
(saved_diagnostic::~saved_diagnostic): Remove explicit delete of m_d.
(saved_diagnostic::add_note): Use std::unique_ptr for
param pn.
(saved_diagnostic::get_pending_diagnostic): Update for conversion
of m_sd.m_d to unique_ptr.
(diagnostic_manager::add_diagnostic): Use std::unique_ptr for
param d.  Remove explicit deletion.
(diagnostic_manager::add_note): Use std::unique_ptr for param pn.
(diagnostic_manager::emit_saved_diagnostic): Update for conversion
of m_sd.m_d to unique_ptr.
(null_assignment_sm_context::warn): Use std::unique_ptr for
param d.  Remove explicit deletion.
* diagnostic-manager.h (saved_diagnostic::saved_diagnostic): Use
std::unique_ptr for param d.
(saved_diagnostic::add_note): Likewise for param pn.
(saved_diagnostic::m_d): Likewise.
(diagnostic_manager::add_diagnostic): Use std::unique_ptr for
param d.
(diagnostic_manager::add_note): Use std::unique_ptr for param pn.
* engine.cc: Include "make-unique.h".
(impl_region_model_context::warn): Update to use std::unique_ptr
for param, removing explicit deletion.
(impl_region_model_context::add_note): Likewise.
(impl_sm_context::warn): Update to use std::unique_ptr
for param.
(impl_region_model_context::on_state_leak): Likewise for result of
on_leak.
(exploded_node::on_longjmp): Use make_unique when creating
pending_diagnostic.
* exploded-graph.h (impl_region_model_context::warn): Update to
use std::unique_ptr for param.
(impl_region_model_context::add_note): Likewise.
* feasible-graph.cc: Add define of INCLUDE_MEMORY.
* pending-diagnostic.cc: Likewise.
* pending-diagnostic.h: Include analyzer.sm.h"
* program-point.cc: Add define of INCLUDE_MEMORY.
* program-state.cc: Likewise.
* region-model-asm.cc: Likewise.
* region-model-impl-calls.cc: Likewise.
* region-model-manager.cc: Likewise.
* region-model-reachability.cc: Likewise.
* region-model.cc: Likewise.  Include "make-unique.h".
(region_model::get_gassign_result): Use make_unique when creating
pending_diagnostic.
(region_model::check_for_poison): Likewise.
(region_model::on_stmt_pre): Likewise.
(annotating_ctxt: make_node): Use std::unique_ptr for result.
(region_model::deref_rvalue): Use make_unique when creating
pending_diagnostic.
(region_model::check_for_writable_region): Likewise.
(region_model::check_region_size): Likewise.
(noop_region_model_context::add_note): Use std::unique_ptr for
param.  Remove explicit deletion.
* region-model.h: Include "analyzer/pending-diagnostic.h".
(region_model_context::warn): Convert param to std::unique_ptr.
(region_model_context::add_note): Likewise.
(noop_region_model_context::warn): Likewise.
(noop_region_model_context::add_note): Likewise.
(region_model_context_decorator::warn): Likewise.
(region_model_context_decorator::add_note): Likewise.
(note_adding_context::warn): Likewise.
(note_adding_context::make_note): Likewise for return type.
(test_region_model_context::warn): Convert param to
std::unique_ptr.
* region.cc: Add define of INCLUDE_MEMORY.
* sm-fd.cc: Likewise.  Include "make-unique.h".
(fd_state_machine::on_open): Use make_unique when creating
pending_diagnostic.
(fd_state_machine::on_close): Likewise.
(fd_state_machine::check_for_open_fd): Likewise.
(fd_state_machine::on_leak): Likewise, converting return type to
std::unique_ptr.
* sm-file.cc: Add define of INCLUDE_MEMORY.  Include
"make-unique.h".
(fileptr_state_machine::on_stmt): Use make_unique when creating
pending_diagnostic.
(fileptr_state_machine::on_leak): Likewise, converting return type
to std::unique_ptr.
* sm-malloc.cc: Add define of INCLUDE_MEMORY.  Include
"make-unique.h".
(malloc_state_machine::on_stmt): Use make_unique when creating
pending_diagnostic.
(malloc_state_machine::handle_free_of_non_heap): Likewise.
(malloc_state_machine::on_deallocator_call): Likewise.
(malloc_state_machine::on_re

[PATCH] c++: non-dependent call to consteval operator [PR105912]

2022-07-11 Thread Patrick Palka via Gcc-patches
Here we were crashing when substituting a non-dependent call to a
consteval operator, whose CALL_EXPR_OPERATOR_SYNTAX flag we try to
propagate to the result, but the result isn't a CALL_EXPR since the
called function is consteval.  This patch fixes this by checking the
result of extract_call_expr accordingly.  (Note that we can't easily
check DECL_IMMEDIATE_FUNCTION_P here because we don't know which
function was selected by overload resolution from this call frame.)

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk/12?

PR c++/105912

gcc/cp/ChangeLog:

* pt.cc (tsubst_copy_and_build) : Guard against
NULL_TREE extract_call_expr result.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/consteval31.C: New test.
---
 gcc/cp/pt.cc | 10 +
 gcc/testsuite/g++.dg/cpp2a/consteval31.C | 26 
 2 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval31.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 232964c2831..8a6ae5c42fe 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -21207,10 +21207,12 @@ tsubst_copy_and_build (tree t,
bool rev = CALL_EXPR_REVERSE_ARGS (t);
if (op || ord || rev)
  {
-   function = extract_call_expr (ret);
-   CALL_EXPR_OPERATOR_SYNTAX (function) = op;
-   CALL_EXPR_ORDERED_ARGS (function) = ord;
-   CALL_EXPR_REVERSE_ARGS (function) = rev;
+   if (tree call = extract_call_expr (ret))
+ {
+   CALL_EXPR_OPERATOR_SYNTAX (call) = op;
+   CALL_EXPR_ORDERED_ARGS (call) = ord;
+   CALL_EXPR_REVERSE_ARGS (call) = rev;
+ }
  }
  }
 
diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval31.C 
b/gcc/testsuite/g++.dg/cpp2a/consteval31.C
new file mode 100644
index 000..85a4d1794e5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/consteval31.C
@@ -0,0 +1,26 @@
+// PR c++/105912
+// { dg-do compile { target c++20 } }
+
+struct A {
+  consteval A operator+() {
+return {};
+  }
+};
+
+consteval A operator~(A) {
+  return {};
+}
+
+consteval A operator+(A, A) {
+  return {};
+}
+
+template
+void f() {
+  A a;
+  ~a;
+  a + a;
+  +a;
+}
+
+template void f();
-- 
2.37.0.3.g30cc8d0f14



Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Rui Ueyama via Gcc-patches
I updated my code so that it now acquires a lock before calling
claim_file if v0.
https://github.com/rui314/mold/commit/54fedf005fb35acdcb0408395aa403f94262190a

On Mon, Jul 11, 2022 at 8:51 PM Martin Liška  wrote:
>
> On 7/11/22 14:24, Rui Ueyama wrote:
> > I updated my patch to support the proposed API:
> > https://github.com/rui314/mold/commit/22bbfa9bba9beeaf40b76481d175939ee2c62ec8
> >
> > Martin,
> >
> > I think you want to apply this patch. Currently, your API always
> > passes LAPI_V0 as the maximum API version.
>
> Are you sure?
>
> The function signature is:
>
> (*ld_plugin_get_api_version) (const char *plugin_identifier,
>  const char *plugin_version,
>  enum linker_api_version minimal_api_supported,
>  enum linker_api_version maximal_api_supported,
> ...
>
> Which means the plug-in always set minimal as LAPI_V0 and maximal
> LAPI_V0/V1. That seems correct to me.

I'm sorry I misunderstood your code. It looks correct.

> Martin
>
>
> >
> > diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
> > index e9afd2fb76d..c97bda9de91 100644
> > --- a/lto-plugin/lto-plugin.c
> > +++ b/lto-plugin/lto-plugin.c
> > @@ -1441,15 +1441,15 @@ negotiate_api_version (void)
> >const char *linker_version;
> >
> >enum linker_api_version supported_api = LAPI_V0;
> >  #if HAVE_PTHREAD_LOCKING
> >supported_api = LAPI_V1;
> >  #endif
> >
> > -  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V0,
> > +  api_version = get_api_version ("GCC", BASE_VERSION, LAPI_V1,
> >  supported_api, &linker_identifier,
> > &linker_version);
> >if (api_version > supported_api)
> >  {
> >fprintf (stderr, "requested an unsupported API version (%d)\n",
> > api_version);
> >abort ();
> >  }
> >
> > On Mon, Jul 11, 2022 at 6:51 PM Martin Liška  wrote:
> >>
> >> On 7/11/22 11:55, Richard Biener wrote:
> >>> On Mon, Jul 11, 2022 at 11:16 AM Alexander Monakov  
> >>> wrote:
> 
>  On Mon, 11 Jul 2022, Rui Ueyama wrote:
> 
> >> but ignoring min_api_supported is wrong, and assuming 
> >> max_api_supported > 0
> >> is also wrong. It really should check how given [min; max] range 
> >> intersects
> >> with its own range of supported versions.
> >
> > Currently only one version is defined which is LAPI_V1. I don't think
> > LAPI_UNSPECIFIED is a version number; rather, it's an unspecified
> > value. No ordering should be defined between a defined value and an
> > unspecified value. If LAPI_UNSPECIFIED < LAPI_V1, it should be renamed
> > LAPI_V0.
> 
>  You still cannot rely on API guarantees of LAPI_V1 when the plugin does 
>  not
>  advertise it (thread safety of claim_file in this particular case).
> >>>
> >>
> >> Hi.
> >>
> >> All right, I think we should rename LAPI_UNSPECIFIED to LAPI_V0 in order
> >> to support minimal_api_supported == LAPI_V0.
> >>
> >>> So with LAPI_UNSPECIFIED all the plugin gets is the linker name and 
> >>> version.
> >>> Clarifying the documentation on LAPI_UNSPECIFIED might be nice, also
> >>> what the expectation is on the linker when the plugin returns
> >>> LAPI_UNSPECIFIED when it speficied minimal_api_supported == V1.
> >>
> >> I've clarified that linker should return a value that is in range
> >> [minimal_api_supported, maximal_api_supported] and added an abort
> >> if it's not the case.
> >>
> >> Having that, mold should respect if maximal_api_supported == LAPI_V0 is 
> >> returned
> >> by a plug-in (happens now as we miss locking for some targets).
> >>
> >> Martin
> >>
> >>> "minimal_api_supported == LAPI_UNSPECIFIED" does not make much
> >>> sense if using Ruis reading of this value?
> >>>
>  And you still should check the intersection of supported API ranges
>  and give a sane diagnostic when min_api_supported advertised by the 
>  plugin
>  exceeds LAPI_V1 (though, granted, the plugin could error out as well in 
>  this
>  case).
> 
>  Alexander
>


[PATCH] i386 testsuite: cope with --enable-default-pie

2022-07-11 Thread Alexandre Oliva via Gcc-patches


Running the testsuite on a toolchain build with --enable-default-pie
had some unexpected fails.  Adjust the tests to tolerate the effects
of this configuration option on x86_64-linux-gnu and i686-linux-gnu.

The cet-sjlj* tests get offsets before the base symbol name with PIC
or PIE.  A single pattern covering both alternatives somehow triggered
two matches rather than the single expected match, thus my narrowing
the '.*' to not skip line breaks, but that was not enough.  Still
puzzled, I separated the patterns into nonpic and !nonpic, and we get
the expected matchcounts this way.

Tests for -mfentry require an mfentry effective target, which excludes
32-bit x86 with PIC or PIE enabled, that's why the patterns that
accept the PIC sym@RELOC annotations only cover x86_64.

The pr24414 test stores in an unadorned named variable in an old-style
asm statement, to check that such asm statements get an implicit
memory clobber.  Rewriting the asm into a GCC extended asm with the
variable as an output would remove the regression it checks against.
Problem is, the literal reference to the variable is not PIC, so it's
rejected by the elf64 linker with an error, and flagged with a warning
by the elf32 one.  We could presumably make the variable references
PIC-friendly with #ifdefs, but I doubt that's worth the trouble.  I'm
just arranging for the test to be skipped if PIC or PIE are enabled by
default.

Regstrapped on x86_64-linux-gnu, and also tested on i686-linux-gnu, with
and without --enable-default-pie on both platforms.  Ok to install?

PS: There's at least one additional FAIL with --enable-default-pie in
the trunk, namely gcc.target/i386/mvc7.c, compared with the build
without default PIE, but I believe that's out of scope for my current
project.  I could sneak in a fix for it if just rerunning this specific
testcase is enough; rerunning all the tests over it, not so much.  At
the end of the patch is the patchlet that enables this one test to also
pass on all 4 test combinations.  WDYT?


for  gcc/testsuite/ChangeLog

* gcc.target/i386/cet-sjlj-6a.c: Cope with --enable-default-pie.
* gcc.target/i386/cet-sjlj-6b.c: Likewise.
* gcc.target/i386/fentryname3.c: Likewise.
* gcc.target/i386/pr24414.c: Likewise.
* gcc.target/i386/pr93492-3.c: Likewise.
* gcc.target/i386/pr93492-5.c: Likewise.
* gcc.target/i386/pr98482-1.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/cet-sjlj-6a.c |6 --
 gcc/testsuite/gcc.target/i386/cet-sjlj-6b.c |6 --
 gcc/testsuite/gcc.target/i386/fentryname3.c |3 ++-
 gcc/testsuite/gcc.target/i386/pr24414.c |1 +
 gcc/testsuite/gcc.target/i386/pr93492-3.c   |2 +-
 gcc/testsuite/gcc.target/i386/pr93492-5.c   |2 +-
 gcc/testsuite/gcc.target/i386/pr98482-1.c   |3 ++-
 7 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/cet-sjlj-6a.c 
b/gcc/testsuite/gcc.target/i386/cet-sjlj-6a.c
index 040b297aeb023..c3d0eb929424d 100644
--- a/gcc/testsuite/gcc.target/i386/cet-sjlj-6a.c
+++ b/gcc/testsuite/gcc.target/i386/cet-sjlj-6a.c
@@ -2,8 +2,10 @@
 /* { dg-require-effective-target maybe_x32 } */
 /* { dg-options "-O -maddress-mode=short -fcf-protection -mx32" } */
 /* { dg-final { scan-assembler-times "endbr64" 2 } } */
-/* { dg-final { scan-assembler-times "movq\t.*buf\\+8" 1 } } */
-/* { dg-final { scan-assembler-times "subq\tbuf\\+8" 1 } } */
+/* { dg-final { scan-assembler-times "movq\t\[^\n\]*buf\\+8" 1 { target nonpic 
} } } */
+/* { dg-final { scan-assembler-times "movq\t\[^\n\]*8\\+buf" 1 { target { ! 
nonpic } } } } */
+/* { dg-final { scan-assembler-times "subq\tbuf\\+8" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "subq\t8\\+buf" 1 { target { ! nonpic } } 
} } */
 /* { dg-final { scan-assembler-times "shrl\t\\\$3," 1 } } */
 /* { dg-final { scan-assembler-times "rdsspq" 2 } } */
 /* { dg-final { scan-assembler-times "incsspq" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/cet-sjlj-6b.c 
b/gcc/testsuite/gcc.target/i386/cet-sjlj-6b.c
index b2376e710df6a..4c52685d7d1e1 100644
--- a/gcc/testsuite/gcc.target/i386/cet-sjlj-6b.c
+++ b/gcc/testsuite/gcc.target/i386/cet-sjlj-6b.c
@@ -2,8 +2,10 @@
 /* { dg-require-effective-target maybe_x32 } */
 /* { dg-options "-O -maddress-mode=long -fcf-protection -mx32" } */
 /* { dg-final { scan-assembler-times "endbr64" 2 } } */
-/* { dg-final { scan-assembler-times "movq\t.*buf\\+16" 1 } } */
-/* { dg-final { scan-assembler-times "subq\tbuf\\+16" 1 } } */
+/* { dg-final { scan-assembler-times "movq\t\[^\n\]*buf\\+16" 1 { target 
nonpic } } } */
+/* { dg-final { scan-assembler-times "movq\t\[^\n\]*16\\+buf" 1 { target { ! 
nonpic } } } } */
+/* { dg-final { scan-assembler-times "subq\tbuf\\+16" 1 { target nonpic } } } 
*/
+/* { dg-final { scan-assembler-times "subq\t16\\+buf" 1 { target { ! nonpic } 
} } } */
 /* { dg-final { scan-assembler-times "shrl\t\\\$3," 1 } } */
 /* { dg-final { scan-assembler-times "rds

Re: [PATCH] i386 testsuite: cope with --enable-default-pie

2022-07-11 Thread Mike Stump via Gcc-patches
On Jul 11, 2022, at 6:47 PM, Alexandre Oliva  wrote:
> 
> Running the testsuite on a toolchain build with --enable-default-pie
> had some unexpected fails.

> Regstrapped on x86_64-linux-gnu, and also tested on i686-linux-gnu, with
> and without --enable-default-pie on both platforms.  Ok to install?

Ok.

> PS: There's at least one additional FAIL with --enable-default-pie in
> the trunk, namely gcc.target/i386/mvc7.c

> WDYT?

Seems reasonable.


Re: [PATCH] [RFC]Support vectorization for Complex type.

2022-07-11 Thread Hongtao Liu via Gcc-patches
On Mon, Jul 11, 2022 at 7:47 PM Richard Biener via Gcc-patches
 wrote:
>
> On Mon, Jul 11, 2022 at 5:44 AM liuhongt  wrote:
> >
> > The patch only handles load/store(including ctor/permutation, except
> > gather/scatter) for complex type, other operations don't needs to be
> > handled since they will be lowered by pass cplxlower.(MASK_LOAD is not
> > supported for complex type, so no need to handle either).
>
> (*)
>
> > Instead of support vector(2) _Complex double, this patch takes vector(4)
> > double as vector type of _Complex double. Since vectorizer originally
> > takes TYPE_VECTOR_SUBPARTS as nunits which is not true for complex
> > type, the patch handles nunits/ncopies/vf specially for complex type.
>
> For the limited set above(*) can you explain what's "special" about
> vector(2) _Complex
> vs. vector(4) double, thus why we need to have STMT_VINFO_COMPLEX_P at all?
Supporting a vector(2) complex  is a straightforward idea, just like
supporting other scalar type in vectorizer, but it requires more
efforts(in the backend and frontend), considering that most of
operations of complex type will be lowered into realpart and imagpart
operations, supporting a vector(2) complex does not look that
necessary. Then it comes up with supporting vector(4) double(with
adjustment of vf/ctor/permutation), the vectorizer only needs to
handle the vectorization of the move operation of the complex type(no
need to worry about wrongly mapping vector(4) double multiplication to
complex type multiplication since it's already lowered before
vectorizer).
stmt_info does not record the scalar type, in order to avoid duplicate
operation like getting a lhs type from stmt to determine whether it is
a complex type, STMT_VINFO_COMPLEX_P bit is added, this bit is mainly
initialized in vect_analyze_data_refs and vect_get_vector_types_for_
stmt.
>
> I wonder to what extent your handling can be extended to support 
> re-vectorizing
> (with a higher VF for example) already vectorized code?  The vectorizer giving
> up on vector(2) double looks quite obviously similar to it giving up
> on _Complex double ...
Yes, it can be extended to vector(2) double/float/int/ with a bit
adjustment(exacting element by using bit_field instead of
imagpart_expr/realpart_expr).
> It would be a shame to not use the same underlying mechanism for dealing with
> both, where for the vector case obviously vector(4) would be supported as 
> well.
>
> In principle _Complex double operations should be two SLP lanes but it seems 
> you
> are handling them with classical interleaving as well?
I'm only handling move operations, for other operations it will be
lowered to realpart and imagpart and thus two SLP lanes.
>
> Thanks,
> Richard.
>
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Also test the patch for SPEC2017 and find there's complex type vectorization
> > in 510/549(but no performance impact).
> >
> > Any comments?
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/106010
> > * tree-vect-data-refs.cc (vect_get_data_access_cost):
> > Pass complex_p to vect_get_num_copies to avoid ICE.
> > (vect_analyze_data_refs): Support vectorization for Complex
> > type with vector scalar types.
> > * tree-vect-loop.cc (vect_determine_vf_for_stmt_1): VF should
> > be half of TYPE_VECTOR_SUBPARTS when complex_p.
> > * tree-vect-slp.cc (vect_record_max_nunits): nunits should be
> > half of TYPE_VECTOR_SUBPARTS when complex_p.
> > (vect_optimize_slp): Support permutation for complex type.
> > (vect_slp_analyze_node_operations_1): Double nunits in
> > vect_get_num_vectors to get right SLP_TREE_NUMBER_OF_VEC_STMTS
> > when complex_p.
> > (vect_slp_analyze_node_operations): Ditto.
> > (vect_create_constant_vectors): Support CTOR for complex type.
> > (vect_transform_slp_perm_load): Support permutation for
> > complex type.
> > * tree-vect-stmts.cc (vect_init_vector): Support complex type.
> > (vect_get_vec_defs_for_operand): Get vector type for
> > complex type.
> > (vectorizable_store): Get right ncopies/nunits for complex
> > type, also return false when complex_p and
> > !TYPE_VECTOR_SUBPARTS.is_constant ().
> > (vectorizable_load): Ditto.
> > (vect_get_vector_types_for_stmt): Get vector type for complex type.
> > * tree-vectorizer.h (STMT_VINFO_COMPLEX_P): New macro.
> > (vect_get_num_copies): New overload.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr106010-1a.c: New test.
> > * gcc.target/i386/pr106010-1b.c: New test.
> > * gcc.target/i386/pr106010-1c.c: New test.
> > * gcc.target/i386/pr106010-2a.c: New test.
> > * gcc.target/i386/pr106010-2b.c: New test.
> > * gcc.target/i386/pr106010-2c.c: New test.
> > * gcc.target/i386/pr106010-3a.c: New test.
> > * gcc

Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-11 Thread Richard Biener via Gcc-patches
On Mon, Jul 11, 2022 at 6:35 PM Alexander Monakov  wrote:
>
> On Mon, 11 Jul 2022, Martin Liška wrote:
>
> > I've clarified that linker should return a value that is in range
> > [minimal_api_supported, maximal_api_supported] and added an abort
> > if it's not the case.
>
> I noticed that we are placing a trap for C++ consumers such as mold
> by passing min/max_api_supported as enum values. Unlike C, C++ disallows
> out-of-range enum values, so when mold does
>
> enum PluginLinkerAPIVersion {
>   LAPI_V0 = 0,
>   LAPI_V1,
> };
>
> get_api_version(const char *plugin_identifier,
> unsigned plugin_version,
> PluginLinkerAPIVersion minimal_api_supported,
> PluginLinkerAPIVersion maximal_api_supported,
> const char **linker_identifier,
> const char **linker_version) {
>
> checks such as 'min_api_supported > LAPI_V1' can be optimized out. Also,
> if a future tool passes LAPI_V2, it will trigger Clang's UBSan (GCC
> -fsanitize-undefined=enum instruments loads but not retrieval of function
> arguments).
>
> I'd suggest to fix this on both sides by changing the arguments to plain
> integer types.

That's a good point - the enum itself is then also somewhat redundant
if it just specifies symbolic names for 0, 1, ... but we can keep it for
documentation purposes.

>
> Alexander


XFAIL 'offloading_enabled' diagnostics issue in 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551] (was: Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases)

2022-07-11 Thread Thomas Schwinge
Hi!

On 2022-07-11T11:27:12+0200, I wrote:
> [...], I've just pushed to master branch
> commit 06b2a2abe26554c6f9365676683d67368cbba206
> "Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases"

> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
> @@ -17,7 +17,7 @@ const int n = 100;
>  #define check_reduction(gwv_par, gwv_loop)   \
>{  \
>s1 = 2; s2 = 5;\
> -DO_PRAGMA (acc parallel gwv_par copy (s1, s2))   \
> +DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) /* { dg-line DO_PRAGMA_loc } 
> */ \
>  DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2))   \
>  for (i = 0; i < n; i++)  \
>{  \
> @@ -45,8 +45,10 @@ main (void)
>
>/* Nvptx targets require a vector_length or 32 in to allow spinlocks with
>   gangs.  */
> -  check_reduction (num_workers (nw) vector_length (vl), worker);
> -  /* { dg-warning "region is vector partitioned but does not contain vector 
> partitioned code" "test1" { target *-*-* } pragma_loc } */
> +  check_reduction (num_workers (nw) vector_length (vl), worker); /* { 
> dg-line check_reduction_loc }
> +  /* { dg-warning "22:region is vector partitioned but does not contain 
> vector partitioned code" "" { target *-*-* } pragma_loc }
> + { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* } 
> DO_PRAGMA_loc }
> + { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* 
> } check_reduction_loc } */

Oh my, PR101551 "[offloading] Differences in diagnostics etc."
strikes again...  The latter two 'note' diagnostics are currently
only emitted in non-offloading configurations.  I've now pushed to
master branch commit 3723aedaad20a129741c2f6f3c22b3dd1220a3fc
"XFAIL 'offloading_enabled' diagnostics issue in
'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551]", see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 3723aedaad20a129741c2f6f3c22b3dd1220a3fc Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 12 Jul 2022 08:17:37 +0200
Subject: [PATCH] XFAIL 'offloading_enabled' diagnostics issue in
 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551]

Fix-up for recent commit 06b2a2abe26554c6f9365676683d67368cbba206
"Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases".
Supposedly it's the same issue as in
, where I'd
noted that:

| [...] with an offloading-enabled build of GCC we're losing
| "note: in expansion of macro '[...]'" diagnostics.
| (Effectively '-ftrack-macro-expansion=0'?)

	PR middle-end/101551
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: XFAIL
	'offloading_enabled' diagnostics issue.
---
 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
index 72094609f0f..ddccfe89e73 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
@@ -45,10 +45,11 @@ main (void)
 
   /* Nvptx targets require a vector_length or 32 in to allow spinlocks with
  gangs.  */
-  check_reduction (num_workers (nw) vector_length (vl), worker); /* { dg-line check_reduction_loc }
+  check_reduction (num_workers (nw) vector_length (vl), worker); /* { dg-line check_reduction_loc } */
   /* { dg-warning "22:region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } pragma_loc }
- { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* } DO_PRAGMA_loc }
- { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* } check_reduction_loc } */
+ { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* xfail offloading_enabled } DO_PRAGMA_loc }
+ { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* xfail offloading_enabled } check_reduction_loc }
+ TODO See PR101551 for 'offloading_enabled' XFAILs.  */
   check_reduction (vector_length (vl), vector);
   check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang
 		   worker vector);
-- 
2.35.1



Re: [PATCH] Allocate general register(memory/immediate) for 16/32/64-bit vector bit_op patterns.

2022-07-11 Thread Hongtao Liu via Gcc-patches
On Mon, Jul 11, 2022 at 4:03 PM Uros Bizjak via Gcc-patches
 wrote:
>
> On Mon, Jul 11, 2022 at 3:15 AM liuhongt  wrote:
> >
> > And split it to GPR-version instruction after reload.
> >
> > This will enable below optimization for 16/32/64-bit vector bit_op
> >
> > -   movd(%rdi), %xmm0
> > -   movd(%rsi), %xmm1
> > -   pand%xmm1, %xmm0
> > -   movd%xmm0, (%rdi)
> > +   movl(%rsi), %eax
> > +   andl%eax, (%rdi)
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
>
> The patch will create many interunit moves (xmm <-> gpr) for anything
> but the most simple logic sequences, because operations with
> memory/immediate will be forced into GPR registers, while reg/reg
> operations will remain in XMM registers.
Agree not to deal with mem/immediate at first.
>
> I tried to introduce GPR registers to MMX logic insns in the past and
> observed the above behavior, but perhaps RA evolved in the mean time
> to handle different register sets better (especially under register
> pressure). However, I would advise to be careful with this
> functionality.
>
> Perhaps this problem should be attacked in stages. First, please
> introduce GPR registers to MMX logic instructions (similar to how
> VI_16_32 mode instructions are handled). After RA effects will be
There's "?r" in VI_16_32 logic instructions which prevent RA allocate
gpr for testcase in the patch.
Is it ok to remove "?" for them(Also add alternative "r" instead of
"?r" in mmx logic insns)?
If there's other instructions that prefer "v to "r", then RA will
allocate "v", but for logic instructions, "r" and “v" should be
treated equally, just as in the 16/32/64-bit vector
mov_internal.
> analysed, only then memory/immediate handling should be added. Also,
> please don't forget to handle ANDNOT insn - TARGET_BMI slightly
> complicates this part, but this is also solved with VI_16_32 mode
> instructions.
>
> Uros.
>
> >
> > gcc/ChangeLog:
> >
> > PR target/106038
> > * config/i386/mmx.md (3): Expand
> > with (clobber (reg:CC flags_reg)) under TARGET_64BIT
> > (mmx_code>3): Ditto.
> > (*mmx_3_1): New define_insn, add post_reload
> > splitter after it.
> > (*3): New define_insn, also add post_reload
> > splitter after it.
> > (mmxinsnmode): New mode attribute.
> > (VI_16_32_64): New mode iterator.
> > (*mov_imm): Refactor with mmxinsnmode.
> > * config/i386/predicates.md
> > (nonimmediate_or_x86_64_vector_cst): New predicate.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr106038-1.c: New test.
> > * gcc.target/i386/pr106038-2.c: New test.
> > * gcc.target/i386/pr106038-3.c: New test.
> > ---
> >  gcc/config/i386/mmx.md | 131 +++--
> >  gcc/config/i386/predicates.md  |   4 +
> >  gcc/testsuite/gcc.target/i386/pr106038-1.c |  61 ++
> >  gcc/testsuite/gcc.target/i386/pr106038-2.c |  35 ++
> >  gcc/testsuite/gcc.target/i386/pr106038-3.c |  17 +++
> >  5 files changed, 213 insertions(+), 35 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-3.c
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 3294c1e6274..85b06abea27 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -75,6 +75,11 @@ (define_mode_iterator V_16_32_64
> >  (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> >  (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
> >
> > +(define_mode_iterator VI_16_32_64
> > +   [V2QI V4QI V2HI
> > +(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
> > +(V2SI "TARGET_64BIT")])
> > +
> >  ;; V2S* modes
> >  (define_mode_iterator V2FI [V2SF V2SI])
> >
> > @@ -86,6 +91,14 @@ (define_mode_attr mmxvecsize
> >[(V8QI "b") (V4QI "b") (V2QI "b")
> > (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
> >
> > +;; Mapping to same size integral mode.
> > +(define_mode_attr mmxinsnmode
> > +  [(V8QI "DI") (V4QI "SI") (V2QI "HI")
> > +   (V4HI "DI") (V2HI "SI")
> > +   (V2SI "DI")
> > +   (V4HF "DI") (V2HF "SI")
> > +   (V2SF "DI")])
> > +
> >  (define_mode_attr mmxdoublemode
> >[(V8QI "V8HI") (V4HI "V4SI")])
> >
> > @@ -350,22 +363,7 @@ (define_insn_and_split "*mov_imm"
> >HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
> > mode);
> >operands[1] = GEN_INT (val);
> > -  machine_mode mode;
> > -  switch (GET_MODE_SIZE (mode))
> > -{
> > -case 2:
> > -  mode = HImode;
> > -  break;
> > -case 4:
> > -  mode = SImode;
> > -  break;
> > -case 8:
> > -  mode = DImode;
> > -  break;
> > -default:
> > -  gcc_unreachable ();
> > -}
> > -  operands[

Re: Mips: Fix kernel_stat structure size

2022-07-11 Thread Dimitrije Milosevic
Hi Hans-Peter,
You're right, this is not ok for the O32 ABI. Your change however, broke the 
functionality
for the N32 ABI. AFAIK, the changes like this should go through LLVM first 
(yours didn't),
so I will send out a review, covering both 32 ABIs, meaning that both O32 and 
N32 ABIs 
will be working properly. As for this change, I'm not sure what should be done? 
Should this be committed now, while the LLVM change is cherry-picked once it's 
committed.
Best regards,
Dimitrije Milosevic


From: Hans-Peter Nilsson 
Sent: Saturday, July 9, 2022 4:44 PM
To: Xi Ruoyao 
Cc: Dimitrije Milosevic ; Djordje Todorovic 
; gcc-patches@gcc.gnu.org 

Subject: Re: Mips: Fix kernel_stat structure size 
 
On Sat, 9 Jul 2022, Xi Ruoyao wrote:

> On Fri, 2022-07-08 at 21:42 -0400, Hans-Peter Nilsson wrote:
> > On Fri, 1 Jul 2022, Dimitrije Milosevic wrote:
> >
> > > Fix kernel_stat structure size for non-Android 32-bit Mips.
> > > LLVM currently has this value for the kernel_stat structure size,
> > > as per compiler-rt/lib/sanitizer-
> > > common/sanitizer_platform_limits_posix.h.
> > > This also resolves one of the build issues for non-Android 32-bit
> > > Mips.
> >
> > I insist that PR105614 comment #7 is the way to go, i.e. fix
> > the merge error, avoiding the faulty include that it
> > reintroduced.? Was this tested on O32?
>
> I'm pretty sure it is *not* the way to go.
>
> Sanitizer does not really intercept system call.  It intercepts libc
> stat() or lstat() etc. calls.  So you need to keep struct_kernel_stat_sz
> same as the size of struct stat in libc, i. e. "the size of buffer which
> *libc* stat()-like functions writing into".
>
> The "kernel_" in the name is just misleading.

You make a sound argument, and I just refer to my old commit
9f943b2446f2d0a.  Either way, the proof is in the pussing: was
this tested for O32 or not?

> And, if you still think it should be the way to go, let's submit the
> change to LLVM and get it reviewed properly.

Sorry, I've no longer interest in mips besides I'd like to see
un-broken what I helped getting in a working state (support ASAN
in gcc for mips O32).

brgds, H-P

Re: [PATCH 1/2] Add gcc/make-unique.h

2022-07-11 Thread Jonathan Wakely via Gcc-patches
On Tue, 12 Jul 2022, 01:25 David Malcolm,  wrote:

> On Fri, 2022-07-08 at 22:16 +0100, Jonathan Wakely wrote:
> > On Fri, 8 Jul 2022 at 21:47, David Malcolm via Gcc 
> > wrote:
> > >
> > > std::unique_ptr is C++11, and I'd like to use it in the
> > > gcc/analyzer
> > > subdirectory, at least.  The following patch eliminates a bunch of
> > > "takes ownership" comments and manual "delete" invocations in favor
> > > of simply using std::unique_ptr.
> > >
> > > The problem is that the patch makes use of std::make_unique, but
> > > that
> > > was added in C++14.
> > >
> > > I've heard that it's reasonably easy to reimplement
> > > std::make_unique,
> > > but I'm not sure that my C++11 skills are up to it.
> >
> > You know we have an implementation of std::make_unique in GCC, with a
> > GCC-compatible licence that you can look at, right? :-)
> >
> > But it's not really necessary. There are only two reasons to prefer
> > make_unique over just allocating an object with new and constructing
> > a
> > unique_ptr from it:
> >
> > 1) avoid a "naked" new in your code (some coding styles like this,
> > but
> > it's not really important as long as the 'delete' is managed
> > automatically by unique_ptr).
> >
> > 2) exception-safety when allocating multiple objects as args to a
> > function, see https://herbsutter.com/gotw/_102/ for details.
> > Irrelevant for GCC, because we build without exceptions.
>
> [moving from gcc to gcc-patches mailing list]
>
> Also, I *think* it's a lot less typing, since I can write just:
>
>   std::make_unique (args)
>
> rather than
>
>   std::unique_ptr (new
> name_of_type_which_could_be_long (args));
>
> >
> >
> >
> > > Is there:
> > > (a) an easy way to implement a std::make_unique replacement
> > > (e.g. in system.h? what to call it?), or
> >
> > If you don't care about using it to create unique_ptr arrays,
> > it's trivial:
> >
> >   template
> > inline typename std::enable_if::value,
> > std::unique_ptr>::type
> > make_unique(Args&&... args)
> > { return std::unique_ptr(new T(std::forward(args)...));
> > }
> >
> > To add the overload that works for arrays is a little trickier.
>
> Thanks!
>
> I tried adding it to gcc/system.h, but anything that uses it needs to
> have std::unique_ptr declared, which meant forcibly including 
> from gcc/system.h
>
> So instead, here's a patch that adds a new gcc/make-unique.h header,
> containing just the template decl above (in the root namespace, rather
> than std::, which saves a bit more typing).
>

Adding things to std isn't allowed anyway, so that's correct.


> I've successfully bootstrapped®ression-tested a version of my earlier
> analyzer patch that uses this patch (see patch 2 of the kit, which has
> lots of usage examples).
>
> OK for trunk?
>
> Dave
>
>
>
> This patch adds gcc/make-unique.h, containing a minimal C++11
> implementation of make_unique (std::make_unique is C++14).
>
> gcc/ChangeLog:
> * make-unique.h: New file.
>
> Signed-off-by: David Malcolm 
> ---
>  gcc/make-unique.h | 41 +
>  1 file changed, 41 insertions(+)
>  create mode 100644 gcc/make-unique.h
>
> diff --git a/gcc/make-unique.h b/gcc/make-unique.h
> new file mode 100644
> index 000..c99c5328545
> --- /dev/null
> +++ b/gcc/make-unique.h
> @@ -0,0 +1,41 @@
> +/* Minimal implementation of make_unique for C++11 compatibility.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +.  */
> +
> +#ifndef GCC_MAKE_UNIQUE
> +#define GCC_MAKE_UNIQUE
> +
> +/* This header uses std::unique_ptr, but  can't be directly
> +   included due to issues with macros.  Hence it must be included from
> +   system.h by defining INCLUDE_MEMORY in any source file using it.  */
> +
> +#ifndef INCLUDE_MEMORY
> +# error "You must define INCLUDE_MEMORY before including system.h to use
> make-unique.h"
> +#endif
>

You also need  for the enable_if and is_array traits. With
libstdc++ that gets included by  but that's guaranteed for other
library implementations.

I don't know if that had the same kind of issues as other system headers or
if it can just be included here.


+
> +/* Minimal implementation of make_unique for C++11 compatibility
> +   (std::make_unique is C++14).  */
> +
> +template
> +inline typename std::enable_if::value,
> std::unique_ptr>::type
> +make_unique(A