[PATCH] varasm: Fix up constpool alias handling [PR99872]

2021-04-07 Thread Jakub Jelinek via Gcc-patches
Hi!

Last year, I have added in r11-2944-g0106300f6c3f7bae5eb1c46dbd45aa07c94e1b15
(aka PR54201 fix) code to find bitwise duplicates in constant pool and output
them as aliases instead of duplicating the data.

Unfortunately this broke mingw32 -m32.
On most targets, ASM_GENERATE_INTERNAL_LABEL with "LC" emits something like
*.LC123 and the targets don't add user label prefixes, so the aliases
that we print should be something like
.set.LC5, .LC6
or
.set.LC5, .LC6 + 8
and I wasn't sure if ASM_OUTPUT_DEF can handle the * and therefore I have
stripped it.
But, on mingw32 -m32, ASM_GENERATE_INTERNAL_LABEL with "LC" emits
*LC123 and the target has user label prefixes, which means what I wrote
results in
LC6:
...
.set_LC5, _LC6
which results in unresolved symbols.  I went through the ASM_OUTPUT_DEF
definitions of all targets and all of them use assemble_name twice under
the hood (with various differences on what they print before, in between or
after those names).  And assemble_name handles the name encoding properly,
so if we pass it ASM_OUTPUT_DEF (..., "*.LC123", "*.LC456+16") it will
emit .LC123 and .LC456+16 and if we pass it "*LC789", it will emit
LC789.

Bootstrapped/regtested on x86_64-linux and i686-linux and Jonathan said in
the PR he has tested it on mingw32.

Ok for trunk?

2021-04-07  Jakub Jelinek  

PR target/99872
* varasm.c (output_constant_pool_contents): Don't strip name encoding
from XSTR (desc->sym, 0) or from label before passing those to
ASM_OUTPUT_DEF.

--- gcc/varasm.c.jj 2021-02-19 12:14:11.082112515 +0100
+++ gcc/varasm.c2021-04-06 13:26:21.708116790 +0200
@@ -4290,13 +4290,13 @@ output_constant_pool_contents (struct rt
 if (desc->mark < 0)
   {
 #ifdef ASM_OUTPUT_DEF
-   const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
+   const char *name = XSTR (desc->sym, 0);
char label[256];
char buffer[256 + 32];
const char *p;
 
ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
-   p = targetm.strip_name_encoding (label);
+   p = label;
if (desc->offset)
  {
sprintf (buffer, "%s+%ld", p, (long) (desc->offset));

Jakub



[PATCH] Add debug_vn_reference_ops helper

2021-04-07 Thread Richard Biener
This factors out a helper to dump VN reference operands, sth that
proves useful in debugging VN issues.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-04-07  Richard Biener  

* tree-ssa-sccvn.h (print_vn_reference_ops): Declare.
* tree-ssa-pre.c (print_pre_expr): Factor out VN reference operand
printing...
* tree-ssa-sccvn.c (print_vn_reference_ops): ... into this new
function.
(debug_vn_reference_ops): New.
---
 gcc/tree-ssa-pre.c   | 39 +--
 gcc/tree-ssa-sccvn.c | 49 
 gcc/tree-ssa-sccvn.h |  1 +
 3 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 1a022fbaf8d..91dd49200c3 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -1067,45 +1067,8 @@ print_pre_expr (FILE *outfile, const pre_expr expr)
 
 case REFERENCE:
   {
-   vn_reference_op_t vro;
-   unsigned int i;
vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
-   fprintf (outfile, "{");
-   for (i = 0;
-ref->operands.iterate (i, &vro);
-i++)
- {
-   bool closebrace = false;
-   if (vro->opcode != SSA_NAME
-   && TREE_CODE_CLASS (vro->opcode) != tcc_declaration)
- {
-   fprintf (outfile, "%s", get_tree_code_name (vro->opcode));
-   if (vro->op0)
- {
-   fprintf (outfile, "<");
-   closebrace = true;
- }
- }
-   if (vro->op0)
- {
-   print_generic_expr (outfile, vro->op0);
-   if (vro->op1)
- {
-   fprintf (outfile, ",");
-   print_generic_expr (outfile, vro->op1);
- }
-   if (vro->op2)
- {
-   fprintf (outfile, ",");
-   print_generic_expr (outfile, vro->op2);
- }
- }
-   if (closebrace)
-   fprintf (outfile, ">");
-   if (i != ref->operands.length () - 1)
- fprintf (outfile, ",");
- }
-   fprintf (outfile, "}");
+   print_vn_reference_ops (outfile, ref->operands);
if (ref->vuse)
  {
fprintf (outfile, "@");
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 0567a2e9ff5..16e75b518b0 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -249,6 +249,55 @@ vn_reference_hasher::equal (const vn_reference_s *v, const 
vn_reference_s *c)
 typedef hash_table vn_reference_table_type;
 typedef vn_reference_table_type::iterator vn_reference_iterator_type;
 
+/* Pretty-print OPS to OUTFILE.  */
+
+void
+print_vn_reference_ops (FILE *outfile, const vec ops)
+{
+  vn_reference_op_t vro;
+  unsigned int i;
+  fprintf (outfile, "{");
+  for (i = 0; ops.iterate (i, &vro); i++)
+{
+  bool closebrace = false;
+  if (vro->opcode != SSA_NAME
+ && TREE_CODE_CLASS (vro->opcode) != tcc_declaration)
+   {
+ fprintf (outfile, "%s", get_tree_code_name (vro->opcode));
+ if (vro->op0)
+   {
+ fprintf (outfile, "<");
+ closebrace = true;
+   }
+   }
+  if (vro->op0)
+   {
+ print_generic_expr (outfile, vro->op0);
+ if (vro->op1)
+   {
+ fprintf (outfile, ",");
+ print_generic_expr (outfile, vro->op1);
+   }
+ if (vro->op2)
+   {
+ fprintf (outfile, ",");
+ print_generic_expr (outfile, vro->op2);
+   }
+   }
+  if (closebrace)
+   fprintf (outfile, ">");
+  if (i != ops.length () - 1)
+   fprintf (outfile, ",");
+}
+  fprintf (outfile, "}");
+}
+
+DEBUG_FUNCTION void
+debug_vn_reference_ops (const vec ops)
+{
+  print_vn_reference_ops (stderr, ops);
+  fputc ('\n', stderr);
+}
 
 /* The set of VN hashtables.  */
 
diff --git a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h
index e4f1ff1eb20..6df526c269b 100644
--- a/gcc/tree-ssa-sccvn.h
+++ b/gcc/tree-ssa-sccvn.h
@@ -265,6 +265,7 @@ void vn_reference_lookup_call (gcall *, vn_reference_t *, 
vn_reference_t);
 vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, 
alias_set_type,
   tree, vec,
   tree, unsigned int);
+void print_vn_reference_ops (FILE *, const vec);
 
 bool vn_nary_op_eq (const_vn_nary_op_t const vno1,
const_vn_nary_op_t const vno2);
-- 
2.26.2


[PATCH] tree-optimization/99947 - avoid v.safe_push (v[0])

2021-04-07 Thread Richard Biener
This avoids (again) the C++ pitfall of pushing a reference to
sth being reallocated.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-04-07  Richard Biener  

PR tree-optimization/99947
* tree-vect-loop.c (vectorizable_induction): Pre-allocate
steps vector to avoid pushing elements from the reallocated
vector.

* gcc.dg/torture/pr99947.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr99947.c | 18 ++
 gcc/tree-vect-loop.c   |  3 ++-
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr99947.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr99947.c 
b/gcc/testsuite/gcc.dg/torture/pr99947.c
new file mode 100644
index 000..2cf3ec6aeb8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr99947.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+int a, b, d, e;
+short c;
+void f() {
+  for (; e; e++) {
+int g = 6;
+for (; g > 2; g--) {
+  int i = -8;
+  while (i < 20) {
+i += 5;
+a += b;
+  }
+  c *= d;
+}
+b--;
+  }
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 4e928c65b31..93fa2928e00 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8202,11 +8202,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
  /* Fill up to the number of vectors we need for the whole group.  */
  nivs = least_common_multiple (group_size,
const_nunits) / const_nunits;
+ vec_steps.reserve (nivs-ivn);
  for (; ivn < nivs; ++ivn)
{
  SLP_TREE_VEC_STMTS (slp_node)
.quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
- vec_steps.safe_push (vec_steps[0]);
+ vec_steps.quick_push (vec_steps[0]);
}
}
 
-- 
2.26.2


[PATCH] arm: Don't try and vmov labels [PR99647]

2021-04-07 Thread Alex Coplan via Gcc-patches
Hi all,

When investigating this PR, I was initially confused as to why we were
matching a vec_duplicate on the RHS of *mve_mov (since
general_operand does not match vec_duplicates). It turns out that there
are two patterns in mve.md named *mve_mov and the second one
matches vec_duplicates. I've renamed this pattern to *mve_vdup to
avoid further confusion.

The issue in the PR is that the predicate for the operand of the
vec_duplicate in *mve_vdup is not strict enough: it allows (e.g.)
labels when we really only want to allow registers and const_ints.

Testing:
 * Bootstrapped and regtested on arm-linux-gnueabihf, no regressions.
 * Regtested an arm-eabi cross configured with --with-float=hard
 --with-arch=armv8.1-m.main+mve: no regressions. Also fixes a couple of
 existing torture test failures at -O3 for gcc.dg/torture/pr47958-1.c.

OK for trunk and eventual backport to the GCC 10 branch?

Thanks,
Alex

gcc/ChangeLog:

PR target/99647
* config/arm/mve.md (*mve_mov): Rename to...
(*mve_vdup): ... this. Also tighten up predicate for
vec_duplicate operand.

gcc/testsuite/ChangeLog:

PR target/99647
* gcc.c-torture/compile/pr99647.c: New test.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 135186371e7..d79b3156077 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -104,10 +104,10 @@ (define_insn "*mve_mov"
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,*")])
 
-(define_insn "*mve_mov"
+(define_insn "*mve_vdup"
   [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w")
(vec_duplicate:MVE_types
- (match_operand:SI 1 "nonmemory_operand" "r,i")))]
+ (match_operand:SI 1 "reg_or_int_operand" "r,i")))]
   "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
 {
   if (which_alternative == 0)
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr99647.c 
b/gcc/testsuite/gcc.c-torture/compile/pr99647.c
new file mode 100644
index 000..701155dd856
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr99647.c
@@ -0,0 +1,5 @@
+/* { dg-do assemble } */
+typedef int __attribute((vector_size(16))) V;
+V f(void) {
+  return (V){ (int)f, (int)f, (int)f, (int)f };
+}


Re: vect: Don't split store groups if we have IFN_STORE_LANES [PR99873]

2021-04-07 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> On Tue, Apr 6, 2021 at 12:03 PM Richard Sandiford via Gcc-patches
>  wrote:
>>
>> As noted in the PR, we were no longer using ST3 for the testcase and
>> instead stored each lane individually.  This is because we'd split
>> the store group during SLP and couldn't recover when SLP failed.
>>
>> However, we seem to get better code with ST3 and ST4 even if
>> SLP would have succeeded, such as for vect-complex-5.c.
>> I think the best thing for GCC 11 would therefore be to skip
>> the split entirely if we could use IFN_STORE_LANES.  A downside
>> of this is that SLP can handle smaller iteration counts than
>> IFN_STORE_LANES can, but we don't have the infrastructure to
>> choose reliably based on that.
>>
>> Tested on aarch64-linux-gnu (with and without SVE), arm-linux-gnueabihf,
>> armeb-eabi and x86_64-linux-gnu.  OK to install?
>
> One of the cases where splitting helps is if you have say V2DFmode
> and a group size of 4 but if you break up the group into sizes of 2
> then you get two V2DFmode group size 2 SLP subgraphs.  So I wonder
> if, since you look for a vector type, want to only disable splitting
> in case the resulting vector type has the same number of lanes
> as the group size?  (and if not, instead limit where we consider splitting)

Hmm, yeah.  If we can apply SLP to full vectors within a loop iteration
then I agree it's still better to split.  I think the test should favour
ST3/ST4 more though: split only if one of the new group sizes is a
multiple of the vector size.  If the vector type has more lanes than the
group size then we should still use ST3/ST4.

How does this version look?  Tested as before.

Thanks,
Richard


gcc/
PR tree-optimization/99873
* tree-vect-slp.c (vect_slp_prefer_store_lanes_p): New function.
(vect_build_slp_instance): Don't split store groups that could
use IFN_STORE_LANES.

gcc/testsuite/
* gcc.dg/vect/slp-21.c: Only expect 2 of the loops to use SLP
if IFN_STORE_LANES is available.
* vect/vect-complex-5.c: Expect no loops to use SLP if
IFN_STORE_LANES is available.
* gcc.target/aarch64/pr99873_1.c: New test.
* gcc.target/aarch64/pr99873_2.c: Likewise.
* gcc.target/aarch64/pr99873_3.c: Likewise.
* gcc.target/aarch64/sve/pr99873_1.c: Likewise.
* gcc.target/aarch64/sve/pr99873_2.c: Likewise.
* gcc.target/aarch64/sve/pr99873_3.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/slp-21.c|  4 +--
 gcc/testsuite/gcc.dg/vect/vect-complex-5.c|  3 +-
 gcc/testsuite/gcc.target/aarch64/pr99873_1.c  | 17 ++
 gcc/testsuite/gcc.target/aarch64/pr99873_2.c  | 20 
 gcc/testsuite/gcc.target/aarch64/pr99873_3.c  | 20 
 .../gcc.target/aarch64/sve/pr99873_1.c| 15 +
 .../gcc.target/aarch64/sve/pr99873_2.c| 18 +++
 .../gcc.target/aarch64/sve/pr99873_3.c| 18 +++
 gcc/tree-vect-slp.c   | 31 ++-
 9 files changed, 142 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_3.c

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index ceec7f5c410..eadbd521966 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2458,6 +2458,34 @@ vect_match_slp_patterns (slp_instance instance, vec_info 
*vinfo,
   return vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
 }
 
+/* STMT_INFO is a store group of size GROUP_SIZE that we are considering
+   splitting into two, with the first split group having size NEW_GROUP_SIZE.
+   Return true if we could use IFN_STORE_LANES instead and if that appears
+   to be the better approach.  */
+
+static bool
+vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
+  unsigned int group_size,
+  unsigned int new_group_size)
+{
+  tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (!vectype)
+return false;
+  /* Allow the split if one of the two new groups would operate on full
+ vectors *within* rather than across one scalar loop iteration.
+ This is purely a heuristic, but it should work well for group
+ sizes of 3 and 4, where the possible splits are:
+
+   3->2+1:  OK if the vector has exactly two elements
+   4->2+2:  Likewise
+   4->3+1:  Less clear-cut.  */
+  if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
+  || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))

Re: [PATCH] Improve rtx insn vec output

2021-04-07 Thread Xionghu Luo via Gcc-patches



On 2021/4/7 14:57, Richard Biener wrote:

On Wed, Apr 7, 2021 at 7:42 AM Xionghu Luo  wrote:


print_rtl will dump the rtx_insn from current until LAST.  But it is only
useful to see the particular insn that called by print_rtx_insn_vec,
Let's call print_rtl_single to display that insn in the gcse and store-motion
pass dump.


Can you cite a before/after dump snippet to clarify?


Before the patch, pr24257.c.258r.store_motion dumps:

  Pattern (  1): (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
 ANTIC stores : {(insn 18 17 19 3 (set (mem:SI (plus:DI 
(reg/v/f:DI 124 [ s ])

(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
(reg:SI 131)) 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 131)
(nil)))

(code_label 19 18 20 4 2 (nil) [1 uses])

(note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)

(insn 21 20 22 4 (set (reg:SI 132)
(const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
 (nil))

(insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4 
A32])

(reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 132)
(expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
(nil
}
 AVAIL stores : {(insn 7 4 10 2 (set (mem:SI (plus:DI 
(reg/v/f:DI 124 [ s ])

(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
(subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)) "pr24257.c":23:11 516 
{*movsi_internal1}

 (nil))

(insn 10 7 11 2 (set (reg:CC 126)
(compare:CC (subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)
(const_int 31 [0x1f]))) "pr24257.c":10:18 751 {*cmpsi_signed}
 (nil))

(jump_insn 11 10 12 2 (set (pc)
(if_then_else (gt (reg:CC 126)
(const_int 0 [0]))
(label_ref 19)
(pc))) "pr24257.c":10:18 834 {*cbranch}
 (expr_list:REG_DEAD (reg:CC 126)
(int_list:REG_BR_PROB 118111604 (nil)))
 -> 19)

(note 12 11 13 3 [bb 3] NOTE_INSN_BASIC_BLOCK)

(insn 13 12 14 3 (parallel [
(set (reg:SI 127)
(minus:SI (const_int 31 [0x1f])
(subreg/u:SI (reg/v:DI 125 [ n ]) 0)))
(clobber (reg:SI 98 ca))
]) "pr24257.c":12:15 100 {subfsi3_imm}
 (expr_list:REG_UNUSED (reg:SI 98 ca)
(nil)))
(insn 14 13 15 3 (set (reg:SI 128)
(lshiftrt:SI (reg:SI 127)
(const_int 3 [0x3]))) "pr24257.c":12:15 273 {lshrsi3}
 (expr_list:REG_DEAD (reg:SI 127)
(nil)))

(insn 15 14 16 3 (set (reg:SI 129)
(ashift:SI (reg:SI 128)
(const_int 3 [0x3]))) "pr24257.c":12:15 263 {ashlsi3}
 (expr_list:REG_DEAD (reg:SI 128)
(nil)))

(insn 16 15 17 3 (set (reg:SI 130)
(plus:SI (subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)
(const_int 8 [0x8]))) "pr24257.c":12:15 65 {*addsi3}
 (expr_list:REG_DEAD (reg/v:DI 125 [ n ])
(nil)))

(insn 17 16 18 3 (set (reg:SI 131)
(plus:SI (reg:SI 129)
(reg:SI 130))) "pr24257.c":12:15 65 {*addsi3}
 (expr_list:REG_DEAD (reg:SI 130)
(expr_list:REG_DEAD (reg:SI 129)
(nil

(insn 18 17 19 3 (set (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
(reg:SI 131)) 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 131)
(nil)))

(code_label 19 18 20 4 2 (nil) [1 uses])

(note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)

(insn 21 20 22 4 (set (reg:SI 132)
(const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
 (nil))

(insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4 
A32])

(reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 132)
(expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
(nil
, (insn 18 17 19 3 (set (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
(reg:SI 131)) 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 131)
(nil)))

(code_label 19 18 20 4 2 (nil) [1 uses])

(note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)

(insn 21 20 22 4 (set (reg:SI 132)
(const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
 (nil))

(insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4 
A32])

(reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 132)
(expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
(nil
}



With this patch, it only dumps instructions we care about for
ANTIC stores and AVAIL stores:



  Pattern (  1): (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
 ANTIC stores : {(insn 18 17 19 3 (set (mem:SI (plus:DI 
(reg/v/f:DI 124 [ s ])

(const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
(reg:SI 131)) 516 {*movsi_internal1}
 (expr_list:REG_DEAD (reg:SI 131)
(nil)))
}
 

Re: [PATCH] arm: Don't try and vmov labels [PR99647]

2021-04-07 Thread Richard Sandiford via Gcc-patches
Alex Coplan via Gcc-patches  writes:
> Hi all,
>
> When investigating this PR, I was initially confused as to why we were
> matching a vec_duplicate on the RHS of *mve_mov (since
> general_operand does not match vec_duplicates). It turns out that there
> are two patterns in mve.md named *mve_mov and the second one
> matches vec_duplicates. I've renamed this pattern to *mve_vdup to
> avoid further confusion.
>
> The issue in the PR is that the predicate for the operand of the
> vec_duplicate in *mve_vdup is not strict enough: it allows (e.g.)
> labels when we really only want to allow registers and const_ints.
>
> Testing:
>  * Bootstrapped and regtested on arm-linux-gnueabihf, no regressions.
>  * Regtested an arm-eabi cross configured with --with-float=hard
>  --with-arch=armv8.1-m.main+mve: no regressions. Also fixes a couple of
>  existing torture test failures at -O3 for gcc.dg/torture/pr47958-1.c.
>
> OK for trunk and eventual backport to the GCC 10 branch?
>
> Thanks,
> Alex
>
> gcc/ChangeLog:
>
>   PR target/99647
>   * config/arm/mve.md (*mve_mov): Rename to...
>   (*mve_vdup): ... this. Also tighten up predicate for
>   vec_duplicate operand.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/99647
>   * gcc.c-torture/compile/pr99647.c: New test.
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 135186371e7..d79b3156077 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -104,10 +104,10 @@ (define_insn "*mve_mov"
> (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,*")
> (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,*")])
>  
> -(define_insn "*mve_mov"
> +(define_insn "*mve_vdup"
>[(set (match_operand:MVE_types 0 "s_register_operand" "=w,w")
>   (vec_duplicate:MVE_types
> -   (match_operand:SI 1 "nonmemory_operand" "r,i")))]
> +   (match_operand:SI 1 "reg_or_int_operand" "r,i")))]

When an operand accepts registers, having a predicate that's too lenient
can be bad for optimisation (and so is worth fixing for that reason),
but it shouldn't affect correctness.  I think the bug here is really
with the "i" constraint.  When constant and non-constant alternatives
are combined like this, the constraints have to be precise about which
constants they accept.  (When all alternatives are constant, there is
nothing to reload, and so checking in the predicate is enough.)

As it stands, if the RA sees:

  (set (reg:SI R1) (symbol_ref:SI …)); only assignment to R1
  ...
  (set (reg:V4SI R2) (vec_duplicate:V4SI (reg:SI R1)))

and can't allocate a register to R1, it can try to rematerialise R1
directly inside the vec_duplicate:

  (set (reg:V4SI R2) (vec_duplicate:V4SI (symbol_ref:SI …)))

This will match the "i" constraint and so appear to be OK.  We'll probably
later get an ICE because the symbol_ref doesn't match the predicate
(which is something that the RA isn't required to check).

In practice, the MVE instruction doesn't accept all const_ints either,
so there needs to be a range check of some kind.

However, (vec_duplicate (const_int …)) isn't canonical RTL: it should be
a (const_vector …) instead.  Those const_vectors should already be going
through the “real” move patterns.

I think the easiest fix would therefore be to remove the second alternative
and make this a register-only pattern.

There's a later:

(define_insn "*mve_vec_duplicate"

which I think would be worth removing as well, since it provides a subset
of the same functionality.  However, that pattern is correct in using
 as the mode of the element, whereas above we use SI for all modes.

Thanks,
Richard


RE: [PATCH] arm: Don't try and vmov labels [PR99647]

2021-04-07 Thread Kyrylo Tkachov via Gcc-patches
Hi Alex,

> -Original Message-
> From: Alex Coplan 
> Sent: 07 April 2021 09:56
> To: gcc-patches@gcc.gnu.org
> Cc: ni...@redhat.com; Richard Earnshaw ;
> Ramana Radhakrishnan ; Kyrylo
> Tkachov 
> Subject: [PATCH] arm: Don't try and vmov labels [PR99647]
> 
> Hi all,
> 
> When investigating this PR, I was initially confused as to why we were
> matching a vec_duplicate on the RHS of *mve_mov (since
> general_operand does not match vec_duplicates). It turns out that there
> are two patterns in mve.md named *mve_mov and the second one
> matches vec_duplicates. I've renamed this pattern to *mve_vdup to
> avoid further confusion.
> 
> The issue in the PR is that the predicate for the operand of the
> vec_duplicate in *mve_vdup is not strict enough: it allows (e.g.)
> labels when we really only want to allow registers and const_ints.
> 
> Testing:
>  * Bootstrapped and regtested on arm-linux-gnueabihf, no regressions.
>  * Regtested an arm-eabi cross configured with --with-float=hard
>  --with-arch=armv8.1-m.main+mve: no regressions. Also fixes a couple of
>  existing torture test failures at -O3 for gcc.dg/torture/pr47958-1.c.
> 
> OK for trunk and eventual backport to the GCC 10 branch?

Ok for trunk.
I think GCC 10.3 is due for release today or tomorrow, so it'll likely have to 
wait for GCC 10.4 backport.
Thanks,
Kyrill

> 
> Thanks,
> Alex
> 
> gcc/ChangeLog:
> 
>   PR target/99647
>   * config/arm/mve.md (*mve_mov): Rename to...
>   (*mve_vdup): ... this. Also tighten up predicate for
>   vec_duplicate operand.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/99647
>   * gcc.c-torture/compile/pr99647.c: New test.


[PATCH] testsuite/99955 - fix may_alias declaration of vector

2021-04-07 Thread Richard Biener
This fixes the order of the type attributes to preserve may_alias
for the vector type.

Pushed.

2021-04-07  Richard Biener  

PR testsuite/99955
* gcc.c-torture/execute/pr92618.c: Move may_alias attributes
last.
---
 gcc/testsuite/gcc.c-torture/execute/pr92618.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr92618.c 
b/gcc/testsuite/gcc.c-torture/execute/pr92618.c
index 2a5e565559f..88f2f68a20b 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr92618.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr92618.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/92618 */
 
-typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * 
sizeof (long long;
+typedef long long __m128i __attribute__((__vector_size__(2 * sizeof (long 
long)),__may_alias__));
 
 double a[4];
 unsigned long long b[4];
@@ -14,7 +14,7 @@ bar (void)
 }
 
 #if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
-typedef double __m128d __attribute__((__may_alias__, __vector_size__(2 * 
sizeof (double;
+typedef double __m128d __attribute__((__vector_size__(2 * sizeof 
(double)),__may_alias__));
 
 __attribute__((noipa)) __m128i
 qux (void)
-- 
2.26.2


[PATCH] tree-optimization/99954 - fix loop distribution memcpy classification

2021-04-07 Thread Richard Biener
This fixes bogus classification of a copy as memcpy.  We cannot use
plain dependence analysis to decide between memcpy and memmove when
it computes no dependence.  Instead we have to try harder later which
the patch does for the gcc.dg/tree-ssa/ldist-24.c testcase by resorting
to tree-affine to compute the difference between src and dest and
compare against the copy size.


Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-04-07  Richard Biener  

PR tree-optimization/99954
* tree-loop-distribution.c: Include tree-affine.h.
(generate_memcpy_builtin): Try using tree-affine to prove
non-overlap.
(loop_distribution::classify_builtin_ldst): Always classify
as PKIND_MEMMOVE.

* gcc.dg/torture/pr99954.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr99954.c | 30 ++
 gcc/tree-loop-distribution.c   | 17 +--
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr99954.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr99954.c 
b/gcc/testsuite/gcc.dg/torture/pr99954.c
new file mode 100644
index 000..7d447035912
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr99954.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include 
+
+#define CONTAINER_KIND union
+
+typedef CONTAINER_KIND container { int value; } container;
+
+void move(container* end, container* start) {
+container* p;
+for (p = end; p > start; p--) {
+   (p)->value = (p-1)->value;
+}
+}
+
+#define N 100
+
+int main(int argc, char* argv[]) {
+container vals[N];
+int i;
+for (i=0; ibuiltin = alloc_builtin (dst_dr, src_dr, base, src_base, 
size);
-  partition->kind = PKIND_MEMCPY;
+  partition->kind = PKIND_MEMMOVE;
   return;
 }
 
-- 
2.26.2


[PATCH][GCC12] tree-optimization/99912 - schedule DSE before SRA

2021-04-07 Thread Richard Biener
For the testcase in the PR the main SRA pass is unable to do some
important scalarizations because dead stores of addresses make
the candiate variables disqualified.  The following patch adds
another DSE pass before SRA forming a DCE/DSE pair and moves the
DSE pass that is currently closely after SRA up to after the
next DCE pass, forming another DCE/DSE pair now residing after PRE.

This currently FAILs

FAIL: gfortran.dg/pr81303.f   -O   scan-tree-dump-times linterchange "is interch
anged" 1
FAIL: gfortran.dg/vect/pr81303.f   -O   scan-tree-dump-times linterchange "is in
terchanged" 1

where I filed PR99956 for, loop interchange needs some improvement here.

Bootstrapped and tested on x86_64-unknown-linux-gnu, queued for GCC 12.

2021-04-07  Richard Biener  

PR tree-optimization/99912
* passes.def (pass_all_optimizations): Add pass_dse before
the first pass_dce, move the first pass_dse before the
pass_dce following pass_pre.

* gcc.dg/tree-ssa/ldist-33.c: Disable PRE and LIM.
* gcc.dg/tree-ssa/pr96789.c: Adjust dump file scanned.
* gcc.dg/tree-ssa/ssa-dse-28.c: Likewise.
* gcc.dg/tree-ssa/ssa-dse-29.c: Likewise.
---
 gcc/passes.def | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c   | 5 -
 gcc/testsuite/gcc.dg/tree-ssa/pr96789.c| 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c | 3 ++-
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index e9ed3c7bc57..61fe9fdc42c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -210,6 +210,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_merge_phi);
   NEXT_PASS (pass_thread_jumps);
   NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */);
+  NEXT_PASS (pass_dse);
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_stdarg);
   NEXT_PASS (pass_call_cdce);
@@ -236,7 +237,6 @@ along with GCC; see the file COPYING3.  If not see
   /* Identify paths that should never be executed in a conforming
 program and isolate those paths.  */
   NEXT_PASS (pass_isolate_erroneous_paths);
-  NEXT_PASS (pass_dse);
   NEXT_PASS (pass_reassoc, true /* insert_powi_p */);
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_forwprop);
@@ -254,6 +254,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_sancov);
   NEXT_PASS (pass_asan);
   NEXT_PASS (pass_tsan);
+  NEXT_PASS (pass_dse);
   NEXT_PASS (pass_dce);
   /* Pass group that runs when 1) enabled, 2) there are loops
 in the function.  Make sure to run pass_fix_loops before
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c
index 9e0cedf4fa3..67846a5b183 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c
@@ -1,5 +1,8 @@
 /* { dg-do compile { target size32plus } } */
-/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns 
-fdump-tree-ldist-details" } */
+/* The desire is to show we can generate a memset from the outer loop
+   store.  Both store motion and PRE expose a DSE opportunity for this
+   zeroing - while desirable this defeats the purpose of this testcase.  */
+/* { dg-options "-O2 -fno-tree-loop-im -fno-tree-pre -ftree-loop-distribution 
-ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
 
 #define N (1024)
 double a[N][N], b[N][N], c[N][N];
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c
index 5704952309b..4d4d4c8ac6f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c
@@ -58,4 +58,4 @@ bar (int16_t res[16], uint8_t *val1, uint8_t *val2)
 }
 }
 
-/* { dg-final { scan-tree-dump {Deleted dead store:.*tmp} "dse3" } } */
+/* { dg-final { scan-tree-dump {Deleted dead store:.*tmp} "dse4" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c
index b81cabe604a..3bf4e76c965 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c
@@ -17,5 +17,6 @@ int foo (int *p, int b)
 
 /* { dg-final { scan-tree-dump-not "Deleted dead store" "dse1"} } */
 /* { dg-final { scan-tree-dump-not "Deleted dead store" "dse2"} } */
-/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse4"} } */
+/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse3"} } */
+/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse5"} } */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c
index f4ef89c590c..4990ae0d18c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c
@@ -22,5 +22,6 @@ foo(int cond, struct z *s)
 
 /* { dg-final { scan-tree-dump-times "Deleted dead store" 3 "dse1"} } */
 /* { dg-final { 

[PATCH v2] arm: Various MVE vec_duplicate fixes [PR99647]

2021-04-07 Thread Alex Coplan via Gcc-patches
Hi,

Here is a v2 of my previous attempt:
https://gcc.gnu.org/pipermail/gcc-patches/2021-April/567724.html
to fix this PR.

---

This patch fixes various issues with vec_duplicate in the MVE patterns.
Currently there are two patterns named *mve_mov. The second of
these is really a vector duplicate rather than a move, so I've renamed
it accordingly.

As it stands, there are several issues with this pattern:
1. The MVE_types iterator has an entry for TImode, but
   vec_duplicate:TI is invalid.
2. The mode of the operand to vec_duplicate is SImode, but it should
   vary according to the vector mode iterator.
3. The second alternative of this pattern is bogus: it allows matching
   symbol_refs (the cause of the PR) and const_ints (which means that it
   matches (vec_duplicate (const_int ...)) which is non-canonical: such
   rtxes should be const_vectors instead and handled by the main vector
   move pattern).

This patch fixes all of these issues, and removes the redundant
*mve_vec_duplicate pattern.

Testing:
 * Bootstrapped and regtested on arm-linux-gnueabihf, no regressions.
 * Tested an arm-eabi cross configured with --with-float=hard
   --with-arch=armv8.1-m.main+mve, no regressions.

OK for trunk and eventual backport to GCC 10?

Thanks,
Alex

gcc/ChangeLog:

PR target/99647
* config/arm/iterators.md (MVE_vecs): New.
(V_elem): Also handle V2DF.
* config/arm/mve.md (*mve_mov): Rename to ...
(*mve_vdup): ... this. Remove second alternative since
vec_duplicate of const_int is not canonical RTL, and we don't
want to match symbol_refs. Fix up modes.
(*mve_vec_duplicate): Delete (pattern is redundant).

gcc/testsuite/ChangeLog:

PR target/99647
* gcc.c-torture/compile/pr99647.c: New test.
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 43aab2346c4..8fb723e65cd 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -261,6 +261,7 @@ (define_mode_iterator VBFCVTM [V2SI SF])
 
 ;; MVE mode iterator.
 (define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
+(define_mode_iterator MVE_vecs [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
 (define_mode_iterator MVE_0 [V8HF V4SF])
 (define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI])
@@ -567,9 +568,10 @@ (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
  (V4HI "HI") (V8HI "HI")
  (V4HF "HF") (V8HF "HF")
  (V4BF "BF") (V8BF "BF")
-  (V2SI "SI") (V4SI "SI")
-  (V2SF "SF") (V4SF "SF")
-  (DI "DI")   (V2DI "DI")])
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI   "DI") (V2DI "DI")
+ (V2DF "DF")])
 
 ;; As above but in lower case.
 (define_mode_attr V_elem_l [(V8QI "qi") (V16QI "qi")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 135186371e7..7467d5f4d57 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -104,18 +104,14 @@ (define_insn "*mve_mov"
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,*")])
 
-(define_insn "*mve_mov"
-  [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w")
-   (vec_duplicate:MVE_types
- (match_operand:SI 1 "nonmemory_operand" "r,i")))]
+(define_insn "*mve_vdup"
+  [(set (match_operand:MVE_vecs 0 "s_register_operand" "=w")
+   (vec_duplicate:MVE_vecs
+ (match_operand: 1 "s_register_operand" "r")))]
   "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
-{
-  if (which_alternative == 0)
-return "vdup.\t%q0, %1";
-  return "vmov.\t%q0, %1";
-}
-  [(set_attr "length" "4,4")
-   (set_attr "type" "mve_move,mve_move")])
+  "vdup.\t%q0, %1"
+  [(set_attr "length" "4")
+   (set_attr "type" "mve_move")])
 
 ;;
 ;; [vst4q])
@@ -10737,13 +10733,6 @@ (define_insn "mve_vshlcq_m_"
  [(set_attr "type" "mve_move")
   (set_attr "length" "8")])
 
-(define_insn "*mve_vec_duplicate"
- [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
-   (vec_duplicate:MVE_VLD_ST (match_operand: 1 "general_operand" 
"r")))]
- "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
- "vdup.\t%q0, %1"
- [(set_attr "type" "mve_move")])
-
 ;; CDE instructions on MVE registers.
 
 (define_insn "arm_vcx1qv16qi"
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr99647.c 
b/gcc/testsuite/gcc.c-torture/compile/pr99647.c
new file mode 100644
index 000..701155dd856
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr99647.c
@@ -0,0 +1,5 @@
+/* { dg-do assemble } */
+typedef int __attribute((vector_size(16))) V;
+V f(void) {
+  return (V){ (int)f, (int)f, (int)f, (int)f };
+}


Re: [PATCH] varasm: Fix up constpool alias handling [PR99872]

2021-04-07 Thread Richard Biener
On Wed, 7 Apr 2021, Jakub Jelinek wrote:

> Hi!
> 
> Last year, I have added in r11-2944-g0106300f6c3f7bae5eb1c46dbd45aa07c94e1b15
> (aka PR54201 fix) code to find bitwise duplicates in constant pool and output
> them as aliases instead of duplicating the data.
> 
> Unfortunately this broke mingw32 -m32.
> On most targets, ASM_GENERATE_INTERNAL_LABEL with "LC" emits something like
> *.LC123 and the targets don't add user label prefixes, so the aliases
> that we print should be something like
>   .set.LC5, .LC6
> or
>   .set.LC5, .LC6 + 8
> and I wasn't sure if ASM_OUTPUT_DEF can handle the * and therefore I have
> stripped it.
> But, on mingw32 -m32, ASM_GENERATE_INTERNAL_LABEL with "LC" emits
> *LC123 and the target has user label prefixes, which means what I wrote
> results in
> LC6:
>   ...
>   .set_LC5, _LC6
> which results in unresolved symbols.  I went through the ASM_OUTPUT_DEF
> definitions of all targets and all of them use assemble_name twice under
> the hood (with various differences on what they print before, in between or
> after those names).  And assemble_name handles the name encoding properly,
> so if we pass it ASM_OUTPUT_DEF (..., "*.LC123", "*.LC456+16") it will
> emit .LC123 and .LC456+16 and if we pass it "*LC789", it will emit
> LC789.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux and Jonathan said in
> the PR he has tested it on mingw32.
> 
> Ok for trunk?

OK.

Thanks.
Richard.

> 2021-04-07  Jakub Jelinek  
> 
>   PR target/99872
>   * varasm.c (output_constant_pool_contents): Don't strip name encoding
>   from XSTR (desc->sym, 0) or from label before passing those to
>   ASM_OUTPUT_DEF.
> 
> --- gcc/varasm.c.jj   2021-02-19 12:14:11.082112515 +0100
> +++ gcc/varasm.c  2021-04-06 13:26:21.708116790 +0200
> @@ -4290,13 +4290,13 @@ output_constant_pool_contents (struct rt
>  if (desc->mark < 0)
>{
>  #ifdef ASM_OUTPUT_DEF
> - const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
> + const char *name = XSTR (desc->sym, 0);
>   char label[256];
>   char buffer[256 + 32];
>   const char *p;
>  
>   ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
> - p = targetm.strip_name_encoding (label);
> + p = label;
>   if (desc->offset)
> {
>   sprintf (buffer, "%s+%ld", p, (long) (desc->offset));
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: vect: Don't split store groups if we have IFN_STORE_LANES [PR99873]

2021-04-07 Thread Richard Biener via Gcc-patches
On Wed, Apr 7, 2021 at 11:10 AM Richard Sandiford
 wrote:
>
> Richard Biener via Gcc-patches  writes:
> > On Tue, Apr 6, 2021 at 12:03 PM Richard Sandiford via Gcc-patches
> >  wrote:
> >>
> >> As noted in the PR, we were no longer using ST3 for the testcase and
> >> instead stored each lane individually.  This is because we'd split
> >> the store group during SLP and couldn't recover when SLP failed.
> >>
> >> However, we seem to get better code with ST3 and ST4 even if
> >> SLP would have succeeded, such as for vect-complex-5.c.
> >> I think the best thing for GCC 11 would therefore be to skip
> >> the split entirely if we could use IFN_STORE_LANES.  A downside
> >> of this is that SLP can handle smaller iteration counts than
> >> IFN_STORE_LANES can, but we don't have the infrastructure to
> >> choose reliably based on that.
> >>
> >> Tested on aarch64-linux-gnu (with and without SVE), arm-linux-gnueabihf,
> >> armeb-eabi and x86_64-linux-gnu.  OK to install?
> >
> > One of the cases where splitting helps is if you have say V2DFmode
> > and a group size of 4 but if you break up the group into sizes of 2
> > then you get two V2DFmode group size 2 SLP subgraphs.  So I wonder
> > if, since you look for a vector type, want to only disable splitting
> > in case the resulting vector type has the same number of lanes
> > as the group size?  (and if not, instead limit where we consider splitting)
>
> Hmm, yeah.  If we can apply SLP to full vectors within a loop iteration
> then I agree it's still better to split.  I think the test should favour
> ST3/ST4 more though: split only if one of the new group sizes is a
> multiple of the vector size.  If the vector type has more lanes than the
> group size then we should still use ST3/ST4.
>
> How does this version look?  Tested as before.

Looks good.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
> PR tree-optimization/99873
> * tree-vect-slp.c (vect_slp_prefer_store_lanes_p): New function.
> (vect_build_slp_instance): Don't split store groups that could
> use IFN_STORE_LANES.
>
> gcc/testsuite/
> * gcc.dg/vect/slp-21.c: Only expect 2 of the loops to use SLP
> if IFN_STORE_LANES is available.
> * vect/vect-complex-5.c: Expect no loops to use SLP if
> IFN_STORE_LANES is available.
> * gcc.target/aarch64/pr99873_1.c: New test.
> * gcc.target/aarch64/pr99873_2.c: Likewise.
> * gcc.target/aarch64/pr99873_3.c: Likewise.
> * gcc.target/aarch64/sve/pr99873_1.c: Likewise.
> * gcc.target/aarch64/sve/pr99873_2.c: Likewise.
> * gcc.target/aarch64/sve/pr99873_3.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/vect/slp-21.c|  4 +--
>  gcc/testsuite/gcc.dg/vect/vect-complex-5.c|  3 +-
>  gcc/testsuite/gcc.target/aarch64/pr99873_1.c  | 17 ++
>  gcc/testsuite/gcc.target/aarch64/pr99873_2.c  | 20 
>  gcc/testsuite/gcc.target/aarch64/pr99873_3.c  | 20 
>  .../gcc.target/aarch64/sve/pr99873_1.c| 15 +
>  .../gcc.target/aarch64/sve/pr99873_2.c| 18 +++
>  .../gcc.target/aarch64/sve/pr99873_3.c| 18 +++
>  gcc/tree-vect-slp.c   | 31 ++-
>  9 files changed, 142 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr99873_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99873_3.c
>
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index ceec7f5c410..eadbd521966 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2458,6 +2458,34 @@ vect_match_slp_patterns (slp_instance instance, 
> vec_info *vinfo,
>return vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
>  }
>
> +/* STMT_INFO is a store group of size GROUP_SIZE that we are considering
> +   splitting into two, with the first split group having size NEW_GROUP_SIZE.
> +   Return true if we could use IFN_STORE_LANES instead and if that appears
> +   to be the better approach.  */
> +
> +static bool
> +vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
> +  unsigned int group_size,
> +  unsigned int new_group_size)
> +{
> +  tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
> +  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
> +  if (!vectype)
> +return false;
> +  /* Allow the split if one of the two new groups would operate on full
> + vectors *within* rather than across one scalar loop iteration.
> + This is purely a heuristic, but it should work well for group
> + sizes of 3 and 4, where the possible split

Aarch64 patch ping^2

2021-04-07 Thread Jakub Jelinek via Gcc-patches
On Mon, Mar 29, 2021 at 11:16:55AM +0200, Jakub Jelinek wrote:
> > Looks good to me.  Richard E knows this code better than I do though,
> > so I think he should have the final say.  He's currently on holiday
> > but will be back next week.
> 
> I'd like to ping this patch.

Ping.

> > > 2021-03-18  Jakub Jelinek  
> > >
> > >   PR target/91710
> > >   * config/aarch64/aarch64.c (aarch64_function_arg_alignment): Change
> > >   abi_break argument from bool * to unsigned *, store there the pre-GCC 9
> > >   alignment.
> > >   (aarch64_layout_arg, aarch64_gimplify_va_arg_expr): Adjust callers.
> > >   (aarch64_function_arg_regno_p): Likewise.  Only emit -Wpsabi note if
> > >   the old and new alignment after applying MIN/MAX to it is different.
> > >
> > >   * gcc.target/aarch64/pr91710.c: New test.

Thanks.

Jakub



Re: [PATCH] Improve rtx insn vec output

2021-04-07 Thread Richard Biener via Gcc-patches
On Wed, Apr 7, 2021 at 11:17 AM Xionghu Luo  wrote:
>
>
> On 2021/4/7 14:57, Richard Biener wrote:
> > On Wed, Apr 7, 2021 at 7:42 AM Xionghu Luo  wrote:
> >>
> >> print_rtl will dump the rtx_insn from current until LAST.  But it is only
> >> useful to see the particular insn that called by print_rtx_insn_vec,
> >> Let's call print_rtl_single to display that insn in the gcse and 
> >> store-motion
> >> pass dump.
> >
> > Can you cite a before/after dump snippet to clarify?
>
> Before the patch, pr24257.c.258r.store_motion dumps:
>
>Pattern (  1): (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
>  (const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
>   ANTIC stores : {(insn 18 17 19 3 (set (mem:SI (plus:DI
> (reg/v/f:DI 124 [ s ])
>  (const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
>  (reg:SI 131)) 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 131)
>  (nil)))
>
> (code_label 19 18 20 4 2 (nil) [1 uses])
>
> (note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>
> (insn 21 20 22 4 (set (reg:SI 132)
>  (const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
>   (nil))
>
> (insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4
> A32])
>  (reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 132)
>  (expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
>  (nil
> }
>   AVAIL stores : {(insn 7 4 10 2 (set (mem:SI (plus:DI
> (reg/v/f:DI 124 [ s ])
>  (const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
>  (subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)) "pr24257.c":23:11 516
> {*movsi_internal1}
>   (nil))
>
> (insn 10 7 11 2 (set (reg:CC 126)
>  (compare:CC (subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)
>  (const_int 31 [0x1f]))) "pr24257.c":10:18 751 {*cmpsi_signed}
>   (nil))
>
> (jump_insn 11 10 12 2 (set (pc)
>  (if_then_else (gt (reg:CC 126)
>  (const_int 0 [0]))
>  (label_ref 19)
>  (pc))) "pr24257.c":10:18 834 {*cbranch}
>   (expr_list:REG_DEAD (reg:CC 126)
>  (int_list:REG_BR_PROB 118111604 (nil)))
>   -> 19)
>
> (note 12 11 13 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>
> (insn 13 12 14 3 (parallel [
>  (set (reg:SI 127)
>  (minus:SI (const_int 31 [0x1f])
>  (subreg/u:SI (reg/v:DI 125 [ n ]) 0)))
>  (clobber (reg:SI 98 ca))
>  ]) "pr24257.c":12:15 100 {subfsi3_imm}
>   (expr_list:REG_UNUSED (reg:SI 98 ca)
>  (nil)))
> (insn 14 13 15 3 (set (reg:SI 128)
>  (lshiftrt:SI (reg:SI 127)
>  (const_int 3 [0x3]))) "pr24257.c":12:15 273 {lshrsi3}
>   (expr_list:REG_DEAD (reg:SI 127)
>  (nil)))
>
> (insn 15 14 16 3 (set (reg:SI 129)
>  (ashift:SI (reg:SI 128)
>  (const_int 3 [0x3]))) "pr24257.c":12:15 263 {ashlsi3}
>   (expr_list:REG_DEAD (reg:SI 128)
>  (nil)))
>
> (insn 16 15 17 3 (set (reg:SI 130)
>  (plus:SI (subreg/s/u:SI (reg/v:DI 125 [ n ]) 0)
>  (const_int 8 [0x8]))) "pr24257.c":12:15 65 {*addsi3}
>   (expr_list:REG_DEAD (reg/v:DI 125 [ n ])
>  (nil)))
>
> (insn 17 16 18 3 (set (reg:SI 131)
>  (plus:SI (reg:SI 129)
>  (reg:SI 130))) "pr24257.c":12:15 65 {*addsi3}
>   (expr_list:REG_DEAD (reg:SI 130)
>  (expr_list:REG_DEAD (reg:SI 129)
>  (nil
>
> (insn 18 17 19 3 (set (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
>  (const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
>  (reg:SI 131)) 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 131)
>  (nil)))
>
> (code_label 19 18 20 4 2 (nil) [1 uses])
>
> (note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>
> (insn 21 20 22 4 (set (reg:SI 132)
>  (const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
>   (nil))
>
> (insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4
> A32])
>  (reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 132)
>  (expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
>  (nil
> , (insn 18 17 19 3 (set (mem:SI (plus:DI (reg/v/f:DI 124 [ s ])
>  (const_int 4 [0x4])) [1 s_2(D)->left+0 S4 A32])
>  (reg:SI 131)) 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 131)
>  (nil)))
>
> (code_label 19 18 20 4 2 (nil) [1 uses])
>
> (note 20 19 21 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>
> (insn 21 20 22 4 (set (reg:SI 132)
>  (const_int 0 [0])) "pr24257.c":16:9 516 {*movsi_internal1}
>   (nil))
>
> (insn 22 21 0 4 (set (mem:SI (reg/v/f:DI 124 [ s ]) [1 s_2(D)->buf+0 S4
> A32])
>  (reg:SI 132)) "pr24257.c":16:9 516 {*movsi_internal1}
>   (expr_list:REG_DEAD (reg:SI 132)
>  (expr_list:REG_DEAD (reg/v/f:DI 124 [ s ])
>  (nil
> }
>
>
>
> With this patch, it only dumps instructions we care about for
> ANTIC stores and AVAIL

Re: RFC: Sphinx for GCC documentation

2021-04-07 Thread Martin Liška
On 4/1/21 4:31 PM, David Malcolm wrote:
> I think some further tweaking of the conversion script is needed first;

Sure, I've got a collection of patches and I'm planning to make pull requests.

> I'd rather iron out the script rather than play whack-a-mole manually.
> I enjoy proofreading conversions, but I'm trying to focus right now on
> analyzer bugfixing for GCC 11.
> 
>> So far I focused on the 2 biggest manuals we have:
>>
>> (1) User documentation
>> https://splichal.eu/sphinx/
>> https://splichal.eu/sphinx/gcc.pdf
>>
>> (2) GCC Internals Manual
>> https://splichal.eu/sphinx-internal
>> https://splichal.eu/sphinx-internal/gccint.pdf
>>
>> I'm aware of missing pieces (thanks Joseph) and I'm willing to finish
>> it.
> One important requirement is manpage output.  Sphinx can do that - what
> does the output look like based on the texi2rst output?
> 

I was able to create some man pages and they are listed here:
https://splichal.eu/gccsphinx/

The main gcc.1 man page contains complete content of GCC manual, that needs to 
be resolved.
Another minor issue is SYNOPSIS, which cannot be generated automatically and we 
should
replace the man directives (@c, ...).

Martin


Re: RFC: Sphinx for GCC documentation

2021-04-07 Thread Martin Liška
On 4/2/21 5:40 PM, Martin Sebor wrote:
> On 4/1/21 7:30 AM, Martin Liška wrote:
>> Hey.
>>
>> I've returned to the David's project and I'm willing to finish his 
>> transition effort.
>> I believe using Sphinx documentation can rapidly improve readability, both 
>> HTML and PDF version,
>> of various GCC manuals ([1]). I've spent some time working on the David's 
>> texi2rsf conversion tool ([2])
>> and I'm presenting a result that is not perfect, but can be acceptable after 
>> a reasonable
>> amount of manual modifications.
>>
>> So far I focused on the 2 biggest manuals we have:
>>
>> (1) User documentation
>> https://splichal.eu/sphinx/
>> https://splichal.eu/sphinx/gcc.pdf
>>
>> (2) GCC Internals Manual
>> https://splichal.eu/sphinx-internal
>> https://splichal.eu/sphinx-internal/gccint.pdf
>>
>> I'm aware of missing pieces (thanks Joseph) and I'm willing to finish it.
>>
>> That said, I'm asking the GCC community for a green light before I invest
>> more time on it?

Hello.

> 
> I like the output quite a bit, especially that things like option
> references are automagically linked to their documentation.  I spent
> just a bit of time looking through the HTML and PDF above and found
> a few minor issues.  I suspect they're due to the conversion script
> not being finished yet but just to confirm please see below.

Exactly, the script is supposed to do 99% of the transition automatically,
the rest needs a manual intervention.

> 
> Is the source we'd work with the same as what shows up when I click
> the 'View page source' link?  (Or is there some preprocessing involved?)

Yes, it's the source as is.

> 
> I'm not excited about changing tools.  I like that Texinfo is a GNU
> project; AFACT, Sphinx is not.

I don't any particular benefit. As already mentioned, Sphinx is a open-source
project with rich history and broad collection of projects using it:
https://www.sphinx-doc.org/en/master/examples.html

> In the original thread David linked
> to there was a suggestion to use Texinfo and Sphinx side by side.
> I.e., keep .texi as the master source format and generate the .rst
> files using the conversion script on demand.  Would making that
> the first step of the transition be a viable option?  (With the idea
> of giving it a try and deciding whether to convert after some time?)

I've just set up a pipeline that can generate all the manuals from texi file:
https://splichal.eu/gccsphinx/

However, the generated .rst files will need a manual changes before we can
use it as a replacement. So once we make a decision, .texi files will be left
after the transition. Note it's very difficult to write a 1:1 conversion tool
from TEXI to RST.

> 
> Martin
> 
> [*] The linking doesn't always seem to work (e.g.,
> :option:`-fabi-version=4` under -Wabi).  I'm guessing it's not yet
> done in the conversion script (or there's no -fabi-version=4 in
> the index) and not a Sphinx limitation.

Should be fixed now, one needs to split the '=4' part out of :option: directibe.

> 
> It doesn't add links for attributes but presumably that could also
> be made to happen in the conversion, right?

I partially added that.

> 
> It doesn't seem to preserve newlines in long lists of options (like
> under -Wall), so the result is just one long running series of options
> that's hard (for me) to follow. Presumably there's a way to do that
> too (add a newline after each :option?)

That's not desired and it's up to builder (PDF, HTML, ...) how the options
should be formatted.

> 
> It mentions both the positive and the negative forms of options as
> headings (like -Wmain, -Wno-main).  That seems superflous and, with
> very long option names (I noticed this for example with
> -Wno-analyzer-use-of-pointer-in-stale-stack-frame) the heading in
> the PDF can run off the right edge of the page.  But based on
> the source it also looks like it should be easy to adjust in
> the conversion script if we wanted to keep just one form, right?

Yes. I updated the script to split the lines when we reach a long list of 
options.
Note that we likely want to preserve both position and negative options because
of references and in order to express what is default.

I've created a version 2 that contains various manuals (and man pages):
https://splichal.eu/gccsphinx/

Martin

> 
> 
>>
>> Cheers,
>> Martin
>>
>> [1] https://gcc.gnu.org/onlinedocs/
>> [2] https://github.com/davidmalcolm/texi2rst
>>
> 



Re: [patch for gcc12 stage1][version 2] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-04-07 Thread Qing Zhao via Gcc-patches
Ping

> On Mar 24, 2021, at 4:21 PM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Hi, 
> 
> This is the 2nd version of the patch for the new security feature for GCC.
> 
> Could you please take a look at it and let me know any comments and issues.
> 
> Thanks.
> 
> Qing
> 
> **compared to Version 1, this version added the following new features to 
> address Kees’s comments:
> 
> 1.  correctly handle VLA inside a structure for pattern initialization.
> In tree.c (build_pattern_cst):
> 
> +   /* if the field is a variable length array, it should be the last
> +  field of the record, and no need to initialize.  */
> +   if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
> +   && TYPE_SIZE (TREE_TYPE (field)) == NULL_TREE
> +   && ((TYPE_DOMAIN (TREE_TYPE (field)) != NULL_TREE
> +   && TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (field)))
> +  == NULL_TREE)
> +  || TYPE_DOMAIN (TREE_TYPE (field)) == NULL_TREE))
> + continue;
> 
> 2.  initialize all paddings to zero when -ftrivial-auto-var-init is present.
> In expr.c (store_constructor):
> 
> Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> 
> In gimplify.c (gimplify_init_constructor):
> 
> Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> 
> As agreed with Kees, treat the issue related to auto variables outside of the 
> cases and inside the switch as a low priority one. 
> 
> 3. Add  the following new testing cases for the above 1 and 2:
> 
> * c-c++-common/auto-init-13.c: New test.
> * c-c++-common/auto-init-14.c: New test. 
> 
> * gcc.target/aarch64/auto-init-9.c: New test.
> * gcc.target/aarch64/auto-init-10.c: New test.
> * gcc.target/aarch64/auto-init-11.c: New test.
> * gcc.target/aarch64/auto-init-12.c: New test.
> * gcc.target/aarch64/auto-init-13.c: New test.
> * gcc.target/aarch64/auto-init-14.c: New test.
> * gcc.target/aarch64/auto-init-15.c: New test.
> * gcc.target/aarch64/auto-init-16.c: New test.
> * gcc.target/aarch64/auto-init-17.c: New test.
> * gcc.target/aarch64/auto-init-18.c: New test.
> * gcc.target/aarch64/auto-init-19.c: New test.
> * gcc.target/aarch64/auto-init-20.c: New test.
> 
> * gcc.target/i386/auto-init-9.c: New test.
> * gcc.target/i386/auto-init-10.c: New test.
> * gcc.target/i386/auto-init-11.c: New test.
> * gcc.target/i386/auto-init-12.c: New test.
> * gcc.target/i386/auto-init-13.c: New test.
> * gcc.target/i386/auto-init-14.c: New test.
> * gcc.target/i386/auto-init-15.c: New test.
> * gcc.target/i386/auto-init-16.c: New test.
> * gcc.target/i386/auto-init-17.c: New test.
> * gcc.target/i386/auto-init-18.c: New test.
> * gcc.target/i386/auto-init-19.c: New test.
> * gcc.target/i386/auto-init-20.c: New test.
> 
> 4. Update the default approach as D,  then when specify 
> -ftrivial-auto-var-init,  the default approach is the “.DEFERRED_INIT” 
> Approach. No need to add -fauto-var-init-approach=D anymore.
> 
> If we need to compare approach A and D, we can add -fauto-var-init-approach=A 
> to get that implementation. 
> 
> 5.  Delete all -fauto-var-init-approach=D in the testing cases. 
> 
> ** others are the same as Version 1, please see the version 1 description 
> at:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565581.html 
> 
> 
> 
> ***Changelog:
> 
> gcc/:
> 
> 2021-03-24  qing zhao  
> 
> * common.opt (ftrivial-auto-var-init=): New.
> (fauto-var-init-approach=): Likewise.
> * doc/extend.texi: Document the uninitialized attribute.
> * doc/invoke.texi: Document -ftrivial-auto-var-init.
> * expr.c (store_constructor): Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> * flag-types.h (enum auto_init_type): New enumerated type
> auto_init_type.
> (enum auto_init_approach): New enumerated type auto_init_approach.
> * gimple.h (enum gf_mask): Add GF_CALL_MEMSET_FOR_UNINIT case.
> (gimple_call_set_memset_for_uninit): New function.
> (gimple_call_memset_for_uninit_p): Likewise.
> * gimplify.c (gimplify_vla_decl): Add initialization to vla per users'
> requests. 
> (build_deferred_init): New function.
> (gimple_add_init_for_auto_var): Likewise.
> (gimplify_decl_expr): Add initialization to automatic variables per
> users' requests.
> (gimplify_init_constructor): Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> * internal-fn.c (expand_DEFERRED_INI

Re: RFC: Sphinx for GCC documentation

2021-04-07 Thread Michael Matz
Hello,

On Wed, 7 Apr 2021, Martin Liška wrote:

> > I like the output quite a bit, especially that things like option
> > references are automagically linked to their documentation.  I spent
> > just a bit of time looking through the HTML and PDF above and found
> > a few minor issues.  I suspect they're due to the conversion script
> > not being finished yet but just to confirm please see below.
> 
> Exactly, the script is supposed to do 99% of the transition automatically,
> the rest needs a manual intervention.
> 
> > 
> > Is the source we'd work with the same as what shows up when I click
> > the 'View page source' link?  (Or is there some preprocessing involved?)
> 
> Yes, it's the source as is.

I think doing that might be a mistake.  I do like the end result quite 
much (ignoring things missing, like @itemx), no doubt, but if the .rst 
files are the sources then we loose something: they contain markup to 
describe presentation ("make this bold/italic/a list").  The .texi files 
(want to) contain markup to describe meaning ("this is an 
chapter/section/option/attribute/code").  The former can always be 
generated from the latter, the other direction is not possible.  For 
maintaining and writing documentation it's usually better (because more 
future proof) to describe meaning.  (I'm aware that our .texi files don't 
completely adhere to that ideal and also contain quite some presentation 
markup)

Random snippet for what I mean: the .texi contains:

"The @code{access} attribute specifies that a function to whose 
by-reference arguments the attribute applies accesses the referenced 
object according to @var{access-mode}.  The @var{access-mode} argument is 
required and must be"

the .rst has:

"The ``access`` attribute specifies that a function to whose by-reference 
arguments the attribute applies accesses the referenced object according 
to :samp:`{access-mode}`.  The :samp:`{access-mode}` argument is required 
and must be"

So, @code{}/@var{} vs. `` `` / :samp:``.  Keeping in mind that 
documentation also needs to be written it's inconceivable why there should 
be such difference in expressing code vs var markup in .rst.  
Now, you could say, just use `` in the .rst file, it's rendered the same 
anyway; but then the distinction gets lost.

(Again, I'm aware that the .texi files aren't perfect here, and @code/@var 
also seems a bit forced :) )

(I'm not proposing we go the full extreme of semantic markup that docbook 
tries, but the other end of the spectrum, i.e. markdown/rst with only 
presentation markup, also doesn't seem very well fitting)

One other practical concern: it seems there's a one-to-one correspondence 
of .rst files and (web)page.  Do we really want to maintain hundreds (or 
how many?) .rst files, instead of 60 .texi files?


Ciao,
Michael.


[committed] libstdc++: Fix filesystem::path construction from COW string [PR 99805]

2021-04-07 Thread Jonathan Wakely via Gcc-patches
Calling the non-const data() member on a COW string makes it "leaked",
possibly resulting in reallocating the string to ensure a unique owner.

The path::_M_split_cmpts() member parses its _M_pathname string using
string_view objects and then calls _M_pathname.data() to find the offset
of each string_view from the start of the string. However because
_M_pathname is non-const that will cause a COW string to reallocate if
it happens to be shared with another string object. This results in the
offsets calculated for each component being wrong (i.e. undefined)
because the string views no longer refer to substrings of the
_M_pathname member. The fix is to use the parse.offset(c) member which
gets the offset safely.

The bug only happens for the path(string_type&&) constructor and only
for COW strings. When constructed from an lvalue string the string's
contents are copied rather than just incrementing the refcount, so
there's no reallocation when calling the non-const data() member. The
testsuite changes check the lvalue case anyway, because we should
probably change the deep copying to just be a refcount increment (by
adding a path(const string_type&) constructor or an overload for
__effective_range(const string_type&), for COW strings only).

libstdc++-v3/ChangeLog:

PR libstdc++/99805
* src/c++17/fs_path.cc (path::_M_split_cmpts): Do not call
non-const member on _M_pathname, to avoid copy-on-write.
* testsuite/27_io/filesystem/path/decompose/parent_path.cc:
Check construction from strings that might be shared.

Tested powerpc64le-linux (SSO an COW strings). Committed to trunk.

This needs to be backported to 9 and 10 too, but after the 10.3
release.


commit e06d3f5dd7d0c6b4a20fe813e6ee5addd097f560
Author: Jonathan Wakely 
Date:   Wed Apr 7 16:05:42 2021

libstdc++: Fix filesystem::path construction from COW string [PR 99805]

Calling the non-const data() member on a COW string makes it "leaked",
possibly resulting in reallocating the string to ensure a unique owner.

The path::_M_split_cmpts() member parses its _M_pathname string using
string_view objects and then calls _M_pathname.data() to find the offset
of each string_view from the start of the string. However because
_M_pathname is non-const that will cause a COW string to reallocate if
it happens to be shared with another string object. This results in the
offsets calculated for each component being wrong (i.e. undefined)
because the string views no longer refer to substrings of the
_M_pathname member. The fix is to use the parse.offset(c) member which
gets the offset safely.

The bug only happens for the path(string_type&&) constructor and only
for COW strings. When constructed from an lvalue string the string's
contents are copied rather than just incrementing the refcount, so
there's no reallocation when calling the non-const data() member. The
testsuite changes check the lvalue case anyway, because we should
probably change the deep copying to just be a refcount increment (by
adding a path(const string_type&) constructor or an overload for
__effective_range(const string_type&), for COW strings only).

libstdc++-v3/ChangeLog:

PR libstdc++/99805
* src/c++17/fs_path.cc (path::_M_split_cmpts): Do not call
non-const member on _M_pathname, to avoid copy-on-write.
* testsuite/27_io/filesystem/path/decompose/parent_path.cc:
Check construction from strings that might be shared.

diff --git a/libstdc++-v3/src/c++17/fs_path.cc 
b/libstdc++-v3/src/c++17/fs_path.cc
index 2d9e29d9e7a..506ff25f9a6 100644
--- a/libstdc++-v3/src/c++17/fs_path.cc
+++ b/libstdc++-v3/src/c++17/fs_path.cc
@@ -1907,10 +1907,9 @@ path::_M_split_cmpts()
  _M_cmpts.type(_Type::_Multi);
  _M_cmpts.reserve(_M_cmpts.size() + buf.size());
  auto output = _M_cmpts._M_impl->end();
- for (auto& c : buf)
+ for (const auto& c : buf)
{
- auto pos = c.str.data() - _M_pathname.data();
- ::new(output++) _Cmpt(c.str, c.type, pos);
+ ::new(output++) _Cmpt(c.str, c.type, parser.offset(c));
  ++_M_cmpts._M_impl->_M_size;
}
  next = buf.begin();
@@ -1930,9 +1929,8 @@ path::_M_split_cmpts()
   auto output = _M_cmpts._M_impl->end();
   for (int i = 0; i < n; ++i)
{
- auto c = buf[i];
- auto pos = c.str.data() - _M_pathname.data();
- ::new(output++) _Cmpt(c.str, c.type, pos);
+ const auto& c = buf[i];
+ ::new(output++) _Cmpt(c.str, c.type, parser.offset(c));
  ++_M_cmpts._M_impl->_M_size;
}
 }
diff --git 
a/libstdc++-v3/testsuite/27_io/filesystem/path/decompose/parent_path.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/path/decompose/parent_path.cc
index 84e86ec7a19..b6ca525bc82 100644
--- a/libstdc++-v3/testsuite/27

[PATCH] c++: Don't substitute into constraints on lambdas [PR99874]

2021-04-07 Thread Patrick Palka via Gcc-patches
We currently substitute through a lambda's constraints whenever we
regenerate it via tsubst_lambda_expr.  This is the wrong approach
because it can lead to hard errors due to constraints being evaluated
out of order (as in the testcase concepts-lambda17.C below), and because
it doesn't mesh well with the recently added REQUIRES_EXPR_EXTRA_ARGS
mechanism for delaying substitution into requires-expressions, which is
the cause of this PR.

But in order to avoid substituting through a lambda's constraints during
regeneration, we we need to be able to get at all in-scope template
parameters and the corresponding template arguments during constraint
checking of a lambda's op().  And this information is not easily
available where we need it, it seems.

To that end, the approach that this patch takes is to add two new fields
to LAMBDA_EXPR (and remove one): LAMBDA_EXPR_REGENERATED_FROM
(replacing LAMBDA_EXPR_INSTANTIATED), and LAMBDA_EXPR_REGENERATING_TARGS.
The former allows us to obtain the complete set of template parameters
that are in-scope for a lambda's op(), and the latter gives us all outer
template arguments that were used to regenerate the lambda.

LAMBDA_EXPR_REGENERATING_TARGS is not strictly necessary -- in an
earlier version of the patch, I walked LAMBDA_EXPR_EXTRA_SCOPE to build
up this set of outer template arguments on demand, but it's cleaner to
do it this way.  (We'd need to walk LAMBDA_EXPR_EXTRA_SCOPE and not
DECL/TYPE_CONTEXT because the latter skips over variable template
scopes.)

This patch also renames the predicate instantiated_lambda_fn_p to
regenerated_lambda_fn_p, for sake of consistency with the rest of the
patch which uses "regenerated" instead of "instantiated".

Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
cmcstl2 and range-v3.  Does this look OK for trunk?

gcc/cp/ChangeLog:

PR c++/99874
* constraint.cc (get_normalized_constraints_from_decl): Handle
regenerated lambdas.
(satisfy_declaration_constraints): Likewise.  Check for
dependent args later.
* cp-tree.h (LAMBDA_EXPR_INSTANTIATED): Replace with ...
(LAMBDA_EXPR_REGENERATED_FROM): ... this.
(LAMBDA_EXPR_REGENERATING_TARGS): New.
(tree_lambda_expr::regenerated_from): New data member.
(tree_lambda_expr::regenerating_targs): New data member.
(add_to_template_args): Declare.
(regenerated_lambda_fn_p): Likewise.
(most_general_lambda): Likewise.
* lambda.c (build_lambda_expr): Set LAMBDA_EXPR_REGENERATED_FROM
and LAMBDA_EXPR_REGENERATING_TARGS.
* pt.c (add_to_template_args): No longer static.
(tsubst_function_decl): Unconditionally propagate constraints on
the substituted function decl.
(instantiated_lambda_fn_p): Rename to ...
(regenerated_lambda_fn_p): ... this.  Check
LAMBDA_EXPR_REGENERATED_FROM instead of
LAMBDA_EXPR_INSTANTIATED.
(most_general_lambda): Define.
(enclosing_instantiation_of): Adjust after renaming
instantiated_lambda_fn_p.
(tsubst_lambda_expr): Don't substitute or set constraints on
the regenerated lambda.

gcc/testsuite/ChangeLog:

PR c++/99874
* g++.dg/cpp2a/concepts-lambda16.C: New test.
* g++.dg/cpp2a/concepts-lambda17.C: New test.
---
 gcc/cp/constraint.cc  | 43 +++--
 gcc/cp/cp-tree.h  | 20 --
 gcc/cp/lambda.c   |  2 +
 gcc/cp/pt.c   | 42 ++---
 .../g++.dg/cpp2a/concepts-lambda16.C  | 61 +++
 .../g++.dg/cpp2a/concepts-lambda17.C  | 14 +
 6 files changed, 150 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-lambda16.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-lambda17.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 5cf43bd6c18..bd526f669ab 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -886,6 +886,16 @@ get_normalized_constraints_from_decl (tree d, bool diag = 
false)
  it has the correct template information attached. */
   d = strip_inheriting_ctors (d);
 
+  if (regenerated_lambda_fn_p (d))
+{
+  /* If this lambda was regenerated, DECL_TEMPLATE_PARMS doesn't contain
+all in-scope template parameters, but the lambda from which it was
+ultimately regenerated does, so use that instead.  */
+  tree lambda = CLASSTYPE_LAMBDA_EXPR (DECL_CONTEXT (d));
+  lambda = most_general_lambda (lambda);
+  d = lambda_function (lambda);
+}
+
   if (TREE_CODE (d) == TEMPLATE_DECL)
 {
   tmpl = d;
@@ -3174,13 +3184,27 @@ satisfy_declaration_constraints (tree t, sat_info info)
   args = TI_ARGS (ti);
   if (inh_ctor_targs)
args = add_outermost_template_args (args, inh_ctor_targs);
+}
 
-  /* If any arguments depend on template parameters, w

[PATCH] tree-optimization/99956 - improve loop interchange

2021-04-07 Thread Richard Biener
When we apply store motion and DSE manually to the bwaves kernel
in gfortran.dg/pr81303.f loop interchange no longer happens because
the perfect nest considered covers outer loops we cannot analyze
strides for.  The following compensates for this by shrinking the
nest in this analysis which was already possible but on a too coarse
granularity.  It shares the shrinked nest with the rest of the DRs
so the complexity overhead should be negligible.

Bootstrapped & tested on x86_64-unknown-linux-gnu, SPEC FP CPU 2017
build and there's still only the single loop interchange in bwaves.

Queued for stage1.

2021-04-07  Richard Biener  

PR tree-optimization/99956
* gimple-loop-interchange.cc (compute_access_stride):
Try instantiating the access in a shallower loop nest
if instantiating failed.
(compute_access_strides): Pass adjustable loop_nest
to compute_access_stride.

* gfortran.dg/pr99956.f: New testcase.
---
 gcc/gimple-loop-interchange.cc  | 68 +
 gcc/testsuite/gfortran.dg/pr99956.f | 45 +++
 2 files changed, 85 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr99956.f

diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc
index f45b9364644..80f749b6071 100644
--- a/gcc/gimple-loop-interchange.cc
+++ b/gcc/gimple-loop-interchange.cc
@@ -1280,12 +1280,15 @@ tree_loop_interchange::move_code_to_inner_loop (class 
loop *outer,
   arr[i][j - 1][k] = 0;  */
 
 static void
-compute_access_stride (class loop *loop_nest, class loop *loop,
+compute_access_stride (class loop *&loop_nest, class loop *loop,
   data_reference_p dr)
 {
   vec *strides = new vec ();
-  basic_block bb = gimple_bb (DR_STMT (dr));
+  dr->aux = strides;
 
+  basic_block bb = gimple_bb (DR_STMT (dr));
+  if (!flow_bb_inside_loop_p (loop_nest, bb))
+return;
   while (!flow_bb_inside_loop_p (loop, bb))
 {
   strides->safe_push (build_int_cst (sizetype, 0));
@@ -1313,39 +1316,47 @@ compute_access_stride (class loop *loop_nest, class 
loop *loop,
}
   /* Otherwise punt.  */
   else
-   {
- dr->aux = strides;
- return;
-   }
+   return;
 }
   tree scev_base = build_fold_addr_expr (ref);
   tree scev = analyze_scalar_evolution (loop, scev_base);
-  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
-  if (! chrec_contains_undetermined (scev))
+  if (chrec_contains_undetermined (scev))
+return;
+
+  tree orig_scev = scev;
+  do
+{
+  scev = instantiate_scev (loop_preheader_edge (loop_nest),
+  loop, orig_scev);
+  if (! chrec_contains_undetermined (scev))
+   break;
+
+  /* If we couldn't instantiate for the desired nest, shrink it.  */
+  if (loop_nest == loop)
+   return;
+  loop_nest = loop_nest->inner;
+} while (1);
+
+  tree sl = scev;
+  class loop *expected = loop;
+  while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
 {
-  tree sl = scev;
-  class loop *expected = loop;
-  while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
+  class loop *sl_loop = get_chrec_loop (sl);
+  while (sl_loop != expected)
{
- class loop *sl_loop = get_chrec_loop (sl);
- while (sl_loop != expected)
-   {
- strides->safe_push (size_int (0));
- expected = loop_outer (expected);
-   }
- strides->safe_push (CHREC_RIGHT (sl));
- sl = CHREC_LEFT (sl);
+ strides->safe_push (size_int (0));
  expected = loop_outer (expected);
}
-  if (! tree_contains_chrecs (sl, NULL))
-   while (expected != loop_outer (loop_nest))
- {
-   strides->safe_push (size_int (0));
-   expected = loop_outer (expected);
- }
+  strides->safe_push (CHREC_RIGHT (sl));
+  sl = CHREC_LEFT (sl);
+  expected = loop_outer (expected);
 }
-
-  dr->aux = strides;
+  if (! tree_contains_chrecs (sl, NULL))
+while (expected != loop_outer (loop_nest))
+  {
+   strides->safe_push (size_int (0));
+   expected = loop_outer (expected);
+  }
 }
 
 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
@@ -1363,9 +1374,10 @@ compute_access_strides (class loop *loop_nest, class 
loop *loop,
   data_reference_p dr;
   vec *stride;
 
+  class loop *interesting_loop_nest = loop_nest;
   for (i = 0; datarefs.iterate (i, &dr); ++i)
 {
-  compute_access_stride (loop_nest, loop, dr);
+  compute_access_stride (interesting_loop_nest, loop, dr);
   stride = DR_ACCESS_STRIDE (dr);
   if (stride->length () < num_loops)
{
diff --git a/gcc/testsuite/gfortran.dg/pr99956.f 
b/gcc/testsuite/gfortran.dg/pr99956.f
new file mode 100644
index 000..b5c0be3912d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr99956.f
@@ -0,0 +1,45 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math

[PATCH] tree-optimization/99912 - remove unnecessary CLOBBERs after DSE

2021-04-07 Thread Richard Biener
The PR shows that the default walking limit of DSE (256) can be
insufficient for C++ code with large abstraction simply due to the
fact of hundreds of unused aggregates that are just CLOBBERed.
The remove_unused_locals phase has special code to deal with those
but it is just invoked in very few places.  The following triggers
it after DSE if DSE performed any dead store removal - maybe we
should unconditionally do so or make DSE remove such CLOBBERs
itself somehow.  Or we might want to place explicit schedules of
remove_unused_locals in the pass pipeline.

This avoids the need for increasing --param dse-max-alias-queries-per-store
to make DSE remove all dead stores for the testcase in the PR.

Any comments?

2021-04-07  Richard Biener  

PR tree-optimization/99912
* tree-ssa-dse.c (n_stores_removed): New global.
(delete_dead_or_redundant_assignment): Increment
n_stores_removed.
(pass_dse::execute): Initialize n_stores_removed, schedule
TODO_remove_unused_locals if nonzero.  Release
post-dominators before calling CFG cleanup.
---
 gcc/tree-ssa-dse.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 4967a5a9927..83879688abf 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -85,6 +85,8 @@ static void delete_dead_or_redundant_call 
(gimple_stmt_iterator *, const char *)
remove their dead edges eventually.  */
 static bitmap need_eh_cleanup;
 
+static unsigned n_stores_removed;
+
 /* STMT is a statement that may write into memory.  Analyze it and
initialize WRITE to describe how STMT affects memory.
 
@@ -1031,6 +1033,8 @@ delete_dead_or_redundant_assignment (gimple_stmt_iterator 
*gsi, const char *type
   /* And release any SSA_NAMEs set in this statement back to the
  SSA_NAME manager.  */
   release_defs (stmt);
+
+  n_stores_removed++;
 }
 
 /* Attempt to eliminate dead stores in the statement referenced by BSI.
@@ -1222,6 +1226,7 @@ unsigned int
 pass_dse::execute (function *fun)
 {
   need_eh_cleanup = BITMAP_ALLOC (NULL);
+  n_stores_removed = 0;
 
   renumber_gimple_stmt_uids (cfun);
 
@@ -1236,6 +1241,9 @@ pass_dse::execute (function *fun)
  tree and a backwards walk of statements within each block.  */
   dse_dom_walker (CDI_POST_DOMINATORS).walk (fun->cfg->x_exit_block_ptr);
 
+  /* Wipe the post-dominator information.  */
+  free_dominance_info (CDI_POST_DOMINATORS);
+
   /* Removal of stores may make some EH edges dead.  Purge such edges from
  the CFG as needed.  */
   if (!bitmap_empty_p (need_eh_cleanup))
@@ -1246,9 +1254,9 @@ pass_dse::execute (function *fun)
 
   BITMAP_FREE (need_eh_cleanup);
 
-  /* For now, just wipe the post-dominator information.  */
-  free_dominance_info (CDI_POST_DOMINATORS);
-  return 0;
+  /* If we elided a store try to get rid of unused locals and in
+ particular stray CLOBBERs of otherwise unused variables.  */
+  return n_stores_removed != 0 ? TODO_remove_unused_locals : 0;
 }
 
 } // anon namespace
-- 
2.26.2


[PATCH] testsuite: Fix many UNRESOLVEDs for gcc.dg/vect

2021-04-07 Thread Richard Sandiford via Gcc-patches
It turns out that, on targets that use testglue, many gcc.dg/vect
scan-assembler tests became UNRESOLVED after the change to the dump
file naming scheme.

The problem is that, when creating an executable, we normally name
the dump file after both the executable and the source file name.
However, as an exception, we name it after only the source file
name if:

(a) there is only one source file name and
(b) the source file and the executable have the same basename

Both (a) and (b) are normally true when building executables from
gcc.dg/vect.  But (a) is not true when linking against testglue.
The harness was therefore looking for a dump file based only on the
source file name while the compiler was producing a dump file that
contained both names.

We get around this for dg-additional-sources using:

# This option restores naming of aux and dump output files
# after input files when multiple input files are named,
# instead of getting them combined with the output name.
lappend options "additional_flags=-dumpbase \"\""

This patch does the same thing for executables that are linked
against testglue.  This removes over 2400 UNRESOLVEDs from an
armeb-eabi test run, but in so doing introduces FAILs for some
tests that were previously skipped.

Tested on armeb-eabi.  OK to install?

Richard


gcc/testsuite/
* lib/gcc.exp (gcc_target_compile): Add -dumpbase ""
when building an executable with testglue.
---
 gcc/testsuite/lib/gcc.exp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/lib/gcc.exp b/gcc/testsuite/lib/gcc.exp
index dda4d0e6366..4d80606d386 100644
--- a/gcc/testsuite/lib/gcc.exp
+++ b/gcc/testsuite/lib/gcc.exp
@@ -134,6 +134,9 @@ proc gcc_target_compile { source dest type options } {
[info exists gluefile] } {
lappend options "libs=${gluefile}"
lappend options "ldflags=$wrap_flags"
+   if { $type == "executable" } {
+   set options [concat "{additional_flags=-dumpbase \"\"}" $options]
+   }
 }
 
 global TEST_EXTRA_LIBS


Re: RFC: Sphinx for GCC documentation

2021-04-07 Thread Joseph Myers
On Wed, 7 Apr 2021, Michael Matz wrote:

> Random snippet for what I mean: the .texi contains:
> 
> "The @code{access} attribute specifies that a function to whose 
> by-reference arguments the attribute applies accesses the referenced 
> object according to @var{access-mode}.  The @var{access-mode} argument is 
> required and must be"
> 
> the .rst has:
> 
> "The ``access`` attribute specifies that a function to whose by-reference 
> arguments the attribute applies accesses the referenced object according 
> to :samp:`{access-mode}`.  The :samp:`{access-mode}` argument is required 
> and must be"
> 
> So, @code{}/@var{} vs. `` `` / :samp:``.  Keeping in mind that 

@var in Texinfo is orthogonal to whether something is literal code.  It 
looks like Sphinx's equivalent is {} inside :samp:`` (so not supporting 
the use case of @var outside literal code)?

Although there *is* a difference in output as well as semantics between 
@code and @samp in Texinfo (@samp quotes the output in some cases where 
@code does not, I think), I'm not convinced the GCC manuals actually make 
much distinction between the two.

> (Again, I'm aware that the .texi files aren't perfect here, and @code/@var 
> also seems a bit forced :) )

@var is a metasyntactic variable (the text inside @var is converted to 
uppercase for info output), completely different in usage from @code.  
It's @code/@samp where there isn't much consistency (and other cases such 
as e.g. whether @option or @samp is used for a sequence of more than one 
option).

> (I'm not proposing we go the full extreme of semantic markup that docbook 
> tries, but the other end of the spectrum, i.e. markdown/rst with only 
> presentation markup, also doesn't seem very well fitting)

Presumably we could add our own roles (:gcc:our_own_role:`something`) and 
directives with custom Python code to implement them, whenever we want to 
keep semantics that aren't covered by the default Sphinx-flavoured reST?

However, I don't know how easy it is to keep such code (and manuals) 
compatible with a wide range of Sphinx versions.  Documentation requiring 
the latest version of Sphinx to build cleanly is a pain, I'd prefer to aim 
for e.g. working with any Sphinx release from the past five years.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] testsuite: Fix many UNRESOLVEDs for gcc.dg/vect

2021-04-07 Thread Richard Biener via Gcc-patches
On April 7, 2021 7:37:36 PM GMT+02:00, Richard Sandiford via Gcc-patches 
 wrote:
>It turns out that, on targets that use testglue, many gcc.dg/vect
>scan-assembler tests became UNRESOLVED after the change to the dump
>file naming scheme.
>
>The problem is that, when creating an executable, we normally name
>the dump file after both the executable and the source file name.
>However, as an exception, we name it after only the source file
>name if:
>
>(a) there is only one source file name and
>(b) the source file and the executable have the same basename
>
>Both (a) and (b) are normally true when building executables from
>gcc.dg/vect.  But (a) is not true when linking against testglue.
>The harness was therefore looking for a dump file based only on the
>source file name while the compiler was producing a dump file that
>contained both names.
>
>We get around this for dg-additional-sources using:
>
>   # This option restores naming of aux and dump output files
>   # after input files when multiple input files are named,
>   # instead of getting them combined with the output name.
>   lappend options "additional_flags=-dumpbase \"\""
>
>This patch does the same thing for executables that are linked
>against testglue.  This removes over 2400 UNRESOLVEDs from an
>armeb-eabi test run, but in so doing introduces FAILs for some
>tests that were previously skipped.
>
>Tested on armeb-eabi.  OK to install?

Ok. 

Richard. 

>Richard
>
>
>gcc/testsuite/
>   * lib/gcc.exp (gcc_target_compile): Add -dumpbase ""
>   when building an executable with testglue.
>---
> gcc/testsuite/lib/gcc.exp | 3 +++
> 1 file changed, 3 insertions(+)
>
>diff --git a/gcc/testsuite/lib/gcc.exp b/gcc/testsuite/lib/gcc.exp
>index dda4d0e6366..4d80606d386 100644
>--- a/gcc/testsuite/lib/gcc.exp
>+++ b/gcc/testsuite/lib/gcc.exp
>@@ -134,6 +134,9 @@ proc gcc_target_compile { source dest type options
>} {
>   [info exists gluefile] } {
>   lappend options "libs=${gluefile}"
>   lappend options "ldflags=$wrap_flags"
>+  if { $type == "executable" } {
>+  set options [concat "{additional_flags=-dumpbase \"\"}" $options]
>+  }
> }
> 
> global TEST_EXTRA_LIBS



[pushed] c++: using overloaded with local decl [PR92918]

2021-04-07 Thread Jason Merrill via Gcc-patches
The problem here was that the lookup for 'impl' when parsing the template
only found the using-declaration, not the member function declaration.

This happened because when trying to add the member function declaration,
push_class_level_binding_1 saw that the current binding was a USING_DECL and
the new value is an overload, and decided to just return success.

That 'return true' dates back to r69921.  In
https://gcc.gnu.org/pipermail/gcc-patches/2003-July/110632.html Nathan
mentions that we only push dependent USING_DECLs, which is no longer the
case; now that we retain more USING_DECLs, handling this case like the other
overloaded function cases seems like the obvious solution.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

PR c++/92918
* name-lookup.c (push_class_level_binding_1): Do overload a new
function with a previous using-declaration.

gcc/testsuite/ChangeLog:

PR c++/92918
* g++.dg/lookup/using66.C: New test.
---
 gcc/cp/name-lookup.c  |  2 +-
 gcc/testsuite/g++.dg/lookup/using66.C | 23 +++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/lookup/using66.C

diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index 3bce3d597dc..4e84e2f9987 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -5520,7 +5520,7 @@ push_class_level_binding_1 (tree name, tree x)
old_decl = bval;
   else if (TREE_CODE (bval) == USING_DECL
   && OVL_P (target_decl))
-   return true;
+   old_decl = bval;
   else if (OVL_P (target_decl)
   && OVL_P (target_bval))
old_decl = bval;
diff --git a/gcc/testsuite/g++.dg/lookup/using66.C 
b/gcc/testsuite/g++.dg/lookup/using66.C
new file mode 100644
index 000..02383bbea3e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lookup/using66.C
@@ -0,0 +1,23 @@
+// PR c++/92918
+// { dg-do compile { target c++11 } }
+
+struct Base03
+{
+static void impl();
+};
+
+struct Problem : Base03
+{
+using Base03::impl;
+static int impl(char const *);
+
+template 
+auto f(const T &t) const
+-> decltype(impl(t))
+{
+return impl(t);
+}
+};
+
+Problem t;
+int i = t.f("42");

base-commit: d11bcbe166c03f722c0e0d41d6e87ac445758fba
-- 
2.27.0



[PATCH v10] Practical improvement to libgcc complex divide

2021-04-07 Thread Patrick McGehearty via Gcc-patches
Changes in this version from Version 9:

Replaced all uses of alloca with XALLOCAVEC in
c_cpp_builtins() in c-cppbuiltin.c
Did not replace alloca elsewhere in the same file.

Fixed type of name_len.
Fixed prototypes for abort & exit in new test programs.
Fixed spelling errors and omitted words in comments.
Changed XMTYPE to AMTYPE to avoid confusion with extended types.
Removed XCTYPE as unused. (A for additional)

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more diffic

[PATCH] c++: constrained CTAD for nested class template [PR97679]

2021-04-07 Thread Patrick Palka via Gcc-patches
In the testcase below, we're crashing during constraint checking of the
implicitly generated deduction guides for the nested class template A::B
because we never substitute the outer template arguments (for A) into
the constraint, neither ahead of time nor as part of satisfaction.

Ideally we'd like to avoid substituting into a constraint ahead of
time, but the "flattening" vector 'tsubst_args' is constructed under the
assumption that all outer template arguments are already substituted in,
and eliminating this assumption to yield a flattening vector that
includes outer (generic) template arguments suitable for substituting
into the constraint would be tricky.  So this patch takes the
approximate approach of substituting the outer arguments into the
constraint ahead of time, so that we could subsequently substitute
'tsubst_args' into the constraint.

NB: I noticed that [over.match.class.deduct]/1, which describes how
guides are formed from constructors of a class template, doesn't mention
that a guide inherits the constraints of the corresponding constructor.
Though [over.match.class.deduct]/2 later suggests that guides do have
constraints: "The associated constraints [of f'] are the conjunction of
the associated constraints of [the guide] g and ..."  So I'm unsure if
we're being conforming by giving guides the constraints of the
corresponding constructor.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and 10 branch (after 10.3 is released)?

gcc/cp/ChangeLog:

PR c++/97679
* pt.c (build_deduction_guide): Substitute outer template
arguments into the constraints.

gcc/testsuite/ChangeLog:

PR c++/97679
* g++.dg/cpp2a/concepts-ctad3.C: New test.
---
 gcc/cp/pt.c | 17 +++--
 gcc/testsuite/g++.dg/cpp2a/concepts-ctad3.C | 16 
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ctad3.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 7917a280804..6f44199e8eb 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -28719,7 +28719,15 @@ build_deduction_guide (tree type, tree ctor, tree 
outer_args, tsubst_flags_t com
  if (fparms == error_mark_node)
ok = false;
  if (ci)
-   ci = tsubst_constraint_info (ci, tsubst_args, complain, ctor);
+   {
+ if (outer_args)
+   /* FIXME: We'd like to avoid substituting outer template
+  arguments into the constraint ahead of time, but the
+  construction of tsubst_args assumes that outer arguments
+  are already substituted in.  */
+   ci = tsubst_constraint_info (ci, outer_args, complain, ctor);
+ ci = tsubst_constraint_info (ci, tsubst_args, complain, ctor);
+   }
 
  /* Parms are to have DECL_CHAIN tsubsted, which would be skipped if
 cp_unevaluated_operand.  */
@@ -28735,7 +28743,12 @@ build_deduction_guide (tree type, tree ctor, tree 
outer_args, tsubst_flags_t com
  fparms = tsubst_arg_types (fparms, targs, NULL_TREE, complain, ctor);
  fargs = tsubst (fargs, targs, complain, ctor);
  if (ci)
-   ci = tsubst_constraint_info (ci, targs, complain, ctor);
+   {
+ if (outer_args)
+   /* FIXME: As above.  */
+   ci = tsubst_constraint_info (ci, outer_args, complain, ctor);
+ ci = tsubst_constraint_info (ci, targs, complain, ctor);
+   }
}
 
   --processing_template_decl;
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ctad3.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad3.C
new file mode 100644
index 000..3546b7461a8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ctad3.C
@@ -0,0 +1,16 @@
+// PR c++/97679
+// { dg-do compile { target c++20 } }
+
+template  struct A {
+  template  struct B {
+B(T) requires V;
+template  B(T, U) requires V || (__is_same(T, char) && 
__is_same(U, int));
+  };
+};
+
+A::B x1(0);
+A::B x2(0); // { dg-error "deduction|no match" }
+
+A::B y1(0, '0');
+A::B y2(0, '0'); // { dg-error "deduction|no match" }
+A::B y3('0', 0);
-- 
2.31.1.189.g2e36527f23



[pushed] c++: base template friend [PR52625]

2021-04-07 Thread Jason Merrill via Gcc-patches
Here we were mistakenly treating the injected-class-name as a partial
specialization.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

PR c++/52625
* pt.c (maybe_process_partial_specialization): Check
DECL_SELF_REFERENCE_P.

gcc/testsuite/ChangeLog:

PR c++/52625
* g++.dg/template/friend70.C: New test.
---
 gcc/cp/pt.c  | 4 
 gcc/testsuite/g++.dg/template/friend70.C | 9 +
 2 files changed, 13 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/template/friend70.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index a08d08d2834..dee802121e5 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -984,6 +984,10 @@ maybe_process_partial_specialization (tree type)
   if (CLASS_TYPE_P (type) && CLASSTYPE_LAMBDA_EXPR (type))
 return type;
 
+  /* An injected-class-name is not a specialization.  */
+  if (DECL_SELF_REFERENCE_P (TYPE_NAME (type)))
+return type;
+
   if (TREE_CODE (type) == BOUND_TEMPLATE_TEMPLATE_PARM)
 {
   error ("name of class shadows template template parameter %qD",
diff --git a/gcc/testsuite/g++.dg/template/friend70.C 
b/gcc/testsuite/g++.dg/template/friend70.C
new file mode 100644
index 000..54965486f79
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/friend70.C
@@ -0,0 +1,9 @@
+// PR c++/52625
+
+template
+class base {};
+
+class derived : public base
+{
+  template friend class base;
+};

base-commit: a528594cf9a74e5a0fbac13ef673064ed73e1b89
-- 
2.27.0



[pushed] c++: friend with redundant qualification [PR41723]

2021-04-07 Thread Jason Merrill via Gcc-patches
Different code paths were correctly choosing to look up D directly, since C
is the current instantiation, but here we decided to try to make it a
typename type, leading to confusion.  Fixed by using dependent_scope_p as we
do elsewhere.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

PR c++/41723
* parser.c (cp_parser_class_name): Check dependent_scope_p.

gcc/testsuite/ChangeLog:

PR c++/41723
* g++.dg/template/friend71.C: New test.
---
 gcc/cp/parser.c  | 2 +-
 gcc/testsuite/g++.dg/template/friend71.C | 8 
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/template/friend71.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index c915d6415de..59adac4492a 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -24639,7 +24639,7 @@ cp_parser_class_name (cp_parser *parser,
   const bool typename_p = (typename_keyword_p
   && parser->scope
   && TYPE_P (parser->scope)
-  && dependent_type_p (parser->scope));
+  && dependent_scope_p (parser->scope));
   /* Handle the common case (an identifier, but not a template-id)
  efficiently.  */
   if (token->type == CPP_NAME
diff --git a/gcc/testsuite/g++.dg/template/friend71.C 
b/gcc/testsuite/g++.dg/template/friend71.C
new file mode 100644
index 000..939ea6b5264
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/friend71.C
@@ -0,0 +1,8 @@
+// PR c++/41723
+
+template
+class C {
+  template  class D {};
+
+  friend class C::D;
+};

base-commit: a528594cf9a74e5a0fbac13ef673064ed73e1b89
prerequisite-patch-id: 6c2b63b290a1c22c08dd307fcd872aeb647f67a5
-- 
2.27.0



[PATCH] c++: Fix ICE with unexpanded parameter pack [PR99844]

2021-04-07 Thread Marek Polacek via Gcc-patches
In explicit17.C, we weren't detecting an unexpanded parameter pack in
explicit(bool), so we crashed on a TEMPLATE_PARM_INDEX in constexpr.

I noticed the same is true for noexcept(), but only since my patch to
implement delayed parsing of noexcept.  Previously, we would detect the
unexpanded pack in push_template_decl but now the noexcept expression
has not yet been parsed, so we need to do it a bit later.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

PR c++/99844
* decl.c (build_explicit_specifier): Call
check_for_bare_parameter_packs.
* except.c (build_noexcept_spec): Likewise.

gcc/testsuite/ChangeLog:

PR c++/99844
* g++.dg/cpp2a/explicit16.C: Use c++20.
* g++.dg/cpp0x/noexcept66.C: New test.
* g++.dg/cpp2a/explicit17.C: New test.
---
 gcc/cp/decl.c   |  3 +++
 gcc/cp/except.c |  2 ++
 gcc/testsuite/g++.dg/cpp0x/noexcept66.C | 13 +
 gcc/testsuite/g++.dg/cpp2a/explicit16.C |  2 +-
 gcc/testsuite/g++.dg/cpp2a/explicit17.C |  9 +
 5 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept66.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/explicit17.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index edab147c78d..3294b4fa943 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -17991,6 +17991,9 @@ require_deduced_type (tree decl, tsubst_flags_t 
complain)
 tree
 build_explicit_specifier (tree expr, tsubst_flags_t complain)
 {
+  if (check_for_bare_parameter_packs (expr))
+return error_mark_node;
+
   if (instantiation_dependent_expression_p (expr))
 /* Wait for instantiation, tsubst_function_decl will handle it.  */
 return expr;
diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index 0bbc229490e..cbafc09629b 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1228,6 +1228,8 @@ type_throw_all_p (const_tree type)
 tree
 build_noexcept_spec (tree expr, tsubst_flags_t complain)
 {
+  if (check_for_bare_parameter_packs (expr))
+return error_mark_node;
   if (TREE_CODE (expr) != DEFERRED_NOEXCEPT
   && !value_dependent_expression_p (expr))
 {
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept66.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept66.C
new file mode 100644
index 000..6c76d9146ad
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept66.C
@@ -0,0 +1,13 @@
+// PR c++/99844
+// { dg-do compile { target c++11 } }
+
+template 
+struct S {
+ void fn() noexcept(B); // { dg-error "parameter packs not expanded" }
+};
+
+void fn ()
+{
+  S s;
+  s.fn();
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/explicit16.C 
b/gcc/testsuite/g++.dg/cpp2a/explicit16.C
index 9d95b0d669e..9c20f6332e9 100644
--- a/gcc/testsuite/g++.dg/cpp2a/explicit16.C
+++ b/gcc/testsuite/g++.dg/cpp2a/explicit16.C
@@ -1,5 +1,5 @@
 // PR c++/95066 - explicit malfunction with dependent expression.
-// { dg-do compile { target c++2a } }
+// { dg-do compile { target c++20 } }
 
 template 
 struct Foo {
diff --git a/gcc/testsuite/g++.dg/cpp2a/explicit17.C 
b/gcc/testsuite/g++.dg/cpp2a/explicit17.C
new file mode 100644
index 000..38a61f4a273
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/explicit17.C
@@ -0,0 +1,9 @@
+// PR c++/99844
+// { dg-do compile { target c++20 } }
+
+template 
+struct S {
+  constexpr explicit(B) S() {} // { dg-error "parameter packs not expanded" }
+};
+
+constexpr S s;

base-commit: 2f3d9104610cb2058cf091707a20c1c6eff8d470
-- 
2.30.2



Re: [patch for gcc12 stage1][version 2] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-04-07 Thread Kees Cook via Gcc-patches
On Wed, Mar 24, 2021 at 04:21:49PM -0500, Qing Zhao wrote:
> This is the 2nd version of the patch for the new security feature for GCC.
> 
> Could you please take a look at it and let me know any comments and issues.

This behaves perfectly as far as I'm able to test in the Linux kernel!
Thank you!

For comparison to v1, here's the stack init test output for version 2 of the 
patch:

test_stackinit: u8_zero ok
test_stackinit: u16_zero ok
test_stackinit: u32_zero ok
test_stackinit: u64_zero ok
test_stackinit: char_array_zero ok
test_stackinit: small_hole_zero ok
test_stackinit: big_hole_zero ok
test_stackinit: trailing_hole_zero ok
test_stackinit: packed_zero ok
test_stackinit: small_hole_dynamic_partial ok
test_stackinit: big_hole_dynamic_partial ok
test_stackinit: trailing_hole_dynamic_partial ok
test_stackinit: packed_dynamic_partial ok
test_stackinit: small_hole_static_partial ok
test_stackinit: big_hole_static_partial ok
test_stackinit: trailing_hole_static_partial ok
test_stackinit: packed_static_partial ok
test_stackinit: small_hole_static_all ok
test_stackinit: big_hole_static_all ok
test_stackinit: trailing_hole_static_all ok
test_stackinit: packed_static_all ok
test_stackinit: small_hole_dynamic_all ok
test_stackinit: big_hole_dynamic_all ok
test_stackinit: trailing_hole_dynamic_all ok
test_stackinit: packed_dynamic_all ok
test_stackinit: small_hole_runtime_partial ok
test_stackinit: big_hole_runtime_partial ok
test_stackinit: trailing_hole_runtime_partial ok
test_stackinit: packed_runtime_partial ok
test_stackinit: small_hole_runtime_all ok
test_stackinit: big_hole_runtime_all ok
test_stackinit: trailing_hole_runtime_all ok
test_stackinit: packed_runtime_all ok
test_stackinit: u8_none ok
test_stackinit: u16_none ok
test_stackinit: u32_none ok
test_stackinit: u64_none ok
test_stackinit: char_array_none ok
test_stackinit: switch_1_none XFAIL (uninit bytes: 8)
test_stackinit: switch_2_none XFAIL (uninit bytes: 8)
test_stackinit: small_hole_none ok
test_stackinit: big_hole_none ok
test_stackinit: trailing_hole_none ok
test_stackinit: packed_none ok
test_stackinit: user ok
test_stackinit: all tests passed!

The switch cases are an "expected fail" still, and that's totally fine
for now[1]. Those were all purged from real kernel code anyway. ;)

So that's a big "Ack" from me. :)

What are next steps for this patch? I know a lot of people are looking
forward to it. (And then long-open bug[2] for auto-init can be closed.)

Thanks again!

-Kees

[1] Clang doesn't handle this either https://bugs.llvm.org/show_bug.cgi?id=44916
[2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87210

> 
> Thanks.
> 
> Qing
> 
> **compared to Version 1, this version added the following new features to 
> address Kees’s comments:
> 
> 1.  correctly handle VLA inside a structure for pattern initialization.
> In tree.c (build_pattern_cst):
> 
> +   /* if the field is a variable length array, it should be the last
> +  field of the record, and no need to initialize.  */
> +   if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
> +   && TYPE_SIZE (TREE_TYPE (field)) == NULL_TREE
> +   && ((TYPE_DOMAIN (TREE_TYPE (field)) != NULL_TREE
> +   && TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (field)))
> +  == NULL_TREE)
> +  || TYPE_DOMAIN (TREE_TYPE (field)) == NULL_TREE))
> + continue;
> 
> 2.  initialize all paddings to zero when -ftrivial-auto-var-init is present.
> In expr.c (store_constructor):
> 
>   Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> 
> In gimplify.c (gimplify_init_constructor):
> 
>   Clear the whole structure when
> -ftrivial-auto-var-init and the structure has paddings.
> 
> As agreed with Kees, treat the issue related to auto variables outside of the 
> cases and inside the switch as a low priority one. 
> 
> 3. Add  the following new testing cases for the above 1 and 2:
> 
> * c-c++-common/auto-init-13.c: New test.
> * c-c++-common/auto-init-14.c: New test. 
> 
>   * gcc.target/aarch64/auto-init-9.c: New test.
>   * gcc.target/aarch64/auto-init-10.c: New test.
> * gcc.target/aarch64/auto-init-11.c: New test.
> * gcc.target/aarch64/auto-init-12.c: New test.
> * gcc.target/aarch64/auto-init-13.c: New test.
> * gcc.target/aarch64/auto-init-14.c: New test.
> * gcc.target/aarch64/auto-init-15.c: New test.
> * gcc.target/aarch64/auto-init-16.c: New test.
> * gcc.target/aarch64/auto-init-17.c: New test.
> * gcc.target/aarch64/auto-init-18.c: New test.
> * gcc.target/aarch64/auto-init-19.c: New test.
> * gcc.target/aarch64/auto-init-20.c: New test.
> 
> * gcc.target/i386/auto-init-9.c: New test.
> * gcc.target/i386/auto-init-10.c: New test.
> * gcc.target/i386/auto-init-11.c: New test.
> * gcc.tar

fix a couple of typos (PR 99883)

2021-04-07 Thread Martin Sebor via Gcc-patches

The attached patch fixes a couple of typos.  Not sure they qualify
as regressions but it seems like a trivial fix worth making even
now.  I'll go ahead and commit it as obvious if no-one objects.

Martin
PR middle-end/99883 - A couple of minor misspellings

gcc/c-family/ChangeLog:

	* c.opt (Wmismatched-new-delete): Correct spelling.

gcc/lto/ChangeLog:

	* lto-lang.c (lto_post_options): Correct spelling.

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 64e46e7573e..ed9a82599ef 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -800,7 +800,7 @@ functions.
 
 Wmismatched-new-delete
 C++ ObjC++ Var(warn_mismatched_new_delete) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
-Warn for mismatches between calls to operator new or delete and the corrsponding
+Warn for mismatches between calls to operator new or delete and the corresponding
 call to the allocation or deallocation function.
 
 Wmismatched-tags
diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c
index 8e73a11a7f6..c13c7e45ac1 100644
--- a/gcc/lto/lto-lang.c
+++ b/gcc/lto/lto-lang.c
@@ -896,7 +896,7 @@ lto_post_options (const char **pfilename ATTRIBUTE_UNUSED)
   lang_hooks.lto.end_section = lhd_end_section;
   if (flag_ltrans)
 	error ("%<-flinker-output=rel%> and %<-fltrans%> are mutually "
-	   "exclussive");
+	   "exclusive");
   break;
 
 case LTO_LINKER_OUTPUT_NOLTOREL: /* .o: incremental link producing asm  */


Re: [PATCH] c++: Fix ICE with unexpanded parameter pack [PR99844]

2021-04-07 Thread Jason Merrill via Gcc-patches

On 4/7/21 5:58 PM, Marek Polacek wrote:

In explicit17.C, we weren't detecting an unexpanded parameter pack in
explicit(bool), so we crashed on a TEMPLATE_PARM_INDEX in constexpr.

I noticed the same is true for noexcept(), but only since my patch to
implement delayed parsing of noexcept.  Previously, we would detect the
unexpanded pack in push_template_decl but now the noexcept expression
has not yet been parsed, so we need to do it a bit later.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


gcc/cp/ChangeLog:

PR c++/99844
* decl.c (build_explicit_specifier): Call
check_for_bare_parameter_packs.
* except.c (build_noexcept_spec): Likewise.

gcc/testsuite/ChangeLog:

PR c++/99844
* g++.dg/cpp2a/explicit16.C: Use c++20.
* g++.dg/cpp0x/noexcept66.C: New test.
* g++.dg/cpp2a/explicit17.C: New test.
---
  gcc/cp/decl.c   |  3 +++
  gcc/cp/except.c |  2 ++
  gcc/testsuite/g++.dg/cpp0x/noexcept66.C | 13 +
  gcc/testsuite/g++.dg/cpp2a/explicit16.C |  2 +-
  gcc/testsuite/g++.dg/cpp2a/explicit17.C |  9 +
  5 files changed, 28 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept66.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/explicit17.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index edab147c78d..3294b4fa943 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -17991,6 +17991,9 @@ require_deduced_type (tree decl, tsubst_flags_t 
complain)
  tree
  build_explicit_specifier (tree expr, tsubst_flags_t complain)
  {
+  if (check_for_bare_parameter_packs (expr))
+return error_mark_node;
+
if (instantiation_dependent_expression_p (expr))
  /* Wait for instantiation, tsubst_function_decl will handle it.  */
  return expr;
diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index 0bbc229490e..cbafc09629b 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1228,6 +1228,8 @@ type_throw_all_p (const_tree type)
  tree
  build_noexcept_spec (tree expr, tsubst_flags_t complain)
  {
+  if (check_for_bare_parameter_packs (expr))
+return error_mark_node;
if (TREE_CODE (expr) != DEFERRED_NOEXCEPT
&& !value_dependent_expression_p (expr))
  {
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept66.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept66.C
new file mode 100644
index 000..6c76d9146ad
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept66.C
@@ -0,0 +1,13 @@
+// PR c++/99844
+// { dg-do compile { target c++11 } }
+
+template 
+struct S {
+ void fn() noexcept(B); // { dg-error "parameter packs not expanded" }
+};
+
+void fn ()
+{
+  S s;
+  s.fn();
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/explicit16.C 
b/gcc/testsuite/g++.dg/cpp2a/explicit16.C
index 9d95b0d669e..9c20f6332e9 100644
--- a/gcc/testsuite/g++.dg/cpp2a/explicit16.C
+++ b/gcc/testsuite/g++.dg/cpp2a/explicit16.C
@@ -1,5 +1,5 @@
  // PR c++/95066 - explicit malfunction with dependent expression.
-// { dg-do compile { target c++2a } }
+// { dg-do compile { target c++20 } }
  
  template 

  struct Foo {
diff --git a/gcc/testsuite/g++.dg/cpp2a/explicit17.C 
b/gcc/testsuite/g++.dg/cpp2a/explicit17.C
new file mode 100644
index 000..38a61f4a273
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/explicit17.C
@@ -0,0 +1,9 @@
+// PR c++/99844
+// { dg-do compile { target c++20 } }
+
+template 
+struct S {
+  constexpr explicit(B) S() {} // { dg-error "parameter packs not expanded" }
+};
+
+constexpr S s;

base-commit: 2f3d9104610cb2058cf091707a20c1c6eff8d470