Re: [C++ PATCH] Allow [[likely]] and [[unlikely]] in constexpr functions (PR c++/92343)

2019-11-05 Thread Jason Merrill
OK.  I wonder why we're returning false for EMPTY_CLASS_EXPR?

On Tue, Nov 5, 2019 at 7:35 AM Jakub Jelinek  wrote:
>
> Hi!
>
> When Martin Liska added PREDICT_EXPR to potential_constant_expression_1,
> it was with goto in mind and in that case goto isn't a potential
> constant expression, but when the {,un}likely attributes are used on other
> statements, they are valid.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?
>
> 2019-11-05  Jakub Jelinek  
>
> PR c++/92343
> * constexpr.c (potential_constant_expression_1): Return true rather
> than false for PREDICT_EXPR.
>
> * g++.dg/cpp2a/attr-likely6.C: New test.
>
> --- gcc/cp/constexpr.c.jj   2019-11-02 00:26:48.965846855 +0100
> +++ gcc/cp/constexpr.c  2019-11-04 09:53:35.070621487 +0100
> @@ -6493,6 +6493,7 @@ potential_constant_expression_1 (tree t,
>  case LABEL_DECL:
>  case LABEL_EXPR:
>  case CASE_LABEL_EXPR:
> +case PREDICT_EXPR:
>  case CONST_DECL:
>  case SIZEOF_EXPR:
>  case ALIGNOF_EXPR:
> @@ -7354,7 +7355,6 @@ potential_constant_expression_1 (tree t,
>return true;
>
>  case EMPTY_CLASS_EXPR:
> -case PREDICT_EXPR:
>return false;
>
>  case GOTO_EXPR:
> --- gcc/testsuite/g++.dg/cpp2a/attr-likely6.C.jj2019-11-04 
> 09:54:50.126485303 +0100
> +++ gcc/testsuite/g++.dg/cpp2a/attr-likely6.C   2019-11-04 09:55:21.001017926 
> +0100
> @@ -0,0 +1,14 @@
> +// PR c++/92343
> +// { dg-do compile { target c++14 } }
> +
> +constexpr bool
> +foo (bool x)
> +{
> +  if (x)
> +[[unlikely]] return true;
> +  else
> +[[likely]] return false;
> +}
> +
> +static_assert (foo (true), "");
> +static_assert (!foo (false), "");
>
> Jakub


Re: [PATCH] Fix compute_objsize ICE on VLA ARRAY_REF (PR tree-optimization/91945)

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jakub Jelinek wrote:

> Hi!
> 
> As the testcase shows, ARRAY_REF on an array with variable length element
> doesn't have INTEGER_CST TYPE_SIZE_UNIT which the code was assuming.
> The following patch punts in that case.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2019-11-05  Jakub Jelinek  
> 
>   PR tree-optimization/91945
>   * builtins.c (compute_objsize): For ARRAY_REF, only multiply off
>   by tpsize if it is both non-NULL and INTEGER_CST, otherwise punt.
>   Formatting fix.
> 
>   * gfortran.dg/pr91945.f90: New test.
> 
> --- gcc/builtins.c.jj 2019-10-10 01:33:20.0 +0200
> +++ gcc/builtins.c2019-11-04 10:09:21.200301352 +0100
> @@ -3626,7 +3626,7 @@ compute_objsize (tree dest, int ostype,
>   }
>   }
> else if (TREE_CODE (off) == SSA_NAME
> -   && INTEGRAL_TYPE_P (TREE_TYPE (off)))
> +&& INTEGRAL_TYPE_P (TREE_TYPE (off)))
>   {
> wide_int min, max;
> enum value_range_kind rng = get_range_info (off, &min, &max);
> @@ -3680,7 +3680,8 @@ compute_objsize (tree dest, int ostype,
> if (TREE_CODE (dest) == ARRAY_REF)
>   {
> tree eltype = TREE_TYPE (dest);
> -   if (tree tpsize = TYPE_SIZE_UNIT (eltype))
> +   tree tpsize = TYPE_SIZE_UNIT (eltype);
> +   if (tpsize && TREE_CODE (tpsize) == INTEGER_CST)
>   off = fold_build2 (MULT_EXPR, size_type_node, off, tpsize);
> else
>   return NULL_TREE;
> --- gcc/testsuite/gfortran.dg/pr91945.f90.jj  2019-11-04 10:13:40.392378534 
> +0100
> +++ gcc/testsuite/gfortran.dg/pr91945.f90 2019-11-04 10:13:21.272667903 
> +0100
> @@ -0,0 +1,5 @@
> +! PR tree-optimization/91945
> +! { dg-do compile }
> +! { dg-options "-O3 -fstack-arrays -fno-guess-branch-probability" }
> +
> +include 'result_in_spec_1.f90'
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] Reject VLAs in inline asm operands that require registers (PR inline-asm/92352)

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jakub Jelinek wrote:

> Hi!
> 
> On VLAs with register only constraints we ICE, because during gimplification
> we try to create temporaries for them and force_constant_size aborts in that
> case.
> 
> The following patch diagnoses those early, like we diagnose already C++
> non-PODs.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2019-11-05  Jakub Jelinek  
> 
>   PR inline-asm/92352
>   * gimplify.c (gimplify_asm_expr): Reject VLA in output or input
>   operands with non-memory constraints.
> 
>   * c-c++-common/pr92352.c: New test.
> 
> --- gcc/gimplify.c.jj 2019-11-02 10:00:59.595253274 +0100
> +++ gcc/gimplify.c2019-11-05 00:21:01.585958514 +0100
> @@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
> is_inout = false;
>   }
>  
> -  /* If we can't make copies, we can only accept memory.  */
> -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> +  /* If we can't make copies, we can only accept memory.
> +  Similarly for VLAs.  */
> +  tree outtype = TREE_TYPE (TREE_VALUE (link));
> +  if (outtype != error_mark_node

so for error_mark_node we don't diagnose anything?

> +   && (TREE_ADDRESSABLE (outtype)
> +   || !COMPLETE_TYPE_P (outtype)
> +   || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
> +   && max_int_size_in_bytes (outtype

so max_int_size_in_bytes == 0 is OK?  I suppose we have a testcase
for this?

Otherwise looks reasonable to me.

Thanks,
Richard.

>   {
> if (allows_mem)
>   allows_reg = 0;
> @@ -6392,7 +6398,12 @@ gimplify_asm_expr (tree *expr_p, gimple_
> oconstraints, &allows_mem, &allows_reg);
>  
>/* If we can't make copies, we can only accept memory.  */
> -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> +  tree intype = TREE_TYPE (TREE_VALUE (link));
> +  if (intype != error_mark_node
> +   && (TREE_ADDRESSABLE (intype)
> +   || !COMPLETE_TYPE_P (intype)
> +   || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (intype))
> +   && max_int_size_in_bytes (intype
>   {
> if (allows_mem)
>   allows_reg = 0;
> --- gcc/testsuite/c-c++-common/pr92352.c.jj   2019-11-04 14:03:18.725275255 
> +0100
> +++ gcc/testsuite/c-c++-common/pr92352.c  2019-11-04 14:02:55.211629675 
> +0100
> @@ -0,0 +1,15 @@
> +/* PR inline-asm/92352 */
> +
> +void
> +foo (int x)
> +{
> +  int var[x];
> +  asm volatile ("" : "+r" (var));/* { dg-error "impossible constraint in 
> 'asm'" } */
> +}/* { dg-error "non-memory output 0 must 
> stay in memory" "" { target *-*-* } .-1 } */
> +
> +void
> +bar (int x)
> +{
> +  int var[x];
> +  asm volatile ("" : "+m" (var));
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-05 Thread Jiufu Guo
Segher Boessenkool  writes:

> Hi!
>
> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
>> In this patch, a loop unroll adjust hook is introduced for powerpc.  We can do
>> target-related heuristic adjustment in this hook. In this patch, small loops
>> are unrolled 2 times for O2 and O3 by default.  With this patch, we can see
>> some improvement for spec2017.  This patch enhanced a little for [Patch V2] 
>> to
>> enable small loops unroll for O3 by default like O2.
>
>>  * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.
>
> That's the declaration of a variable.  A command line flag is something
> like -munroll-small-loops.  Do we want a command line option like that?
> It makes testing simpler.
Thanks for the great suggestion; I will update the patch to add a command-line
option.

>
>> -  /* unroll very small loops 2 time if no -funroll-loops.  */
>> +  /* If funroll-loops is not enabled explicitly, then enable small loops
>> + unrolling for -O2, and do not turn fweb or frename-registers on.  */
>>if (!global_options_set.x_flag_unroll_loops
>>&& !global_options_set.x_flag_unroll_all_loops)
>>  {
>> -  maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
>> - global_options.x_param_values,
>> - global_options_set.x_param_values);
>> -
>> -  maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
>> - global_options.x_param_values,
>> - global_options_set.x_param_values);
>> +  unroll_small_loops = optimize >= 2 ? 1 : 0;
>
> That includes -Os?
>
> I think you shouldn't always set it to some value, only enable it where
> you want to enable it.  If you make a command line option for it this is
> especially simple (the table in common/config/rs6000/rs6000-common.c).
Thanks again, update rs6000_option_optimization_table as :
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fweb, NULL, 0 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_frename_registers, NULL, 0 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },

While, I still keep the code to disable unroll_small_loops for explicit
-funroll-loops via checking global_options_set.x_flag_unroll_loops. 
>
>> +static unsigned
>> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
>> +{
>> +  if (unroll_small_loops)
>> +{
>> +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
>> + example we may want to unroll very small loops more times (4 perhaps).
>> + We also should use a PARAM for this.  */
>> +  if (loop->ninsns <= 10)
>> +return MIN (2, nunroll);
>> +  else
>> +return 0;
>> +}
>
> (Add an empty line here?)
Thanks again, updated accordingly.
>
>> +  return nunroll;
>> +}
>
> Great :-)
>
>> @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct 
>> cl_target_option *ptr,
>>  {
>>ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
>>ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
>> +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
>>  }
>
> Yeah we shouldn't need to add that, this should all be automatic.
Yes, by adding the new option in the .opt file, this is handled automatically.

Updated patch is at the end of this mail. Thanks for review.

Jiufu
>
>
> Segher

Updated patch:

Index: gcc/common/config/rs6000/rs6000-common.c
===
--- gcc/common/config/rs6000/rs6000-common.c(revision 277765)
+++ gcc/common/config/rs6000/rs6000-common.c(working copy)
@@ -35,7 +35,9 @@ static const struct default_options rs6000_option_
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
-{ OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
+/* Enable  -funroll-loops with -munroll-small-loops.  */
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 277765)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,24 +4543,21 @@ rs6000_option_override_internal (bool global_init_
 global_options.x_param_values,
 

Re: [PATCH rs6000]Fix PR92132

2019-11-05 Thread Kewen.Lin
Hi Segher,

Thanks for the comments!

on 2019/11/2 上午7:17, Segher Boessenkool wrote:
> On Tue, Oct 29, 2019 at 01:16:53PM +0800, Kewen.Lin wrote:
>>  (vcond_mask_): New expand.
> 
> Say for which mode please?  Like
>   (vcond_mask_ for VEC_I and VEC_I): New expand.
> 

Fixed as below.

>>  (vcond_mask_): Likewise.
> 
> "for VEC_I and VEC_F", here, but the actual names in the pattern are for
> vector modes of same-size integer elements.  Maybe it is clear enough like
> this, dunno.

Changed to for VEC_F, New expand for float vector modes and same-size 
integer vector modes.

> 
>>  (vector_{ungt,unge,unlt,unle}): Likewise.
> 
> Never use wildcards (or shell expansions) in the "what changed" part of a
> changelog, because people try to search for that.

Thanks for the explanation, fixed. 

> 
>>  ;; 128-bit one's complement
>> -(define_insn_and_split "*one_cmpl3_internal"
>> +(define_insn_and_split "one_cmpl3_internal"
> 
> Instead, rename it to "one_cmpl3" and delete the define_expand that
> serves no function?

Renamed.  Sorry, which "define_expand" is meant here?  I thought it referred
to an existing one_cmpl3 expand, but I didn't find one. 

> 
>> +(define_code_iterator fpcmpun [ungt unge unlt unle])
> 
> Why these four?  Should there be more?  Should this be added to some
> existing iterator?

For floating point comparison operator and vector type, currently rs6000
supports eq, gt, ge, *ltgt, *unordered, *ordered, *uneq (* for unnamed).
We can leverage gt, ge, eq for lt, le, ne, then these four left.

I originally wanted to merge them into the existing unordered or uneq, but
I found it's hard to share their existing patterns.  For example, the uneq
looks like:

  [(set (match_dup 3)
(gt:VEC_F (match_dup 1)
  (match_dup 2)))
   (set (match_dup 4)
(gt:VEC_F (match_dup 2)
  (match_dup 1)))
   (set (match_dup 0)
(and:VEC_F (not:VEC_F (match_dup 3))
   (not:VEC_F (match_dup 4]

While ungt looks like:

  [(set (match_dup 3)
(ge:VEC_F (match_dup 1)
  (match_dup 2)))
   (set (match_dup 4)
(ge:VEC_F (match_dup 2)
  (match_dup 1)))
   (set (match_dup 3)
(ior:VEC_F (not:VEC_F (match_dup 3))
   (not:VEC_F (match_dup 4
   (set (match_dup 4)
(gt:VEC_F (match_dup 1)
  (match_dup 2)))
   (set (match_dup 3)
(ior:VEC_F (match_dup 3)
   (match_dup 4)))]
  
> 
> It's not all comparisons including unordered, there are uneq, unordered
> itself, and ne as well.

Yes, they are not; this is just the list of comparison operators that currently lack support.

> 
>> +;; Same mode for condition true/false values and predicate operand.
>> +(define_expand "vcond_mask_"
>> +  [(match_operand:VEC_I 0 "vint_operand")
>> +   (match_operand:VEC_I 1 "vint_operand")
>> +   (match_operand:VEC_I 2 "vint_operand")
>> +   (match_operand:VEC_I 3 "vint_operand")]
>> +  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
>> +{
>> +  emit_insn (gen_vector_select_ (operands[0], operands[2], 
>> operands[1],
>> +  operands[3]));
>> +  DONE;
>> +})
> 
> So is this exactly the same as vsel/xxsel?

Yes, expanded into if_then_else and ne against zero, can match their patterns.

> 
>> +;; For signed integer vectors comparison.
>> +(define_expand "vec_cmp"
> 
>> +case GEU:
>> +  emit_insn (
>> +gen_vector_nltu (operands[0], operands[2], operands[3], tmp));
>> +  break;
>> +case GTU:
>> +  emit_insn (gen_vector_gtu (operands[0], operands[2], 
>> operands[3]));
>> +  break;
>> +case LEU:
>> +  emit_insn (
>> +gen_vector_ngtu (operands[0], operands[2], operands[3], tmp));
>> +  break;
>> +case LTU:
>> +  emit_insn (gen_vector_gtu (operands[0], operands[3], 
>> operands[2]));
>> +  break;
> 
> You shouldn't allow those for signed comparisons, that will only hide
> problems.

OK, moved into vec_cmpu*.

> 
> You can do all the rest with some iterator / code attribute?  Or two cases,
> one for the codes that need ops 2 and 3 swapped, one for the rest?
> 

Sorry, I tried to use code attributes here but failed.  I think the reason is 
the
pattern name doesn't have .  I can only get the code from operand 1, then
have to use "switch case"?  I can change it with one more define_expand, but
is that what we wanted?  It looks we still need "case"s.

define_expand "vec_cmp"
...
{...
enum rtx_code code = GET_CODE (operands[1]);
switch (code)
  case GT:
  ... gen_vec_cmpgt
  ...
}

define_expand "vec_cmp"
  ... gen_vector_


>> +;; For unsigned integer vectors comparison.
>> +(define_expand "vec_cmpu"
>> +  [(set (match_operand:VEC_I 0 "vint_operand")
>> +(match_operator 1 "comparison_operator"
>> +  [(match_operand:VEC_I 2 "vint_operand")
>> +   (match_operand:VEC_I 3 "vint_operand")]))]
>> +  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
>> +{
>> +  emit_insn (gen_vec_cmp (operands[0], operands[1],
>> +  

Re: [PATCH] Fix hash_operand for fields of a CONSTRUCTOR.

2019-11-05 Thread Martin Liška

On 11/4/19 4:24 PM, Jeff Law wrote:

On 11/4/19 6:36 AM, Richard Biener wrote:

On Mon, Nov 4, 2019 at 2:35 PM Richard Biener
 wrote:


On Mon, Nov 4, 2019 at 10:09 AM Martin Liška  wrote:


On 11/1/19 10:51 PM, Jeff Law wrote:

On 10/31/19 10:01 AM, Martin Liška wrote:

Hi.

operand_equal_p can properly handle situation where we have a CONSTRUCTOR
where indices are NULL:

if (!operand_equal_p (c0->value, c1->value, flags)
/* In GIMPLE the indexes can be either NULL or matching i.
   Double check this so we won't get false
   positives for GENERIC.  */
|| (c0->index
&& (TREE_CODE (c0->index) != INTEGER_CST
|| compare_tree_int (c0->index, i)))
|| (c1->index
&& (TREE_CODE (c1->index) != INTEGER_CST
|| compare_tree_int (c1->index, i
  return false;

but the corresponding hash function always hashes field (which
can be NULL_TREE or equal to ctor index).

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-10-31  Martin Liska  

  PR ipa/92304
  * fold-const.c (operand_compare::hash_operand): Fix field
  hashing of CONSTRUCTOR.

OK.  One question though, do these routines need to handle
CONSTRUCTOR_NO_CLEARING?


Good point, but I bet it's just a flag used in GENERIC, right?


Yes.  It matters for gimplification only.  I don't think we can
optimistically make use of it in operand_equal_p.


OTOH for GENERIC and sth like ICF the flags have to match.

Precisely my concern.  I'm not immediately aware of any case where it
matters, but it'd be nice to future proof this if we can.

jeff



Sure, I've got the following tested patch.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
>From 2302c15cb2568bc71b4b7bc3abbfd66aafc7c06c Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 4 Nov 2019 15:39:40 +0100
Subject: [PATCH] Add CONSTRUCTOR_NO_CLEARING to operand_equal_p.

gcc/ChangeLog:

2019-11-04  Martin Liska  

	* fold-const.c (operand_compare::operand_equal_p): Add comparison
	of CONSTRUCTOR_NO_CLEARING.
	(operand_compare::hash_operand): Likewise.
---
 gcc/fold-const.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 1e25859a707..a1f80b91cce 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -3475,6 +3475,9 @@ operand_compare::operand_equal_p (const_tree arg0, const_tree arg1,
 case tcc_exceptional:
   if (TREE_CODE (arg0) == CONSTRUCTOR)
 	{
+	  if (CONSTRUCTOR_NO_CLEARING (arg0) != CONSTRUCTOR_NO_CLEARING (arg1))
+	return false;
+
 	  /* In GIMPLE constructors are used only to build vectors from
 	 elements.  Individual elements in the constructor must be
 	 indexed in increasing order and form an initial sequence.
@@ -3657,6 +3660,7 @@ operand_compare::hash_operand (const_tree t, inchash::hash &hstate,
 	unsigned HOST_WIDE_INT idx;
 	tree field, value;
 	flags &= ~OEP_ADDRESS_OF;
+	hstate.add_int (CONSTRUCTOR_NO_CLEARING (t));
 	FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t), idx, field, value)
 	  {
 	/* In GIMPLE the indexes can be either NULL or matching i.  */
-- 
2.23.0



[PATCH][OBVIOUS] Remove FIELD_DECL leftover.

2019-11-05 Thread Martin Liška

Hi.

The patch installed as r277614 is not exactly 1:1 to the previous
revision, but I forgot to remove hashing of FIELD_DECL. The tree type
is not handled in operand_equal_p.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

I'm going to install it as obvious.
Thanks,
Martin

gcc/ChangeLog:

2019-11-04  Martin Liska  

PR c++/92339
* fold-const.c (operand_compare::hash_operand): Remove FIELD_DECL
handling.

gcc/testsuite/ChangeLog:

2019-11-04  Martin Liska  

PR c++/92339
* g++.dg/pr92339.C: New test.
---
 gcc/fold-const.c   |  4 
 gcc/testsuite/g++.dg/pr92339.C | 10 ++
 2 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr92339.C


diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 1e25859a707..88a069f4306 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -3682,10 +3682,6 @@ operand_compare::hash_operand (const_tree t, inchash::hash &hstate,
 case IDENTIFIER_NODE:
   hstate.add_object (IDENTIFIER_HASH_VALUE (t));
   return;
-case FIELD_DECL:
-  inchash::add_expr (DECL_FIELD_OFFSET (t), hstate, flags);
-  inchash::add_expr (DECL_FIELD_BIT_OFFSET (t), hstate, flags);
-  return;
 case FUNCTION_DECL:
   /* When referring to a built-in FUNCTION_DECL, use the __builtin__ form.
 	 Otherwise nodes that compare equal according to operand_equal_p might
diff --git a/gcc/testsuite/g++.dg/pr92339.C b/gcc/testsuite/g++.dg/pr92339.C
new file mode 100644
index 000..5bf15b08b17
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr92339.C
@@ -0,0 +1,10 @@
+/* PR c++/92339  */
+/* { dg-options "-std=c++11" } */
+
+class a {
+  template  struct c { c(a *); };
+  int m_fn1();
+  unsigned long d;
+  using e = c;
+};
+int a::m_fn1() { e(this); return 0; }



Re: Optimize handling of inline summaries

2019-11-05 Thread Martin Liška

On 11/4/19 8:09 PM, Jan Hubicka wrote:

On 11/4/19 3:12 PM, Jan Hubicka wrote:

Martin, do you know why this flag was introduced?


Hi.

The flag is used in IPA CP:

call_summary 

class edge_clone_summary
{
...
   cgraph_edge *prev_clone;
   cgraph_edge *next_clone;
}


I see, so it is there to collect chains of duplications. I suppose it
makes sense even though it is bit unexpected use of summaries (I suppose
I approved it :)

In this case we want to more know that something was duplicated and
trigger creation. There are other cases where we do not want to
duplicate in all siutations (like when inline clone is created).
I was wondering about adding duplicate_p function which will by default
return true if source summary exists and which one can overwrite with
different behaviour.  What do you think?


Sure, that sounds reasonable to me. Feel free to change the flag
to the suggested hook.

Martin



Honza





Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-05 Thread Jiufu Guo
Richard Biener  writes:

> On Mon, 4 Nov 2019, Segher Boessenkool wrote:
>
>> Hi!
>> 
>> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
>> > In this patch, loop unroll adjust hook is introduced for powerpc.  We can 
>> > do
>> > target related hueristic adjustment in this hook. In this patch, small 
>> > loops
>> > is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
>> > some improvement for spec2017.  This patch enhanced a little for [Patch 
>> > V2] to
>> > enable small loops unroll for O3 by default like O2.
>> 
>> >* gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.
>> 
>> That's the declaration of a variable.  A command line flag is something
>> like -munroll-small-loops.  Do we want a command line option like that?
>> It makes testing simpler.
>> 
>> > -  /* unroll very small loops 2 time if no -funroll-loops.  */
>> > +  /* If funroll-loops is not enabled explicitly, then enable small 
>> > loops
>> > +   unrolling for -O2, and do not turn fweb or frename-registers on.  */
>> >if (!global_options_set.x_flag_unroll_loops
>> >  && !global_options_set.x_flag_unroll_all_loops)
>> >{
>> > -maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
>> > -   global_options.x_param_values,
>> > -   global_options_set.x_param_values);
>> > -
>> > -maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
>> > -   global_options.x_param_values,
>> > -   global_options_set.x_param_values);
>> > +unroll_small_loops = optimize >= 2 ? 1 : 0;
>> 
>> That includes -Os?
>
> It also re-introduces the exactly same issue as the --param with LTO.
Thanks Richard,
This flag (unroll_small_loops) is saved/restored in cl_target_option, so it can
distinguish different CUs. I ran a test, and it works with -flto for
multiple source files. 

Jiufu
BR.

>
>> I think you shouldn't always set it to some value, only enable it where
>> you want to enable it.  If you make a command line option for it this is
>> especially simple (the table in common/config/rs6000/rs6000-common.c).
>> 
>> > +static unsigned
>> > +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
>> > +{
>> > +  if (unroll_small_loops)
>> > +{
>> > +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
>> > +   example we may want to unroll very small loops more times (4 perhaps).
>> > +   We also should use a PARAM for this.  */
>> > +  if (loop->ninsns <= 10)
>> > +  return MIN (2, nunroll);
>> > +  else
>> > +  return 0;
>> > +}
>> 
>> (Add an empty line here?)
>> 
>> > +  return nunroll;
>> > +}
>> 
>> Great :-)
>> 
>> > @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct 
>> > cl_target_option *ptr,
>> >  {
>> >ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
>> >ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
>> > +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
>> >  }
>> 
>> Yeah we shouldn't need to add that, this should all be automatic.
>> 
>> 
>> Segher
>> 


Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-05 Thread Jiufu Guo
Jiufu Guo  writes:

> Segher Boessenkool  writes:
>
>> Hi!
>>
>> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
>>> In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
>>> target related hueristic adjustment in this hook. In this patch, small loops
>>> is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
>>> some improvement for spec2017.  This patch enhanced a little for [Patch V2] 
>>> to
>>> enable small loops unroll for O3 by default like O2.
>>
>>> * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.
>>
>> That's the declaration of a variable.  A command line flag is something
>> like -munroll-small-loops.  Do we want a command line option like that?
>> It makes testing simpler.
> Thanks for great sugguestion, will update patch to add a command line
> option.
>
>>
>>> -  /* unroll very small loops 2 time if no -funroll-loops.  */
>>> +  /* If funroll-loops is not enabled explicitly, then enable small 
>>> loops
>>> +unrolling for -O2, and do not turn fweb or frename-registers on.  */
>>>if (!global_options_set.x_flag_unroll_loops
>>>   && !global_options_set.x_flag_unroll_all_loops)
>>> {
>>> - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
>>> -global_options.x_param_values,
>>> -global_options_set.x_param_values);
>>> -
>>> - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
>>> -global_options.x_param_values,
>>> -global_options_set.x_param_values);
>>> + unroll_small_loops = optimize >= 2 ? 1 : 0;
>>
>> That includes -Os?
>>
>> I think you shouldn't always set it to some value, only enable it where
>> you want to enable it.  If you make a command line option for it this is
>> especially simple (the table in common/config/rs6000/rs6000-common.c).
> Thanks again, update rs6000_option_optimization_table as :
> { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
> { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fweb, NULL, 0 },
> { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_frename_registers, NULL, 0 },
Sorry, a typo: in the patch, the above 2 lines are not present, because they do
not turn off flag_web and flag_rename_registers.
> { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
>
> While, I still keep the code to disable unroll_small_loops for explicit
> -funroll-loops via checking global_options_set.x_flag_unroll_loops. 
>>
>>> +static unsigned
>>> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
>>> +{
>>> +  if (unroll_small_loops)
>>> +{
>>> +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
>>> +example we may want to unroll very small loops more times (4 perhaps).
>>> +We also should use a PARAM for this.  */
>>> +  if (loop->ninsns <= 10)
>>> +   return MIN (2, nunroll);
>>> +  else
>>> +   return 0;
>>> +}
>>
>> (Add an empty line here?)
> Thanks again, updated accordingly.
>>
>>> +  return nunroll;
>>> +}
>>
>> Great :-)
>>
>>> @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct 
>>> cl_target_option *ptr,
>>>  {
>>>ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
>>>ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
>>> +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
>>>  }
>>
>> Yeah we shouldn't need to add that, this should all be automatic.
> Yes, through adding new option in .opt, this is handled automaticly.
>
> Updated patch is at the end of this mail. Thanks for review.
>
> Jiufu
>>
>>
>> Segher
>
> Updated patch:
>
> Index: gcc/common/config/rs6000/rs6000-common.c
> ===
> --- gcc/common/config/rs6000/rs6000-common.c  (revision 277765)
> +++ gcc/common/config/rs6000/rs6000-common.c  (working copy)
> @@ -35,7 +35,9 @@ static const struct default_options rs6000_option_
>  { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
>  /* Enable -fsched-pressure for first pass instruction scheduling.  */
>  { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
> -{ OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
> +/* Enable  -funroll-loops with -munroll-small-loops.  */
> +{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
> +{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
>  { OPT_LEVELS_NONE, 0, NULL, 0 }
>};
>  
> Index: gcc/config/rs6000/rs6000.c
> ===
> --- gcc/config/rs6000/rs6000.c(revision 277765)
> +++ gcc/config/rs6000/rs6000.c(working copy)
> @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut
>  #undef TARGET_VECTORIZE_DESTROY_COST_DATA
>  #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
>  
> +#undef TARGET_LOOP_UNROLL_ADJUST
> +#define TARGET_LOOP_UNROLL_ADJUST rs600

Re: Ping: [PATCH V6] Extend IPA-CP to support arithmetically-computed value-passing on by-ref argument (PR ipa/91682)

2019-11-05 Thread Feng Xue OS
Hi Martin,

  Thanks for your review. I updated the patch with your comments.

Feng



> Sorry that it took so long.  Next time, please consider making the
> review a bit easier by writing a ChangeLog (yes, I usually read them and
> you'll have to write one anyway).
>> +  class ipcp_param_lattices *src_plats
>> + = ipa_get_parm_lattices (info, src_idx);

> Wrong indentation for GNU coding standard.
Done.

>>  struct ipa_known_agg_contents_list
>>  {
>>/* Offset and size of the described part of the aggregate.  */
>>HOST_WIDE_INT offset, size;
>> -  /* Known constant value or NULL if the contents is known to be unknown.  
>> */
>> -  tree constant;
>> +
>> +  /* Type of the described part of the aggregate.  */
>> +  tree type;
>> +
>> +  /* Known constant value or jump function data describing contents.  */
>> +  struct ipa_load_agg_data value;

> I wonder whether it would be cleaner to repeat the fields of
> ipa_load_agg_dat here.  But I don't insist.
But this would duplicate code: if we want to add more information to
describe the aggregate jump function, we would have to copy it here as well.

>> + item.value.pass_through.operand
>> + = unshare_expr_without_location (operand);

> Wrong indentation for GNU coding standard.
Done.

>> +   Since load-value-from-aggregate jump function data structure is 
>> informative
>> +   enough to describe constant and simple pass-through jump function, here 
>> we
>> +   do not need a jump function type, merely use FORMAL_ID and OPERAND in
>> +   IPA_LOAD_AGG_DATA to distinguish different jump functions.  */

> This last comment is difficult to understand to the point when IMHO one
> has to read the code anyway.  Perhaps you could just list which special
> values imply which final jump function type?  And perhaps that list
> should go to the comment describing ipa_known_agg_contents_list.
Changed the comments.

>> +
>> +static void
>> +compute_assign_agg_jump_func (struct ipa_func_body_info *fbi,
>> +   struct ipa_load_agg_data *agg_value,

> My preference would be for this function to receive a pointer to the
> whole ipa_known_agg_contents_list as a parameter instead of agg_value,
> and to be called something like extract_agg_content_or_origin or
> something that would not suggest it creates a real jump function.
> Please at least consider changing the name.
I deliberately split this function out of extract_mem_content, and limited
its access scope to part of ipa_known_agg_contents_list rather than the whole,
since it only analyzes the rhs side of an assignment to an aggregate.

Rename to analyze_agg_content_value.

>> +  if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (rhs1)))

> Please put the assignment into a separate statement.
Done.

>>  /* Traverse statements from CALL backwards, scanning whether an aggregate 
>> given
>> -   in ARG is filled in with constant values.  ARG can either be an aggregate
>> -   expression or a pointer to an aggregate.  ARG_TYPE is the type of the
>> -   aggregate.  JFUNC is the jump function into which the constants are
>> -   subsequently stored.  AA_WALK_BUDGET_P points to limit on number of
>> -   statements we allow get_continuation_for_phi to examine.  */
>> +   in ARG is filled in constant or value that is derived from caller's 
>> formal

> Minor nit, please use plural "constants or values that are..."
Done.

>> -   if (src->agg.items
>> -   && (dst->value.ancestor.agg_preserved || !src->agg.by_ref))
>> +   /* Parameter and argument in ancestor jump function must be pointer
>> +  type, which means access to aggregate must be by-reference.  */
>> +   gcc_checking_assert (!src->agg.items || src->agg.by_ref);

> I am slightly afraid that some type mismatches in between the call
> statement fntype and callee type which are possible with LTO (and
> horribly bad user input) might trigger this.  Please make this a
> non-checking assert so that we find out if that is indeed true.
Done.

>> +struct GTY(()) ipa_agg_value

> Why the GTY marker, is this structure ever allocated in garbage
> collected memory?  I don't think so (but it is getting late here).
Removed GTY marker.

>> +struct GTY(()) ipa_agg_value_set

> Likewise, moreover...
Removed.

>> +  /* Return true if there is any value for aggregate.  */
>> +  operator bool () const
>> +  {
>> +return !items.is_empty ();
>> +  }

> I do not know various C++ conventions well, but unless this is a really
> really well established one, please don't use an operator but a normal
> method.  (My preference is to invert its meaning and call it is_empty
Done.
From a6df7c7dc70f50995335425c5183f4feddf998b8 Mon Sep 17 00:00:00 2001
From: Feng Xue 
Date: Thu, 15 Aug 2019 15:47:14 +0800
Subject: [PATCH 1/2] temp

---
 gcc/ChangeLog  |  71 +++
 gcc/ipa-cp.c   | 498 +++--
 gcc/ipa-fnsummary.c|  61 +--
 gcc/ipa-fnsummary.h  

[committed] move vrp_set_zero_nonzero_bits into vr-values.c

2019-11-05 Thread Aldy Hernandez
This function is only used in vr-values.c, so it doesn't need to be 
externally visible or defined in tree-vrp.c.


In moving it I noticed that wide_int_range_set_zero_nonzero_bits is 
redundant, as we can grab the version in range-op.


I've also renamed the function to vr_set_zero_nonzero_bits (removed the 
"P"), as it's more a value_range thing than a VRP thing.


Committed as obvious.

commit a59ec913fa95849a356d00d211510b29eab5565f
Author: Aldy Hernandez 
Date:   Tue Nov 5 09:48:41 2019 +0100

Move vrp_set_zero_nonzero_bits from tree-vrp.c into vr-values.c, and
make it use wi_set_zero_nonzero_bits.  Remove the now redundant
wide_int_range_set_zero_nonzero_bits.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f492ea6da0c..786dee727dc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2019-11-05  Aldy Hernandez  
+
+	* range-op.cc (wi_set_zero_nonzero_bits): Remove static qualifier.
+	* range-op.h (wi_set_zero_nonzero_bits): New prototype.
+	* tree-vrp.h (vrp_set_zero_nonzero_bits): Remove.
+	* tree-vrp.c (wide_int_range_set_zero_nonzero_bits): Remove.
+	(vrp_set_zero_nonzero_bits): Move to...
+	* vr-values.c (vr_set_zero_nonzero_bits): ...here.
+	(vr_values::simplify_bit_ops_using_ranges): Rename
+	vrp_set_zero_nonzero_bits to vr_set_zero_nonzero_bits.
+
 2019-11-05  Aldy Hernandez  
 
 	* tree-vrp.h (vrp_bitmap_equal_p): Remove.
diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index fc31485384b..56e8a20ad9e 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1847,7 +1847,7 @@ wi_optimize_and_or (value_range_base &r,
 // for all numbers in the range the bit is 1, otherwise it might be 0
 // or 1.
 
-static void
+void
 wi_set_zero_nonzero_bits (tree type,
 			  const wide_int &lb, const wide_int &ub,
 			  wide_int &maybe_nonzero,
diff --git a/gcc/range-op.h b/gcc/range-op.h
index f6510758163..e531b918263 100644
--- a/gcc/range-op.h
+++ b/gcc/range-op.h
@@ -82,7 +82,10 @@ protected:
 };
 
 extern range_operator *range_op_handler (enum tree_code code, tree type);
-
 extern void range_cast (value_range_base &, tree type);
+extern void wi_set_zero_nonzero_bits (tree type,
+  const wide_int &, const wide_int &,
+  wide_int &maybe_nonzero,
+  wide_int &mustbe_nonzero);
 
 #endif // GCC_RANGE_OP_H
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index e926670b962..e1d5c7cb98c 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1260,69 +1260,6 @@ value_range_base::value_inside_range (tree val) const
 return !!cmp2;
 }
 
-/* For range [LB, UB] compute two wide_int bit masks.
-
-   In the MAY_BE_NONZERO bit mask, if some bit is unset, it means that
-   for all numbers in the range the bit is 0, otherwise it might be 0
-   or 1.
-
-   In the MUST_BE_NONZERO bit mask, if some bit is set, it means that
-   for all numbers in the range the bit is 1, otherwise it might be 0
-   or 1.  */
-
-static inline void
-wide_int_range_set_zero_nonzero_bits (signop sign,
-  const wide_int &lb, const wide_int &ub,
-  wide_int &may_be_nonzero,
-  wide_int &must_be_nonzero)
-{
-  may_be_nonzero = wi::minus_one (lb.get_precision ());
-  must_be_nonzero = wi::zero (lb.get_precision ());
-
-  if (wi::eq_p (lb, ub))
-{
-  may_be_nonzero = lb;
-  must_be_nonzero = may_be_nonzero;
-}
-  else if (wi::ge_p (lb, 0, sign) || wi::lt_p (ub, 0, sign))
-{
-  wide_int xor_mask = lb ^ ub;
-  may_be_nonzero = lb | ub;
-  must_be_nonzero = lb & ub;
-  if (xor_mask != 0)
-	{
-	  wide_int mask = wi::mask (wi::floor_log2 (xor_mask), false,
-may_be_nonzero.get_precision ());
-	  may_be_nonzero = may_be_nonzero | mask;
-	  must_be_nonzero = wi::bit_and_not (must_be_nonzero, mask);
-	}
-}
-}
-
-/* value_range wrapper for wide_int_range_set_zero_nonzero_bits above.
-
-   Return TRUE if VR was a constant range and we were able to compute
-   the bit masks.  */
-
-bool
-vrp_set_zero_nonzero_bits (const tree expr_type,
-			   const value_range_base *vr,
-			   wide_int *may_be_nonzero,
-			   wide_int *must_be_nonzero)
-{
-  if (!range_int_cst_p (vr))
-{
-  *may_be_nonzero = wi::minus_one (TYPE_PRECISION (expr_type));
-  *must_be_nonzero = wi::zero (TYPE_PRECISION (expr_type));
-  return false;
-}
-  wide_int_range_set_zero_nonzero_bits (TYPE_SIGN (expr_type),
-	wi::to_wide (vr->min ()),
-	wi::to_wide (vr->max ()),
-	*may_be_nonzero, *must_be_nonzero);
-  return true;
-}
-
 /* Create two value-ranges in *VR0 and *VR1 from the anti-range *AR
so that *VR0 U *VR1 == *AR.  Returns true if that is possible,
false otherwise.  If *AR can be represented with a single range
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index 3861634fb7e..0bf33caba85 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -299,8 +299,6 @@ void range_fold_binary_expr (value_range_base *, enum tree_code, tree type,
 extern bool vrp_operand_equal_p (const_tree, const_tree);
 extern enum value_range_kind intersect_range_with_nonze

Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-05 Thread Mark Eggleston


On 25/10/2019 09:03, Tobias Burnus wrote:

Hello Mark, hi all,

On 10/21/19 4:40 PM, Mark Eggleston wrote:
This is an extension to support a legacy feature supported by other 
compilers such as flang and the sun compiler.  As I understand it 
this feature is associated with DEC so it enabled using 
-fdec-char-conversions and by -fdec.


It allows character literals to be assigned to numeric (INTEGER, 
REAL, COMPLEX) and LOGICAL variables by direct assignment or in DATA 
statements.


    * arith.c (hollerith2representation): Use 
OPT_Wcharacter_truncation in

    call to gfc_warning.


This has two effects: First, it permits to toggle the warning on and 
off; secondly, it disables the warning by default. It is enabled by 
-Wall, however. – I think that's acceptable: while Holleriths are less 
transparent as normal strings, for normal strings the result is 
identical.




+ result->representation.string[result_len] = '\0'; /* For debugger  */


Tiny nit: full stop after 'debugger'.

Done.



+/* Convert character to integer. The constant will be padded or 
truncated. */


And here an extra space before '*/'.

Done.



+Allowing character literals to be used in a similar way to Hollerith 
constants

+is a non-standard extension.
+
+Character literals can be used in @code{DATA} statements and 
assignments with


I wonder whether one should mention here explicitly that only 
default-kind (i.e. kind=1) character strings are permitted. 
Additionally, I wonder whether -fdec-char-conversion should be 
mentioned here – without, it is not supported and the error message 
doesn't point to this option.



Now mentions -fdec-char-conversion and kind=1.



+
+  /* Flang allows character conversions similar to Hollerith 
conversions

+ - the first characters will be turned into ascii values. */


Is this Flang or DEC or …? I thought we talk about legacy support and 
Flang is not really legacy.




Re-worded.

--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
  +  if ((gfc_numeric_ts (&lhs->ts) || lhs->ts.type == BT_LOGICAL)
+  && rhs->ts.type == BT_CHARACTER
+  && rhs->expr_type != EXPR_CONSTANT)
+    {
+  gfc_error ("Cannot convert %s to %s at %L", gfc_typename (rhs),
+ gfc_typename (lhs), &rhs->where);
+  return false;
+    }


Maybe add a comment like:
/* Happens with flag_dec_char_conversions for nonconstant strings.  */
might help casual readers to understand where this if comes from.


Done.



@@ -331,8 +332,9 @@ gfc_conv_constant_to_tree (gfc_expr * expr)
  gfc_build_string_const (expr->representation.length,
  expr->representation.string));
    if (!integer_zerop (tmp) && !integer_onep (tmp))
-    gfc_warning (0, "Assigning value other than 0 or 1 to LOGICAL"
- " has undefined result at %L", &expr->where);
+    gfc_warning (OPT_Wsurprising, "Assigning value other than 0 
or 1 "

+ "to LOGICAL has undefined result at %L",
+ &expr->where);


I am not happy with this. We had odd issues with combining code 
generated by gfortran and ifort and Booleans types ("logical"). 
Namely, gfortran uses 0 and 1 – while ifort uses -1 and 0. When using 
".not. var", it is sufficient to flip a single bit – either the first 
or the last bit – and it is sufficient to look only a single bit.


Hence, one can get ".not. var .eqv. var".

The same result one can get when assigning "-1" to logical. Hence, a 
default warning makes more sense than -Wsurprising. At least, 
-Wsurprising is enabled by default.


Hence, I wonder whether your 'OPT_Wsurprising' or 
'flag_dec_char_conversions ? OPT_Wsurprising : 0' makes more sense.



The latter.


Actually, I don't quickly see whether   4_'string'  (i.e. kind=4) 
strings are rejected or not. The gfc_character2* functions all assume 
kind=1 characters – while code like gfc_convert_constant or the 
resolve.c code only looks at BT_CHARACTER.
On the other hand, the add_conv calls in intrintrinsic.c's 
add_conversions are only added for the default-character kind.


In any case, can you add a test which checks that – even with 
-fdec-char-conversion – assigning a 2_'string' and 4_'string' to a 
integer/real/complex/logical will be rejected at compile time?


Did not add 2_'string' tests as 2 is not accepted as a valid kind for 
characters. The addition of 4_'string' in a data statement resulted in 
an ICE which has been fixed by only accepting characters of kind=1.

Otherwise, it looks okay to me.

Tobias


I noticed that warning were not produced for conversion to logicals, 
re-ordering of an if..else if sequence fixes that problem. Additional 
test cases have been added.


Steve Kargl suggested a revision to the conversion warning adding 
"Nonstandard" to the text this has also been done.


Tested on x86_64 using make -j 8 check-fortran.

Please find attached the updated patch, the change logs follow. OK to 
commit?


regards,

Mark

gcc/fortran/ChangeLog

    Jim MacArthur 

Re: [PATCH] Reject VLAs in inline asm operands that require registers (PR inline-asm/92352)

2019-11-05 Thread Jakub Jelinek
On Tue, Nov 05, 2019 at 09:27:45AM +0100, Richard Biener wrote:
> > --- gcc/gimplify.c.jj   2019-11-02 10:00:59.595253274 +0100
> > +++ gcc/gimplify.c  2019-11-05 00:21:01.585958514 +0100
> > @@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
> >   is_inout = false;
> > }
> >  
> > -  /* If we can't make copies, we can only accept memory.  */
> > -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> > +  /* If we can't make copies, we can only accept memory.
> > +Similarly for VLAs.  */
> > +  tree outtype = TREE_TYPE (TREE_VALUE (link));
> > +  if (outtype != error_mark_node
> 
> so for error_mark_node we don't diagnose anything?

Yes, we should have diagnosed it already.  The != error_mark_node
I've added only after seeing tons of ICEs in the testsuite with earlier
version of the patch.

> > + && (TREE_ADDRESSABLE (outtype)
> > + || !COMPLETE_TYPE_P (outtype)
> > + || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
> > + && max_int_size_in_bytes (outtype
> 
> so max_int_size_in_bytes == 0 is OK?  I suppose we have a testcase
> for this?

Actually, I meant max_int_size_in_bytes (outtype) < 0, i.e. something
on which force_constant_size ICE immediately, sorry for screwing it up in
the end.
All these VLAs with max_int_size_in_bytes >= 0 sizes are specific to Ada
and I have no idea what is and isn't valid there, for C/C++ it should
always return -1.

> Otherwise looks reasonable to me.

So, is the following ok if it passes bootstrap/regtest, or shall
I just go for || !tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))) ?

2019-11-05  Jakub Jelinek  

PR inline-asm/92352
* gimplify.c (gimplify_asm_expr): Reject VLA in output or input
operands with non-memory constraints.

* c-c++-common/pr92352.c: New test.

--- gcc/gimplify.c.jj   2019-11-02 10:00:59.595253274 +0100
+++ gcc/gimplify.c  2019-11-05 00:21:01.585958514 +0100
@@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
  is_inout = false;
}
 
-  /* If we can't make copies, we can only accept memory.  */
-  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
+  /* If we can't make copies, we can only accept memory.
+Similarly for VLAs.  */
+  tree outtype = TREE_TYPE (TREE_VALUE (link));
+  if (outtype != error_mark_node
+ && (TREE_ADDRESSABLE (outtype)
+ || !COMPLETE_TYPE_P (outtype)
+ || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
+ && max_int_size_in_bytes (outtype) < 0)))
{
  if (allows_mem)
allows_reg = 0;
@@ -6392,7 +6398,12 @@ gimplify_asm_expr (tree *expr_p, gimple_
  oconstraints, &allows_mem, &allows_reg);
 
   /* If we can't make copies, we can only accept memory.  */
-  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
+  tree intype = TREE_TYPE (TREE_VALUE (link));
+  if (intype != error_mark_node
+ && (TREE_ADDRESSABLE (intype)
+ || !COMPLETE_TYPE_P (intype)
+ || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (intype))
+ && max_int_size_in_bytes (intype) < 0)))
{
  if (allows_mem)
allows_reg = 0;
--- gcc/testsuite/c-c++-common/pr92352.c.jj 2019-11-04 14:03:18.725275255 
+0100
+++ gcc/testsuite/c-c++-common/pr92352.c2019-11-04 14:02:55.211629675 
+0100
@@ -0,0 +1,15 @@
+/* PR inline-asm/92352 */
+
+void
+foo (int x)
+{
+  int var[x];
+  asm volatile ("" : "+r" (var));  /* { dg-error "impossible constraint in 
'asm'" } */
+}  /* { dg-error "non-memory output 0 must 
stay in memory" "" { target *-*-* } .-1 } */
+
+void
+bar (int x)
+{
+  int var[x];
+  asm volatile ("" : "+m" (var));
+}


Jakub



Add object allocators to symbol and call summaries

2019-11-05 Thread Jan Hubicka


Hi,
this patch adds object allocators to manage IPA summaries. This reduces
malloc overhead and fragmentation.  I now get peak memory use 7.5GB instead
of 10GB for firefox WPA because reduced fragmentation leads to less COWs after
forks.  Additional bonus is that we now have statistics gathered by mem-reports
which makes my life easier, too.
(though memory stats are far from ideal - we need to pass location info around
bit more).

Bootstrapped/regtested x86_64-linux, will commit it shortly.

* hsa-brig.c: Include alloc-pool.h
* hsa-dump.c: Likewise.
* hsa-gen.c: Likewise.
	* hsa-regalloc.c: Likewise.
* ipa-hsa.c: Likewise.
* ipa-predicate.c: Likewise.
* ipa-reference.c: Likewise.
* ipa-sra.c: Likewise.
* omp-expand.c: Likewise.
* omp-general.c: Likewise.
* omp-low.c: Likewise.
	* symbol-summary.h (function_summary_base): Add allocator.
(function_summary::function_summary): Update construction.
(fast_function_summary::fast_function_summary): Likewise.
	(call_summary_base): Add allocator.
(call_summary::call_summary): Update construction.
(fast_call_summary::fast_call_summary): Likewise.
Index: hsa-brig.c
===
--- hsa-brig.c  (revision 277796)
+++ hsa-brig.c  (working copy)
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "dumpfile.h"
 #include "print-tree.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "hsa-common.h"
 #include "gomp-constants.h"
Index: hsa-dump.c
===
--- hsa-dump.c  (revision 277796)
+++ hsa-dump.c  (working copy)
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.
 #include "gimple-pretty-print.h"
 #include "cgraph.h"
 #include "print-tree.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "hsa-common.h"
 
Index: hsa-gen.c
===
--- hsa-gen.c   (revision 277796)
+++ hsa-gen.c   (working copy)
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.
 #include "ssa-iterators.h"
 #include "cgraph.h"
 #include "print-tree.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "hsa-common.h"
 #include "cfghooks.h"
Index: hsa-regalloc.c
===
--- hsa-regalloc.c  (revision 277796)
+++ hsa-regalloc.c  (working copy)
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "print-tree.h"
 #include "cfghooks.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "hsa-common.h"
 
Index: ipa-hsa.c
===
--- ipa-hsa.c   (revision 277796)
+++ ipa-hsa.c   (working copy)
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.
 #include "stringpool.h"
 #include "cgraph.h"
 #include "print-tree.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "hsa-common.h"
 
Index: ipa-predicate.c
===
--- ipa-predicate.c (revision 277796)
+++ ipa-predicate.c (working copy)
@@ -25,8 +25,8 @@ along with GCC; see the file COPYING3.
 #include "tree.h"
 #include "cgraph.h"
 #include "tree-vrp.h"
-#include "symbol-summary.h"
 #include "alloc-pool.h"
+#include "symbol-summary.h"
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
 #include "real.h"
Index: ipa-reference.c
===
--- ipa-reference.c (revision 277796)
+++ ipa-reference.c (working copy)
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.
 #include "calls.h"
 #include "ipa-utils.h"
 #include "ipa-reference.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 
 /* The static variables defined within the compilation unit that are
Index: ipa-sra.c
===
--- ipa-sra.c   (revision 277796)
+++ ipa-sra.c   (working copy)
@@ -75,6 +75,7 @@ along with GCC; see the file COPYING3.
 #include "gimple-walk.h"
 #include "tree-dfa.h"
 #include "tree-sra.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "params.h"
 #include "dbgcnt.h"
Index: omp-expand.c
===
--- omp-expand.c(revision 277796)
+++ omp-expand.c(working copy)
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
 #include "omp-general.h"
 #include "omp-offload.h"
 #include "tree-cfgcleanup.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "gomp-constants.h"
 #include "gimple-pretty-print.h"
Index: omp-general.c
===
--- omp-general.c   (revision 277796)
+++ omp-general.c   (working copy)
@@ -37,6 +37,7 @@ along with GCC; see the file

Free memory used by optimization/target options

2019-11-05 Thread Jan Hubicka
Hi,
this fixes memory leak for xstrduped strings in option summaries which may
get ggc_freed by tree merging.

Bootstrapped/regtested x86_64-linux, OK?

Honza

* optc-save-gen.awk: Generate cl_target_option_free
and cl_optimization_option_free.
	* opth-gen.awk: Declare cl_target_option_free
and cl_optimization_option_free.
* tree.c (free_node): Use it.
Index: optc-save-gen.awk
===
--- optc-save-gen.awk   (revision 277796)
+++ optc-save-gen.awk   (working copy)
@@ -802,6 +802,17 @@ for (i = 0; i < n_target_val; i++) {
 
 print "}";
 
+print "/* free heap memory used by target options  */";
+print "void";
+print "cl_target_option_free (struct cl_target_option *ptr ATTRIBUTE_UNUSED)";
+print "{";
+for (i = 0; i < n_target_str; i++) {
+   name = var_target_str[i]
+   print "  if (ptr->" name")";
+   print "free (const_cast (ptr->" name"));";
+}
+print "}";
+
 n_opt_val = 4;
 var_opt_val[0] = "x_optimize"
 var_opt_val_type[0] = "char "
@@ -921,4 +932,18 @@ for (i = 0; i < n_opt_val; i++) {
  print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value 
(bp, 64);";
 }
 print "}";
+print "/* Free heap memory used by optimization options  */";
+print "void";
+print "cl_optimization_option_free (struct cl_optimization *ptr 
ATTRIBUTE_UNUSED)";
+print "{";
+for (i = 0; i < n_opt_val; i++) {
+   name = var_opt_val[i]
+   otype = var_opt_val_type[i];
+   if (otype ~ "^const char \\**$")
+   {
+ print "  if (ptr->" name")";
+ print "free (const_cast (ptr->" name"));";
+   }
+}
+print "}";
 }
Index: opth-gen.awk
===
--- opth-gen.awk(revision 277796)
+++ opth-gen.awk(working copy)
@@ -303,6 +303,9 @@ print "";
 print "/* Compare two target option variables from a structure.  */";
 print "extern bool cl_target_option_eq (const struct cl_target_option *, const 
struct cl_target_option *);";
 print "";
+print "/* Free heap memory used by target option variables.  */";
+print "extern void cl_target_option_free (struct cl_target_option *);";
+print "";
 print "/* Hash option variables from a structure.  */";
 print "extern hashval_t cl_target_option_hash (const struct cl_target_option 
*);";
 print "";
@@ -312,6 +315,9 @@ print "";
 print "/* Compare two optimization options.  */";
 print "extern bool cl_optimization_option_eq (cl_optimization const *ptr1, 
cl_optimization const *ptr2);"
 print "";
+print "/* Free heap memory used by optimization options.  */";
+print "extern void cl_optimization_option_free (cl_optimization *ptr1);"
+print "";
 print "/* Generator files may not have access to location_t, and don't need 
these.  */"
 print "#if defined(UNKNOWN_LOCATION)"
 print "bool  "
Index: tree.c
===
--- tree.c  (revision 277796)
+++ tree.c  (working copy)
@@ -1170,6 +1170,10 @@ free_node (tree node)
 vec_free (BLOCK_NONLOCALIZED_VARS (node));
   else if (code == TREE_BINFO)
 vec_free (BINFO_BASE_ACCESSES (node));
+  else if (code == OPTIMIZATION_NODE)
+cl_optimization_option_free (TREE_OPTIMIZATION (node));
+  else if (code == TARGET_OPTION_NODE)
+cl_target_option_free (TREE_TARGET_OPTION (node));
   ggc_free (node);
 }
 


Re: [PATCH] Reject VLAs in inline asm operands that require registers (PR inline-asm/92352)

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jakub Jelinek wrote:

> On Tue, Nov 05, 2019 at 09:27:45AM +0100, Richard Biener wrote:
> > > --- gcc/gimplify.c.jj 2019-11-02 10:00:59.595253274 +0100
> > > +++ gcc/gimplify.c2019-11-05 00:21:01.585958514 +0100
> > > @@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
> > > is_inout = false;
> > >   }
> > >  
> > > -  /* If we can't make copies, we can only accept memory.  */
> > > -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> > > +  /* If we can't make copies, we can only accept memory.
> > > +  Similarly for VLAs.  */
> > > +  tree outtype = TREE_TYPE (TREE_VALUE (link));
> > > +  if (outtype != error_mark_node
> > 
> > so for error_mark_node we don't diagnose anything?
> 
> Yes, we should have diagnosed it already.  The != error_mark_node
> I've added only after seeing tons of ICEs in the testsuite with earlier
> version of the patch.
> 
> > > +   && (TREE_ADDRESSABLE (outtype)
> > > +   || !COMPLETE_TYPE_P (outtype)
> > > +   || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
> > > +   && max_int_size_in_bytes (outtype
> > 
> > so max_int_size_in_bytes == 0 is OK?  I suppose we have a testcase
> > for this?
> 
> Actually, I meant max_int_size_in_bytes (outtype) < 0, i.e. something
> on which force_constant_size ICE immediately, sorry for screwing it up in
> the end.
> All these VLAs with max_int_size_in_bytes >= 0 sizes are specific to Ada
> and I have no idea what is and isn't valid there, for C/C++ it should
> always return -1.
> 
> > Otherwise looks reasonable to me.
> 
> So, is the following ok if it passes bootstrap/regtest, or shall
> I just go for || !tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))) ?

Hmm, can we actually copy VLAs with max_int_size_in_bytes != -1?  I 
suppose we can copy even unconstrained VLAs, we just miss a
WITH_SIZE_EXPR here but then it's unlikely we ever fit that into
a non-memory.  That's true as well if we just know the max size,
so allowing that seems suspicious at best.

So I'd say OK with just !tree_fits_poly_uint64_p.

Thanks,
Richard.

> 2019-11-05  Jakub Jelinek  
> 
>   PR inline-asm/92352
>   * gimplify.c (gimplify_asm_expr): Reject VLA in output or input
>   operands with non-memory constraints.
> 
>   * c-c++-common/pr92352.c: New test.
> 
> --- gcc/gimplify.c.jj 2019-11-02 10:00:59.595253274 +0100
> +++ gcc/gimplify.c2019-11-05 00:21:01.585958514 +0100
> @@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
> is_inout = false;
>   }
>  
> -  /* If we can't make copies, we can only accept memory.  */
> -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> +  /* If we can't make copies, we can only accept memory.
> +  Similarly for VLAs.  */
> +  tree outtype = TREE_TYPE (TREE_VALUE (link));
> +  if (outtype != error_mark_node
> +   && (TREE_ADDRESSABLE (outtype)
> +   || !COMPLETE_TYPE_P (outtype)
> +   || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
> +   && max_int_size_in_bytes (outtype) < 0)))
>   {
> if (allows_mem)
>   allows_reg = 0;
> @@ -6392,7 +6398,12 @@ gimplify_asm_expr (tree *expr_p, gimple_
> oconstraints, &allows_mem, &allows_reg);
>  
>/* If we can't make copies, we can only accept memory.  */
> -  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
> +  tree intype = TREE_TYPE (TREE_VALUE (link));
> +  if (intype != error_mark_node
> +   && (TREE_ADDRESSABLE (intype)
> +   || !COMPLETE_TYPE_P (intype)
> +   || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (intype))
> +   && max_int_size_in_bytes (intype) < 0)))
>   {
> if (allows_mem)
>   allows_reg = 0;
> --- gcc/testsuite/c-c++-common/pr92352.c.jj   2019-11-04 14:03:18.725275255 
> +0100
> +++ gcc/testsuite/c-c++-common/pr92352.c  2019-11-04 14:02:55.211629675 
> +0100
> @@ -0,0 +1,15 @@
> +/* PR inline-asm/92352 */
> +
> +void
> +foo (int x)
> +{
> +  int var[x];
> +  asm volatile ("" : "+r" (var));/* { dg-error "impossible constraint in 
> 'asm'" } */
> +}/* { dg-error "non-memory output 0 must 
> stay in memory" "" { target *-*-* } .-1 } */
> +
> +void
> +bar (int x)
> +{
> +  int var[x];
> +  asm volatile ("" : "+m" (var));
> +}
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Add obstack for canonical file name hash table

2019-11-05 Thread Jan Hubicka
Hi,
looking into malloc overhead I noticed that we do a lot of small
allocations to hold file names comming from location info. This patch
puts it into an obstack so it interleaves memory allocated by scc_hash
less frequently.
(Still we end up interleaving 64k pages which are permanent - in fact
this table seems to leak from WPA and temporary during stream in)

Bootstrapped/regtested x86_64-linux. OK?

Honza

* lto-streamer-in.c (file_name_obstack): New obstack.
(canon_file_name): Use it.
(lto_reader_init): Initialize it.
Index: lto-streamer-in.c
===
--- lto-streamer-in.c   (revision 277796)
+++ lto-streamer-in.c   (working copy)
@@ -57,6 +57,7 @@ freeing_string_slot_hasher::remove (valu
 
 /* The table to hold the file names.  */
 static hash_table *file_name_hash_table;
+static struct obstack file_name_obstack;
 
 
 /* Check that tag ACTUAL has one of the given values.  NUM_TAGS is the
@@ -113,8 +114,9 @@ canon_file_name (const char *string)
   char *saved_string;
   struct string_slot *new_slot;
 
-  saved_string = (char *) xmalloc (len + 1);
-  new_slot = XCNEW (struct string_slot);
+  saved_string = XOBNEWVEC (&file_name_obstack, char, len + 1);
+  new_slot = XOBNEWVAR (&file_name_obstack,
+   struct string_slot, sizeof (struct string_slot));
   memcpy (saved_string, string, len + 1);
   new_slot->s = saved_string;
   new_slot->len = len;
@@ -1723,6 +1725,7 @@ lto_reader_init (void)
   lto_streamer_init ();
   file_name_hash_table
 = new hash_table (37);
+  gcc_obstack_init (&file_name_obstack);
 }
 
 


Re: Add object allocators to symbol and call summaries

2019-11-05 Thread Martin Liška

On 11/5/19 11:36 AM, Jan Hubicka wrote:

Hi,
this patch adds object allocators to manage IPA summaries. This reduces
malloc overhead and fragmentation.  I now get peak memory use 7.5GB instead
of 10GB for firefox WPA because reduced fragmentation leads to less COWs after
forks.


That sounds promising.


Additional bonus is that we now have statistics gathered by mem-reports
which makes my life easier, too.


What's currently bad with the detailed memory statistics? I updated the
code that one should see the allocation for the underlying hash_map and
vec?


(though memory stats are far from ideal - we need to pass location info around
bit more).


Please rename allocator to m_allocator.

Martin



Re: Free memory used by optimization/target options

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jan Hubicka wrote:

> Hi,
> this fixes memory leak for xstrduped strings in option summaries which may
> get ggc_freed by tree merging.
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.  If those are ever reaped by regular GC the memory still leaks, no?
So wouldn't it be better to put the strings into GC memory?

Thanks,
Richard.

> Honza
> 
>   * optc-save-gen.awk: Generate cl_target_option_free
>   and cl_optimization_option_free.
>   * opth-gen.awk: Declare cl_target_option_free
>   and cl_optimization_option_free.
>   * tree.c (free_node): Use it.
> Index: optc-save-gen.awk
> ===
> --- optc-save-gen.awk (revision 277796)
> +++ optc-save-gen.awk (working copy)
> @@ -802,6 +802,17 @@ for (i = 0; i < n_target_val; i++) {
>  
>  print "}";
>  
> +print "/* free heap memory used by target options  */";
> +print "void";
> +print "cl_target_option_free (struct cl_target_option *ptr 
> ATTRIBUTE_UNUSED)";
> +print "{";
> +for (i = 0; i < n_target_str; i++) {
> + name = var_target_str[i]
> + print "  if (ptr->" name")";
> + print "free (const_cast (ptr->" name"));";
> +}
> +print "}";
> +
>  n_opt_val = 4;
>  var_opt_val[0] = "x_optimize"
>  var_opt_val_type[0] = "char "
> @@ -921,4 +932,18 @@ for (i = 0; i < n_opt_val; i++) {
> print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value 
> (bp, 64);";
>  }
>  print "}";
> +print "/* Free heap memory used by optimization options  */";
> +print "void";
> +print "cl_optimization_option_free (struct cl_optimization *ptr 
> ATTRIBUTE_UNUSED)";
> +print "{";
> +for (i = 0; i < n_opt_val; i++) {
> + name = var_opt_val[i]
> + otype = var_opt_val_type[i];
> + if (otype ~ "^const char \\**$")
> + {
> +   print "  if (ptr->" name")";
> +   print "free (const_cast (ptr->" name"));";
> + }
> +}
> +print "}";
>  }
> Index: opth-gen.awk
> ===
> --- opth-gen.awk  (revision 277796)
> +++ opth-gen.awk  (working copy)
> @@ -303,6 +303,9 @@ print "";
>  print "/* Compare two target option variables from a structure.  */";
>  print "extern bool cl_target_option_eq (const struct cl_target_option *, 
> const struct cl_target_option *);";
>  print "";
> +print "/* Free heap memory used by target option variables.  */";
> +print "extern void cl_target_option_free (struct cl_target_option *);";
> +print "";
>  print "/* Hash option variables from a structure.  */";
>  print "extern hashval_t cl_target_option_hash (const struct cl_target_option 
> *);";
>  print "";
> @@ -312,6 +315,9 @@ print "";
>  print "/* Compare two optimization options.  */";
>  print "extern bool cl_optimization_option_eq (cl_optimization const *ptr1, 
> cl_optimization const *ptr2);"
>  print "";
> +print "/* Free heap memory used by optimization options.  */";
> +print "extern void cl_optimization_option_free (cl_optimization *ptr1);"
> +print "";
>  print "/* Generator files may not have access to location_t, and don't need 
> these.  */"
>  print "#if defined(UNKNOWN_LOCATION)"
>  print "bool  
> "
> Index: tree.c
> ===
> --- tree.c(revision 277796)
> +++ tree.c(working copy)
> @@ -1170,6 +1170,10 @@ free_node (tree node)
>  vec_free (BLOCK_NONLOCALIZED_VARS (node));
>else if (code == TREE_BINFO)
>  vec_free (BINFO_BASE_ACCESSES (node));
> +  else if (code == OPTIMIZATION_NODE)
> +cl_optimization_option_free (TREE_OPTIMIZATION (node));
> +  else if (code == TARGET_OPTION_NODE)
> +cl_target_option_free (TREE_TARGET_OPTION (node));
>ggc_free (node);
>  }
>  
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: Free memory used by optimization/target options

2019-11-05 Thread Jan Hubicka
> On Tue, 5 Nov 2019, Jan Hubicka wrote:
> 
> > Hi,
> > this fixes memory leak for xstrduped strings in option summaries which may
> > get ggc_freed by tree merging.
> > 
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> OK.  If those are ever reaped by regular GC the memory still leaks, no?
> So wouldn't it be better to put the strings into GC memory?
I am not sure why those are malloced, but I think there was some fun
with gengtype/awk generator interactions. Also I think all those option
nodes come into cl_option_hash that is fully permanent.

Honza


Re: Add object allocators to symbol and call summaries

2019-11-05 Thread Martin Liška

On 11/5/19 11:45 AM, Martin Liška wrote:

Please rename allocator to m_allocator.


You were faster and installed that patch.

Thus I'm sending the adjustment.

Martin
>From 6edd5d8c4afb0451aaaf05ba857435219b31814d Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 5 Nov 2019 11:50:32 +0100
Subject: [PATCH] Update coding style in symbol-summary.h.

gcc/ChangeLog:

2019-11-05  Martin Liska  

	* symbol-summary.h: Rename allocator to m_allocator and
	add comment.
---
 gcc/symbol-summary.h | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
index d663cbb90fd..8aedcfe9143 100644
--- a/gcc/symbol-summary.h
+++ b/gcc/symbol-summary.h
@@ -31,7 +31,7 @@ public:
   function_summary_base (symbol_table *symtab CXX_MEM_STAT_INFO):
   m_symtab (symtab),
   m_insertion_enabled (true),
-  allocator ("function summary" PASS_MEM_STAT)
+  m_allocator ("function summary" PASS_MEM_STAT)
   {}
 
   /* Basic implementation of insert operation.  */
@@ -62,7 +62,7 @@ protected:
 /* Call gcc_internal_because we do not want to call finalizer for
a type T.  We call dtor explicitly.  */
 return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
-		 : allocator.allocate () ;
+		 : m_allocator.allocate () ;
   }
 
   /* Release an item that is stored within map.  */
@@ -74,7 +74,7 @@ protected:
 	ggc_free (item);
   }
 else
-  allocator.remove (item);
+  m_allocator.remove (item);
   }
 
   /* Unregister all call-graph hooks.  */
@@ -95,7 +95,9 @@ protected:
 private:
   /* Return true when the summary uses GGC memory for allocation.  */
   virtual bool is_ggc () = 0;
-  object_allocator allocator;
+
+  /* Object allocator for heap allocation.  */
+  object_allocator m_allocator;
 };
 
 template 
@@ -537,7 +539,7 @@ public:
   call_summary_base (symbol_table *symtab CXX_MEM_STAT_INFO):
   m_symtab (symtab),
   m_initialize_when_cloning (false),
-  allocator ("call summary" PASS_MEM_STAT)
+  m_allocator ("call summary" PASS_MEM_STAT)
   {}
 
   /* Basic implementation of removal operation.  */
@@ -553,7 +555,7 @@ protected:
 /* Call gcc_internal_because we do not want to call finalizer for
a type T.  We call dtor explicitly.  */
 return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
-		 : allocator.allocate ();
+		 : m_allocator.allocate ();
   }
 
   /* Release an item that is stored within map.  */
@@ -565,7 +567,7 @@ protected:
 	ggc_free (item);
   }
 else
-  allocator.remove (item);
+  m_allocator.remove (item);
   }
 
   /* Unregister all call-graph hooks.  */
@@ -584,7 +586,9 @@ protected:
 private:
   /* Return true when the summary uses GGC memory for allocation.  */
   virtual bool is_ggc () = 0;
-  object_allocator allocator;
+
+  /* Object allocator for heap allocation.  */
+  object_allocator m_allocator;
 };
 
 template 
-- 
2.23.0



Re: Add obstack for canonical file name hash table

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jan Hubicka wrote:

> Hi,
> looking into malloc overhead I noticed that we do a lot of small
> > allocations to hold file names coming from location info. This patch
> > puts it into an obstack so it interleaves memory allocated by scc_hash
> less frequently.
> (Still we end up interleaving 64k pages which are permanent - in fact
> this table seems to leak from WPA and temporary during stream in)
> 
> Bootstrapped/regtested x86_64-linux. OK?

I think the obstack deserves a big fat comment that it cannot be
reclaimed since the linemap retains permanent pointers into it.
That also suggests to put the string_slot into a separate obstack
or better, make the hasher (and other string_slot hashers)
embed the string_slot struct in the hash?  We'd save an allocation
everywhere.

Richard.

> Honza
> 
>   * lto-streamer-in.c (file_name_obstack): New obstack.
>   (canon_file_name): Use it.
>   (lto_reader_init): Initialize it.
> Index: lto-streamer-in.c
> ===
> --- lto-streamer-in.c (revision 277796)
> +++ lto-streamer-in.c (working copy)
> @@ -57,6 +57,7 @@ freeing_string_slot_hasher::remove (valu
>  
>  /* The table to hold the file names.  */
>  static hash_table *file_name_hash_table;
> +static struct obstack file_name_obstack;
>  
>  
>  /* Check that tag ACTUAL has one of the given values.  NUM_TAGS is the
> @@ -113,8 +114,9 @@ canon_file_name (const char *string)
>char *saved_string;
>struct string_slot *new_slot;
>  
> -  saved_string = (char *) xmalloc (len + 1);
> -  new_slot = XCNEW (struct string_slot);
> +  saved_string = XOBNEWVEC (&file_name_obstack, char, len + 1);
> +  new_slot = XOBNEWVAR (&file_name_obstack,
> + struct string_slot, sizeof (struct string_slot));
>memcpy (saved_string, string, len + 1);
>new_slot->s = saved_string;
>new_slot->len = len;
> @@ -1723,6 +1725,7 @@ lto_reader_init (void)
>lto_streamer_init ();
>file_name_hash_table
>  = new hash_table (37);
> +  gcc_obstack_init (&file_name_obstack);
>  }
>  
>  
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

[PATCH] Fix part of PR92324

2019-11-05 Thread Richard Biener


We have to be able to choose one signedness for the epilogue
part of the reduction - for MIN/MAX this means if we have two
both have to be of the same signedness.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-11-05  Richard Biener  

PR tree-optimization/92324
* tree-vect-loop.c (check_reduction_path): For MIN/MAX require
all signed or unsigned operations.

* gcc.dg/vect/pr92324-3.c: New testcase.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 277782)
+++ gcc/tree-vect-loop.c(working copy)
@@ -2744,6 +2744,7 @@ pop:
   /* Check whether the reduction path detected is valid.  */
   bool fail = path.length () == 0;
   bool neg = false;
+  int sign = -1;
   *code = ERROR_MARK;
   for (unsigned i = 1; i < path.length (); ++i)
 {
@@ -2787,12 +2788,22 @@ pop:
TREE_TYPE (gimple_assign_rhs1 (use_stmt
;
   else if (*code == ERROR_MARK)
-   *code = use_code;
+   {
+ *code = use_code;
+ sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)));
+   }
   else if (use_code != *code)
{
  fail = true;
  break;
}
+  else if ((use_code == MIN_EXPR
+   || use_code == MAX_EXPR)
+  && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt
+   {
+ fail = true;
+ break;
+   }
 }
   return ! fail && ! neg && *code != ERROR_MARK;
 }
Index: gcc/testsuite/gcc.dg/vect/pr92324-3.c
===
--- gcc/testsuite/gcc.dg/vect/pr92324-3.c   (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr92324-3.c   (working copy)
@@ -0,0 +1,27 @@
+#include "tree-vect.h"
+
+int a[1024];
+unsigned b[1024];
+
+int __attribute__((noipa))
+foo (int n)
+{
+  int res = 0;
+  for (int i = 0; i < n; ++i)
+{
+  res = res > a[i] ? res : a[i];
+  res = res > b[i] ? res : b[i];
+}
+  return res;
+}
+
+int main ()
+{
+  check_vect ();
+  b[3] = (unsigned)__INT_MAX__ + 1;
+  if (foo (4) != -__INT_MAX__ - 1)
+__builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "vectorized \[1-9\] loops" "vect" } } */


Re: Add object allocators to symbol and call summaries

2019-11-05 Thread Jan Hubicka
> On 11/5/19 11:36 AM, Jan Hubicka wrote:
> > Hi,
> > this patch adds object allocators to manage IPA summaries. This reduces
> > malloc overhead and fragmentation.  I now get peak memory use 7.5GB instead
> > of 10GB for firefox WPA because reduced fragmentation leads to less COWs 
> > after
> > forks.
> 
> That sounds promising.
> 
> > Additional bonus is that we now have statistics gathered by mem-reports
> > which makes my life easier, too.
> 
> What's currently bad with the detailed memory statistics? I updated the
> code so that one should see the allocations for the underlying hash_map and
> vec?

I currently get:


Pool name   Allocation pool 
  Pools   LeakPeakTimesElt size

tree_scclto/lto-common.c:2709 (read_cgraph_and_symbols) 
 1 0 :  0.0%   99M 3169k: 43.7%  32
IPA histogram   ipa-profile.c:77 
(__static_initialization_and_de 116 :  0.0%   16 1 :  
0.0%  16
IPA-PROP ref descriptions   ipa-prop.c:170 
(__static_initialization_and_dest 1   226k:  0.3%  226k 9670 :  
0.1%  24
function summaryipa-fnsummary.c:557 (ipa_fn_summary_alloc)  
 1  6145k:  7.0% 6257k  391k:  5.4%  16
function summaryipa-pure-const.c:136 (__base_ctor ) 
 1  6863k:  7.9% 9449k  590k:  8.1%  16
edge predicates ipa-fnsummary.c:93 
(__static_initialization_and_ 1  8327k:  9.5% 8385k  209k:  
2.9%  40
call summaryipa-sra.c:436 (__base_ctor )
 118M: 21.3%   21M 1393k: 19.2%  16
call summaryipa-fnsummary.h:276 (__base_ctor )  
 146M: 54.0%   46M 1483k: 20.5%  32

Pool name   Allocation pool 
  Pools   LeakPeakTimesElt size

Total   
  9 85M


This is quite readable, though we may give them different names and
update constructors. Not a big deal IMO.

For GGC statistics I see:

varpool.c:137 (create_empty)  7924k:  0.4%0 :  
0.0% 3214k:  0.2%0 :  0.0%   87k
cgraph.c:939 (cgraph_allocate_init_indirect_info  8566k:  0.4%0 :  
0.0% 1395k:  0.1%0 :  0.0%  113k
alias.c:1170 (record_alias_subset)  12M:  0.6%0 :  
0.0%   12k:  0.0%   99k:  0.1%   12k
ipa-sra.c:2717 (isra_read_node_info)12M:  0.6%0 :  
0.0% 4179k:  0.2%   21k:  0.0%  376k
toplev.c:904 (realloc_for_line_map) 16M:  0.8%0 :  
0.0%   15M:  0.9%  144 :  0.0%   12 
ipa-prop.c:278 (ipa_alloc_node_params)  16M:  0.8%  266k:  
0.4%0 :  0.0%   22k:  0.0%  366k
symbol-summary.h:555 (allocate_new) 18M:  0.9%0 :  
0.0%  119k:  0.0%0 :  0.0% 1171k
 ^^^ here we should point to the caller of get_create

ipa-fnsummary.c:3877 (inline_read_section)  28M:  1.4%0 :  
0.0%  552k:  0.0%  392k:  0.3%  261k
lto-section-in.c:388 (lto_new_in_decl_state)29M:  1.4%0 :  
0.0%   11M:  0.7%0 :  0.0%  587k
symtab.c:582 (create_reference) 35M:  1.7%0 :  
0.0%   50M:  2.9% 1199k:  0.9%  541k
symbol-summary.h:64 (allocate_new)  46M:  2.2%0 :  
0.0% 2445k:  0.1%0 :  0.0% 1168k
 ^^^ same here.

stringpool.c:63 (alloc_node)47M:  2.3%0 :  
0.0%0 :  0.0%0 :  0.0% 1217k
ipa-prop.c:4480 (ipa_read_edge_info)51M:  2.4%0 :  
0.0%  260k:  0.0%  404k:  0.3%  531k
hash-table.h:801 (expand)   81M:  3.9%0 :  
0.0%   80M:  4.7%   88k:  0.1% 3349 
 ^^^ some of the memory comes here, which ought to be accounted to the caller
 of expand.
stringpool.c:41 (str

Re: Add object allocators to symbol and call summaries

2019-11-05 Thread Jan Hubicka
> On 11/5/19 11:45 AM, Martin Liška wrote:
> > Please rename allocator to m_allocator.
> 
> You were faster and installed that patch.
> 
> Thus I'm sending the adjustment.
> 
> Martin

> From 6edd5d8c4afb0451aaaf05ba857435219b31814d Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Tue, 5 Nov 2019 11:50:32 +0100
> Subject: [PATCH] Update coding style in symbol-summary.h.
> 
> gcc/ChangeLog:
> 
> 2019-11-05  Martin Liska  
> 
>   * symbol-summary.h: Rename allocator to m_allocator and
>   add comment.

This is OK, thanks!
Honza
> ---
>  gcc/symbol-summary.h | 20 
>  1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
> index d663cbb90fd..8aedcfe9143 100644
> --- a/gcc/symbol-summary.h
> +++ b/gcc/symbol-summary.h
> @@ -31,7 +31,7 @@ public:
>function_summary_base (symbol_table *symtab CXX_MEM_STAT_INFO):
>m_symtab (symtab),
>m_insertion_enabled (true),
> -  allocator ("function summary" PASS_MEM_STAT)
> +  m_allocator ("function summary" PASS_MEM_STAT)
>{}
>  
>/* Basic implementation of insert operation.  */
> @@ -62,7 +62,7 @@ protected:
>  /* Call gcc_internal_because we do not want to call finalizer for
> a type T.  We call dtor explicitly.  */
>  return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
> -  : allocator.allocate () ;
> +  : m_allocator.allocate () ;
>}
>  
>/* Release an item that is stored within map.  */
> @@ -74,7 +74,7 @@ protected:
>   ggc_free (item);
>}
>  else
> -  allocator.remove (item);
> +  m_allocator.remove (item);
>}
>  
>/* Unregister all call-graph hooks.  */
> @@ -95,7 +95,9 @@ protected:
>  private:
>/* Return true when the summary uses GGC memory for allocation.  */
>virtual bool is_ggc () = 0;
> -  object_allocator allocator;
> +
> +  /* Object allocator for heap allocation.  */
> +  object_allocator m_allocator;
>  };
>  
>  template 
> @@ -537,7 +539,7 @@ public:
>call_summary_base (symbol_table *symtab CXX_MEM_STAT_INFO):
>m_symtab (symtab),
>m_initialize_when_cloning (false),
> -  allocator ("call summary" PASS_MEM_STAT)
> +  m_allocator ("call summary" PASS_MEM_STAT)
>{}
>  
>/* Basic implementation of removal operation.  */
> @@ -553,7 +555,7 @@ protected:
>  /* Call gcc_internal_because we do not want to call finalizer for
> a type T.  We call dtor explicitly.  */
>  return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
> -  : allocator.allocate ();
> +  : m_allocator.allocate ();
>}
>  
>/* Release an item that is stored within map.  */
> @@ -565,7 +567,7 @@ protected:
>   ggc_free (item);
>}
>  else
> -  allocator.remove (item);
> +  m_allocator.remove (item);
>}
>  
>/* Unregister all call-graph hooks.  */
> @@ -584,7 +586,9 @@ protected:
>  private:
>/* Return true when the summary uses GGC memory for allocation.  */
>virtual bool is_ggc () = 0;
> -  object_allocator allocator;
> +
> +  /* Object allocator for heap allocation.  */
> +  object_allocator m_allocator;
>  };
>  
>  template 
> -- 
> 2.23.0
> 



Re: Add obstack for canonical file name hash table

2019-11-05 Thread Jan Hubicka
> On Tue, 5 Nov 2019, Jan Hubicka wrote:
> 
> > Hi,
> > looking into malloc overhead I noticed that we do a lot of small
> > allocations to hold file names coming from location info. This patch
> > puts it into an obstack so it interleaves memory allocated by scc_hash
> > less frequently.
> > (Still we end up interleaving 64k pages which are permanent - in fact
> > this table seems to leak from WPA and temporary during stream in)
> > 
> > Bootstrapped/regtested x86_64-linux. OK?
> 
> I think the obstack deserves a big fat comment that it cannot be
> reclaimed since the linemap retains permanent pointers into it.
> That also suggests to put the string_slot into a separate obstack

The hasher is sort of eternal, too, since at any time we want to be
able to load more from input streams, so we cannot really free it.
Well, I guess just prior to streaming we can, so I will split it.
> or better, make the hasher (and other string_slot hashers)
> embed the string_slot struct in the hash?  We'd save an allocation
> everywhere.

Well, if we want to free the hasher, then we want to keep the strings separate
+ a comment on the obstack, right?  I will update the patch tonight.

Honza
> 
> Richard.
> 
> > Honza
> > 
> > * lto-streamer-in.c (file_name_obstack): New obstack.
> > (canon_file_name): Use it.
> > (lto_reader_init): Initialize it.
> > Index: lto-streamer-in.c
> > ===
> > --- lto-streamer-in.c   (revision 277796)
> > +++ lto-streamer-in.c   (working copy)
> > @@ -57,6 +57,7 @@ freeing_string_slot_hasher::remove (valu
> >  
> >  /* The table to hold the file names.  */
> >  static hash_table *file_name_hash_table;
> > +static struct obstack file_name_obstack;
> >  
> >  
> >  /* Check that tag ACTUAL has one of the given values.  NUM_TAGS is the
> > @@ -113,8 +114,9 @@ canon_file_name (const char *string)
> >char *saved_string;
> >struct string_slot *new_slot;
> >  
> > -  saved_string = (char *) xmalloc (len + 1);
> > -  new_slot = XCNEW (struct string_slot);
> > +  saved_string = XOBNEWVEC (&file_name_obstack, char, len + 1);
> > +  new_slot = XOBNEWVAR (&file_name_obstack,
> > +   struct string_slot, sizeof (struct string_slot));
> >memcpy (saved_string, string, len + 1);
> >new_slot->s = saved_string;
> >new_slot->len = len;
> > @@ -1723,6 +1725,7 @@ lto_reader_init (void)
> >lto_streamer_init ();
> >file_name_hash_table
> >  = new hash_table (37);
> > +  gcc_obstack_init (&file_name_obstack);
> >  }
> >  
> >  
> > 
> 
> -- 
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)



[PATCH 1/X][mid-end] Fix declared type of personality functions

2019-11-05 Thread Matthew Malcomson
`build_personality_function` generates a declaration for a personality
function.  The type it declares for these functions doesn't match the
type of the actual personality functions that are defined by the C++
unwinding ABI.

This doesn't cause any crashes since the compiler never generates a call
to these decls, and hence the type of the function is never used.
Nonetheless, for the sake of consistency and readability we update the
type of this declaration.

(See libstdc++-v3/libsupc++/unwind-cxx.h for the declaration of
__gxx_personality_v0 to compare types).

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* expr.c (build_personality_function): Fix generated type to
match actual personality functions.



### Attachment also inlined for ease of reply###


diff --git a/gcc/expr.c b/gcc/expr.c
index 
2f2b53f8b6905013b4214eea137d67c666b0c795..7dc37a288ebffb99c990442cf339b848c5fa9d2e
 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -12525,7 +12525,8 @@ build_personality_function (const char *lang)
 
   name = ACONCAT (("__", lang, "_personality", unwind_and_version, NULL));
 
-  type = build_function_type_list (integer_type_node, integer_type_node,
+  type = build_function_type_list (unsigned_type_node,
+  integer_type_node, integer_type_node,
   long_long_unsigned_type_node,
   ptr_type_node, ptr_type_node, NULL_TREE);
   decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,

diff --git a/gcc/expr.c b/gcc/expr.c
index 
2f2b53f8b6905013b4214eea137d67c666b0c795..7dc37a288ebffb99c990442cf339b848c5fa9d2e
 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -12525,7 +12525,8 @@ build_personality_function (const char *lang)
 
   name = ACONCAT (("__", lang, "_personality", unwind_and_version, NULL));
 
-  type = build_function_type_list (integer_type_node, integer_type_node,
+  type = build_function_type_list (unsigned_type_node,
+  integer_type_node, integer_type_node,
   long_long_unsigned_type_node,
   ptr_type_node, ptr_type_node, NULL_TREE);
   decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,



[PATCH 2/X] [libsanitizer] Introduce libhwasan to GCC tree

2019-11-05 Thread Matthew Malcomson
Takes the libhwasan library from LLVM and puts it into our source tree
excluding the build system files.

Tying the source files into our build system is done in a later commit.

We have taken the libsanitizer library from the same SVN revision as
the other sanitizer libraries are taken from (SVN revision 368656 as
mentioned in libsanitizer/MERGE).

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* README.gcc: Mention now including lib/hwasan.
* hwasan/hwasan.cpp: New file.
* hwasan/hwasan.h: New file.
* hwasan/hwasan.syms.extra: New file.
* hwasan/hwasan_allocator.cpp: New file.
* hwasan/hwasan_allocator.h: New file.
* hwasan/hwasan_blacklist.txt: New file.
* hwasan/hwasan_checks.h: New file.
* hwasan/hwasan_dynamic_shadow.cpp: New file.
* hwasan/hwasan_dynamic_shadow.h: New file.
* hwasan/hwasan_flags.h: New file.
* hwasan/hwasan_flags.inc: New file.
* hwasan/hwasan_interceptors.cpp: New file.
* hwasan/hwasan_interceptors_vfork.S: New file.
* hwasan/hwasan_interface_internal.h: New file.
* hwasan/hwasan_linux.cpp: New file.
* hwasan/hwasan_malloc_bisect.h: New file.
* hwasan/hwasan_mapping.h: New file.
* hwasan/hwasan_memintrinsics.cpp: New file.
* hwasan/hwasan_new_delete.cpp: New file.
* hwasan/hwasan_poisoning.cpp: New file.
* hwasan/hwasan_poisoning.h: New file.
* hwasan/hwasan_report.cpp: New file.
* hwasan/hwasan_report.h: New file.
* hwasan/hwasan_tag_mismatch_aarch64.S: New file.
* hwasan/hwasan_thread.cpp: New file.
* hwasan/hwasan_thread.h: New file.
* hwasan/hwasan_thread_list.cpp: New file.
* hwasan/hwasan_thread_list.h: New file.


hwasan-patch02.patch.gz
Description: hwasan-patch02.patch.gz


[aarch64] Allocate enough space for err_str in aarch64_handle_attr_branch_protection

2019-11-05 Thread Matthew Malcomson
-fsanitize=hwaddress found a one-byte overwrite when running the
testsuite here.  aarch64_handle_attr_branch_protection allocates
`strlen(str)` bytes for an error string, which is populated by
`strcpy(..., str)` in the case where the branch protection string is
completely invalid.

Tested on aarch64 with hwasan (though not a full bootstrap since it's
obvious).

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* config/aarch64/aarch64.c (aarch64_handle_attr_branch_protection):
Allocate enough bytes for the terminating NUL character.



### Attachment also inlined for ease of reply###


diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
232317d4a5a4a16529f573eef5a8d7a068068207..fc03faa8f8d459a84024d4394fff375b72d31264
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13298,7 +13298,7 @@ aarch64_handle_attr_cpu (const char *str)
  static bool
  aarch64_handle_attr_branch_protection (const char* str)
  {
-  char *err_str = (char *) xmalloc (strlen (str));
+  char *err_str = (char *) xmalloc (strlen (str) + 1);
   enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
  &err_str);
   bool success = false;

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
232317d4a5a4a16529f573eef5a8d7a068068207..fc03faa8f8d459a84024d4394fff375b72d31264
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13298,7 +13298,7 @@ aarch64_handle_attr_cpu (const char *str)
  static bool
  aarch64_handle_attr_branch_protection (const char* str)
  {
-  char *err_str = (char *) xmalloc (strlen (str));
+  char *err_str = (char *) xmalloc (strlen (str) + 1);
   enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
  &err_str);
   bool success = false;



[PATCH 5/X] [libsanitizer] Remove system allocator fallback

2019-11-05 Thread Matthew Malcomson
Backport from llvm-svn: 375296.
This was an experiment made possible by a non-standard feature of the
Android dynamic loader.

Going without that experiment makes implementation for glibc easier.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan_allocator.cpp (hwasan_realloc, hwasan_free,
__hwasan_disable_allocator_tagging): Remove allocator fallback.
* hwasan/hwasan_allocator.h (free, realloc): Remove REAL
declarations.
* hwasan/hwasan_interceptors.cpp (free, realloc): Remove
interceptors.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan_allocator.h 
b/libsanitizer/hwasan/hwasan_allocator.h
index 
3a50a11f352600d82aaaf73ddd7a5595e9f8a4d5..f62be269602143679f9f7abc9ec4a8a08dad207f
 100644
--- a/libsanitizer/hwasan/hwasan_allocator.h
+++ b/libsanitizer/hwasan/hwasan_allocator.h
@@ -13,7 +13,6 @@
 #ifndef HWASAN_ALLOCATOR_H
 #define HWASAN_ALLOCATOR_H
 
-#include "interception/interception.h"
 #include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
@@ -26,11 +25,6 @@
 #error Unsupported platform
 #endif
 
-#if HWASAN_WITH_INTERCEPTORS
-DECLARE_REAL(void *, realloc, void *ptr, uptr size)
-DECLARE_REAL(void, free, void *ptr)
-#endif
-
 namespace __hwasan {
 
 struct Metadata {
diff --git a/libsanitizer/hwasan/hwasan_allocator.cpp 
b/libsanitizer/hwasan/hwasan_allocator.cpp
index 
b4fae5820d0a1749663f251c7a3f1dc841741aed..81a57d3afd4d364dca96f20df3e94014483b6df1
 100644
--- a/libsanitizer/hwasan/hwasan_allocator.cpp
+++ b/libsanitizer/hwasan/hwasan_allocator.cpp
@@ -22,11 +22,6 @@
 #include "hwasan_thread.h"
 #include "hwasan_report.h"
 
-#if HWASAN_WITH_INTERCEPTORS
-DEFINE_REAL(void *, realloc, void *ptr, uptr size)
-DEFINE_REAL(void, free, void *ptr)
-#endif
-
 namespace __hwasan {
 
 static Allocator allocator;
@@ -301,14 +296,6 @@ void *hwasan_calloc(uptr nmemb, uptr size, StackTrace 
*stack) {
 void *hwasan_realloc(void *ptr, uptr size, StackTrace *stack) {
   if (!ptr)
 return SetErrnoOnNull(HwasanAllocate(stack, size, sizeof(u64), false));
-
-#if HWASAN_WITH_INTERCEPTORS
-  // A tag of 0 means that this is a system allocator allocation, so we must 
use
-  // the system allocator to realloc it.
-  if (!flags()->disable_allocator_tagging && GetTagFromPointer((uptr)ptr) == 0)
-return REAL(realloc)(ptr, size);
-#endif
-
   if (size == 0) {
 HwasanDeallocate(stack, ptr);
 return nullptr;
@@ -381,13 +368,6 @@ int hwasan_posix_memalign(void **memptr, uptr alignment, 
uptr size,
 }
 
 void hwasan_free(void *ptr, StackTrace *stack) {
-#if HWASAN_WITH_INTERCEPTORS
-  // A tag of 0 means that this is a system allocator allocation, so we must 
use
-  // the system allocator to free it.
-  if (!flags()->disable_allocator_tagging && GetTagFromPointer((uptr)ptr) == 0)
-return REAL(free)(ptr);
-#endif
-
   return HwasanDeallocate(stack, ptr);
 }
 
@@ -400,15 +380,6 @@ void __hwasan_enable_allocator_tagging() {
 }
 
 void __hwasan_disable_allocator_tagging() {
-#if HWASAN_WITH_INTERCEPTORS
-  // Allocator tagging must be enabled for the system allocator fallback to 
work
-  // correctly. This means that we can't disable it at runtime if it was 
enabled
-  // at startup since that might result in our deallocations going to the 
system
-  // allocator. If tagging was disabled at startup we avoid this problem by
-  // disabling the fallback altogether.
-  CHECK(flags()->disable_allocator_tagging);
-#endif
-
   atomic_store_relaxed(&hwasan_allocator_tagging_enabled, 0);
 }
 
diff --git a/libsanitizer/hwasan/hwasan_interceptors.cpp 
b/libsanitizer/hwasan/hwasan_interceptors.cpp
index 
f6758efa65c051376468d3cad2c1530fa7329627..4f9bd3469eb10ca2cf3108326308e45e7a9d38b6
 100644
--- a/libsanitizer/hwasan/hwasan_interceptors.cpp
+++ b/libsanitizer/hwasan/hwasan_interceptors.cpp
@@ -334,8 +334,6 @@ void InitializeInterceptors() {
 #if !defined(__aarch64__)
   INTERCEPT_FUNCTION(pthread_create);
 #endif  // __aarch64__
-  INTERCEPT_FUNCTION(realloc);
-  INTERCEPT_FUNCTION(free);
 #endif
 
   inited = 1;

diff --git a/libsanitizer/hwasan/hwasan_allocator.h 
b/libsanitizer/hwasan/hwasan_allocator.h
index 
3a50a11f352600d82aaaf73ddd7a5595e9f8a4d5..f62be269602143679f9f7abc9ec4a8a08dad207f
 100644
--- a/libsanitizer/hwasan/hwasan_allocator.h
+++ b/libsanitizer/hwasan/hwasan_allocator.h
@@ -13,7 +13,6 @@
 #ifndef HWASAN_ALLOCATOR_H
 #define HWASAN_ALLOCATOR_H
 
-#include "interception/interception.h"
 #include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
@@ -26,11 +25,6 @@
 #error Unsupported platform
 #endif
 
-#if HWASAN_WITH_INTERCEPTORS
-DECLARE_REAL(void *, realloc, void *ptr, uptr size)
-DECLARE_REAL(void, free, void *ptr)
-#endif
-
 namespace

[PATCH 3/X] [libsanitizer] libhwasan initialisation include kernel syscall ABI relaxation

2019-11-05 Thread Matthew Malcomson
Backported from LLVM-svn 375166.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan.cc (InitInstrumentation): Call InitPrctl.
* hwasan/hwasan.h (InitPrctl): New decl.
* hwasan/hwasan_linux.cc (InitPrctl): New function.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan.h b/libsanitizer/hwasan/hwasan.h
index 
817cee65016ee60f5cf6b5dc716a30e192e51e73..9e0ced93b55d361cd5aae787db7562741683944c
 100644
--- a/libsanitizer/hwasan/hwasan.h
+++ b/libsanitizer/hwasan/hwasan.h
@@ -74,6 +74,7 @@ extern int hwasan_report_count;
 
 bool ProtectRange(uptr beg, uptr end);
 bool InitShadow();
+void InitPrctl();
 void InitThreads();
 void MadviseShadow();
 char *GetProcSelfMaps();
diff --git a/libsanitizer/hwasan/hwasan.cpp b/libsanitizer/hwasan/hwasan.cpp
index 
999b51183f6184bb6564f3ec2f51f437e2598314..36d931caf7d4091480e1fc183a09d68735008b97
 100644
--- a/libsanitizer/hwasan/hwasan.cpp
+++ b/libsanitizer/hwasan/hwasan.cpp
@@ -312,6 +312,8 @@ static void InitLoadedGlobals() {
 static void InitInstrumentation() {
   if (hwasan_instrumentation_inited) return;
 
+  InitPrctl();
+
   if (!InitShadow()) {
 Printf("FATAL: HWAddressSanitizer cannot mmap the shadow memory.\n");
 DumpProcessMap();
diff --git a/libsanitizer/hwasan/hwasan_linux.cpp 
b/libsanitizer/hwasan/hwasan_linux.cpp
index 
051ec2fb9cc3aa333f4079dde6e5052173f84723..948e40154fec9295a451a3bc4e6a6914f619d6e3
 100644
--- a/libsanitizer/hwasan/hwasan_linux.cpp
+++ b/libsanitizer/hwasan/hwasan_linux.cpp
@@ -34,6 +34,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
@@ -144,6 +146,43 @@ static void InitializeShadowBaseAddress(uptr 
shadow_size_bytes) {
   FindDynamicShadowStart(shadow_size_bytes);
 }
 
+void InitPrctl() {
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+  // Check we're running on a kernel that can use the tagged address ABI.
+  if (internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) == (uptr)-1 &&
+  errno == EINVAL) {
+#if SANITIZER_ANDROID
+// Some older Android kernels have the tagged pointer ABI on
+// unconditionally, and hence don't have the tagged-addr prctl while still
+// allow the ABI.
+// If targeting Android and the prctl is not around we assume this is the
+// case.
+return;
+#else
+Printf(
+"FATAL: "
+"HWAddressSanitizer requires a kernel with tagged address ABI.\n");
+Die();
+#endif
+  }
+
+  // Turn on the tagged address ABI.
+  if (internal_prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) 
==
+  (uptr)-1 ||
+  !internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) {
+Printf(
+"FATAL: HWAddressSanitizer failed to enable tagged address syscall "
+"ABI.\nSuggest check `sysctl abi.tagged_addr_disabled` "
+"configuration.\n");
+Die();
+  }
+#undef PR_SET_TAGGED_ADDR_CTRL
+#undef PR_GET_TAGGED_ADDR_CTRL
+#undef PR_TAGGED_ADDR_ENABLE
+}
+
 bool InitShadow() {
   // Define the entire memory range.
   kHighMemEnd = GetHighMemEnd();

diff --git a/libsanitizer/hwasan/hwasan.h b/libsanitizer/hwasan/hwasan.h
index 
817cee65016ee60f5cf6b5dc716a30e192e51e73..9e0ced93b55d361cd5aae787db7562741683944c
 100644
--- a/libsanitizer/hwasan/hwasan.h
+++ b/libsanitizer/hwasan/hwasan.h
@@ -74,6 +74,7 @@ extern int hwasan_report_count;
 
 bool ProtectRange(uptr beg, uptr end);
 bool InitShadow();
+void InitPrctl();
 void InitThreads();
 void MadviseShadow();
 char *GetProcSelfMaps();
diff --git a/libsanitizer/hwasan/hwasan.cpp b/libsanitizer/hwasan/hwasan.cpp
index 
999b51183f6184bb6564f3ec2f51f437e2598314..36d931caf7d4091480e1fc183a09d68735008b97
 100644
--- a/libsanitizer/hwasan/hwasan.cpp
+++ b/libsanitizer/hwasan/hwasan.cpp
@@ -312,6 +312,8 @@ static void InitLoadedGlobals() {
 static void InitInstrumentation() {
   if (hwasan_instrumentation_inited) return;
 
+  InitPrctl();
+
   if (!InitShadow()) {
 Printf("FATAL: HWAddressSanitizer cannot mmap the shadow memory.\n");
 DumpProcessMap();
diff --git a/libsanitizer/hwasan/hwasan_linux.cpp 
b/libsanitizer/hwasan/hwasan_linux.cpp
index 
051ec2fb9cc3aa333f4079dde6e5052173f84723..948e40154fec9295a451a3bc4e6a6914f619d6e3
 100644
--- a/libsanitizer/hwasan/hwasan_linux.cpp
+++ b/libsanitizer/hwasan/hwasan_linux.cpp
@@ -34,6 +34,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
@@ -144,6 +146,43 @@ static void InitializeShadowBaseAddress(uptr 
shadow_size_bytes) {
   FindDynamicShadowStart(shadow_size_bytes);
 }
 
+void InitPrctl() {
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+  // Check we're running on a kernel 

[PATCH 4/X] [libsanitizer] libhwasan add longjmp & setjmp interceptors

2019-11-05 Thread Matthew Malcomson
Backported from LLVM git id 67474c60d

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan.h (__hw_jmp_buf_struct, __hw_jmp_buf,
__hw_sigjmp_buf): Define new types for internal longjmp
implementation.
* hwasan/hwasan_interceptors.cpp (__sigjmp_save,
InternalLongjmp, siglongjmp, longjmp, __libc_longjmp): New.
* hwasan/hwasan_setjmp.S: New file.
* hwasan/hwasan_type_test.cpp: New file.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan.h b/libsanitizer/hwasan/hwasan.h
index 
9e0ced93b55d361cd5aae787db7562741683944c..64cdcf30f5c7059dcc1916f72e14b6649ca701f5
 100644
--- a/libsanitizer/hwasan/hwasan.h
+++ b/libsanitizer/hwasan/hwasan.h
@@ -172,4 +172,24 @@ void AndroidTestTlsSlot();
 RunFreeHooks(ptr);\
   } while (false)
 
+#if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
+// For both bionic and glibc __sigset_t is an unsigned long.
+typedef unsigned long __hw_sigset_t;
+// Setjmp and longjmp implementations are platform specific, and hence the
+// interception code is platform specific too.  As yet we've only implemented
+// the interception for AArch64.
+typedef unsigned long long __hw_register_buf[22];
+struct __hw_jmp_buf_struct {
+  // NOTE: The machine-dependent definition of `__sigsetjmp'
+  // assume that a `__hw_jmp_buf' begins with a `__hw_register_buf' and that
+  // `__mask_was_saved' follows it.  Do not move these members or add others
+  // before it.
+  __hw_register_buf __jmpbuf; // Calling environment.
+  int __mask_was_saved;   // Saved the signal mask?
+  __hw_sigset_t __saved_mask; // Saved signal mask.
+};
+typedef struct __hw_jmp_buf_struct __hw_jmp_buf[1];
+typedef struct __hw_jmp_buf_struct __hw_sigjmp_buf[1];
+#endif // HWASAN_WITH_INTERCEPTORS && __aarch64__
+
 #endif  // HWASAN_H
diff --git a/libsanitizer/hwasan/hwasan_interceptors.cpp 
b/libsanitizer/hwasan/hwasan_interceptors.cpp
index 
47fed0fc9abb821996efcd8d12f7e5442916326d..f6758efa65c051376468d3cad2c1530fa7329627
 100644
--- a/libsanitizer/hwasan/hwasan_interceptors.cpp
+++ b/libsanitizer/hwasan/hwasan_interceptors.cpp
@@ -220,6 +220,80 @@ DEFINE_REAL(int, vfork)
 DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork)
 #endif
 
+#if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
+// Get and/or change the set of blocked signals.
+extern "C" int sigprocmask(int __how, const __hw_sigset_t *__restrict __set,
+   __hw_sigset_t *__restrict __oset);
+#define SIG_BLOCK 0
+#define SIG_SETMASK 2
+extern "C" int __sigjmp_save(__hw_sigjmp_buf env, int savemask) {
+  env[0].__mask_was_saved =
+  (savemask && sigprocmask(SIG_BLOCK, (__hw_sigset_t *)0,
+   &env[0].__saved_mask) == 0);
+  return 0;
+}
+
+static void __attribute__((always_inline))
+InternalLongjmp(__hw_register_buf env, int retval) {
+  // Clear all memory tags on the stack between here and where we're going.
+  unsigned long long stack_pointer = env[13];
+  // The stack pointer should never be tagged, so we don't need to clear the
+  // tag for this function call.
+  __hwasan_handle_longjmp((void *)stack_pointer);
+
+  // Run code for handling a longjmp.
+  // Need to use a register that isn't going to be loaded from the environment
+  // buffer -- hence why we need to specify the register to use.
+  // Must implement this ourselves, since we don't know the order of registers
+  // in different libc implementations and many implementations mangle the
+  // stack pointer so we can't use it without knowing the demangling scheme.
+  register long int retval_tmp asm("x1") = retval;
+  register void *env_address asm("x0") = &env[0];
+  asm volatile("ldpx19, x20, [%0, #0<<3];"
+   "ldpx21, x22, [%0, #2<<3];"
+   "ldpx23, x24, [%0, #4<<3];"
+   "ldpx25, x26, [%0, #6<<3];"
+   "ldpx27, x28, [%0, #8<<3];"
+   "ldpx29, x30, [%0, #10<<3];"
+   "ldp d8,  d9, [%0, #14<<3];"
+   "ldpd10, d11, [%0, #16<<3];"
+   "ldpd12, d13, [%0, #18<<3];"
+   "ldpd14, d15, [%0, #20<<3];"
+   "ldrx5, [%0, #13<<3];"
+   "movsp, x5;"
+   // Return the value requested to return through arguments.
+   // This should be in x1 given what we requested above.
+   "cmp%1, #0;"
+   "movx0, #1;"
+   "csel   x0, %1, x0, ne;"
+   "br x30;"
+   : "+r"(env_address)
+   : "r"(retval_tmp));
+}
+
+INTERCEPTOR(void, siglongjmp, __hw_sigjmp_buf env, int val) {
+  if (env[0].__mask_was_saved)
+// Restore the saved signal mask.
+(void)sigprocmask(SIG_SETMASK, &env[0].__saved_mask,
+  (__hw_sigset_t *)0);
+  InternalLongjmp(env[0].__jmpbuf, val);
+}
+
+// Required since glibc libpthread calls

[PATCH 9/X] [libsanitizer] Remove lazy thread initialisation

2019-11-05 Thread Matthew Malcomson
Backport from llvm upstream (monorepo revision 91167e2).
This was an experiment made possible by a non-standard feature of the
Android dynamic loader.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan_interceptors.cpp (HwasanThreadStartFunc):
Re-introduce.
(pthread_create): Use HwasanThreadStartFunc to initialise the
sanitizer for each thread as it starts.
* hwasan/hwasan_linux.cpp (GetCurrentThread): Assume thread is
initialised.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan_interceptors.cpp 
b/libsanitizer/hwasan/hwasan_interceptors.cpp
index 
4f9bd3469eb10ca2cf3108326308e45e7a9d38b6..44e569ee6d721a99aa21ebf1a51fb33b6e7a
 100644
--- a/libsanitizer/hwasan/hwasan_interceptors.cpp
+++ b/libsanitizer/hwasan/hwasan_interceptors.cpp
@@ -202,23 +202,33 @@ INTERCEPTOR_ALIAS(__sanitizer_struct_mallinfo, mallinfo);
 INTERCEPTOR_ALIAS(int, mallopt, int cmd, int value);
 INTERCEPTOR_ALIAS(void, malloc_stats, void);
 #endif
-#endif // HWASAN_WITH_INTERCEPTORS
 
+struct ThreadStartArg {
+  thread_callback_t callback;
+  void *param;
+};
+
+static void *HwasanThreadStartFunc(void *arg) {
+  __hwasan_thread_enter();
+  ThreadStartArg A = *reinterpret_cast(arg);
+  UnmapOrDie(arg, GetPageSizeCached());
+  return A.callback(A.param);
+}
 
-#if HWASAN_WITH_INTERCEPTORS && !defined(__aarch64__)
-INTERCEPTOR(int, pthread_create, void *th, void *attr,
-void *(*callback)(void *), void *param) {
+INTERCEPTOR(int, pthread_create, void *th, void *attr, void 
*(*callback)(void*),
+void * param) {
   ScopedTaggingDisabler disabler;
+  ThreadStartArg *A = reinterpret_cast (MmapOrDie(
+  GetPageSizeCached(), "pthread_create"));
+  *A = {callback, param};
   int res = REAL(pthread_create)(UntagPtr(th), UntagPtr(attr),
- callback, param);
+ &HwasanThreadStartFunc, A);
   return res;
 }
-#endif
 
-#if HWASAN_WITH_INTERCEPTORS
 DEFINE_REAL(int, vfork)
 DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork)
-#endif
+#endif // HWASAN_WITH_INTERCEPTORS
 
 #if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
 // Get and/or change the set of blocked signals.
@@ -331,9 +341,7 @@ void InitializeInterceptors() {
 #if defined(__linux__)
   INTERCEPT_FUNCTION(vfork);
 #endif  // __linux__
-#if !defined(__aarch64__)
   INTERCEPT_FUNCTION(pthread_create);
-#endif  // __aarch64__
 #endif
 
   inited = 1;
diff --git a/libsanitizer/hwasan/hwasan_linux.cpp 
b/libsanitizer/hwasan/hwasan_linux.cpp
index 
dfef11883a284dae0c96cfcc6a8fd1cc06c24d71..ed0f30161b023bf5927aa4a471f6a7c3edc8edf6
 100644
--- a/libsanitizer/hwasan/hwasan_linux.cpp
+++ b/libsanitizer/hwasan/hwasan_linux.cpp
@@ -354,12 +354,7 @@ void AndroidTestTlsSlot() {}
 #endif
 
 Thread *GetCurrentThread() {
-  uptr *ThreadLong = GetCurrentThreadLongPtr();
-#if HWASAN_WITH_INTERCEPTORS
-  if (!*ThreadLong)
-__hwasan_thread_enter();
-#endif
-  auto *R = (StackAllocationsRingBuffer *)ThreadLong;
+  auto *R = (StackAllocationsRingBuffer *)GetCurrentThreadLongPtr();
   return hwasanThreadList().GetThreadByBufferAddress((uptr)(R->Next()));
 }
 

diff --git a/libsanitizer/hwasan/hwasan_interceptors.cpp 
b/libsanitizer/hwasan/hwasan_interceptors.cpp
index 
4f9bd3469eb10ca2cf3108326308e45e7a9d38b6..44e569ee6d721a99aa21ebf1a51fb33b6e7a
 100644
--- a/libsanitizer/hwasan/hwasan_interceptors.cpp
+++ b/libsanitizer/hwasan/hwasan_interceptors.cpp
@@ -202,23 +202,33 @@ INTERCEPTOR_ALIAS(__sanitizer_struct_mallinfo, mallinfo);
 INTERCEPTOR_ALIAS(int, mallopt, int cmd, int value);
 INTERCEPTOR_ALIAS(void, malloc_stats, void);
 #endif
-#endif // HWASAN_WITH_INTERCEPTORS
 
+struct ThreadStartArg {
+  thread_callback_t callback;
+  void *param;
+};
+
+static void *HwasanThreadStartFunc(void *arg) {
+  __hwasan_thread_enter();
+  ThreadStartArg A = *reinterpret_cast(arg);
+  UnmapOrDie(arg, GetPageSizeCached());
+  return A.callback(A.param);
+}
 
-#if HWASAN_WITH_INTERCEPTORS && !defined(__aarch64__)
-INTERCEPTOR(int, pthread_create, void *th, void *attr,
-void *(*callback)(void *), void *param) {
+INTERCEPTOR(int, pthread_create, void *th, void *attr, void 
*(*callback)(void*),
+void * param) {
   ScopedTaggingDisabler disabler;
+  ThreadStartArg *A = reinterpret_cast (MmapOrDie(
+  GetPageSizeCached(), "pthread_create"));
+  *A = {callback, param};
   int res = REAL(pthread_create)(UntagPtr(th), UntagPtr(attr),
- callback, param);
+ &HwasanThreadStartFunc, A);
   return res;
 }
-#endif
 
-#if HWASAN_WITH_INTERCEPTORS
 DEFINE_REAL(int, vfork)
 DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork)
-#endif
+#endif // HWASAN_WITH_INTERCEPTORS
 
 #if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
 // Get and/or change the set of blocked signals.
@@ -331,9 +341,7 @@ void InitializeIn

[PATCH 7/X] [libsanitizer] Add missing SANITIZER_INTERFACE_ATTRIBUTE on __hwasan_personality_wrapper

2019-11-05 Thread Matthew Malcomson
Backport from llvm upstream llvm-svn: 375298.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan_exceptions.cpp (__hwasan_personality_wrapper):
Add missing interface attribute.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan_exceptions.cpp 
b/libsanitizer/hwasan/hwasan_exceptions.cpp
index 
57a1438064cd28bb609359a4c841acf78337ebc3..169e7876cb58a9dafb70973ed9fb1dfd815a7ceb
 100644
--- a/libsanitizer/hwasan/hwasan_exceptions.cpp
+++ b/libsanitizer/hwasan/hwasan_exceptions.cpp
@@ -32,10 +32,13 @@ typedef _Unwind_Reason_Code PersonalityFn(int version, 
_Unwind_Action actions,
 typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index);
 typedef _Unwind_Word GetCFAFn(_Unwind_Context* context);
 
-extern "C" _Unwind_Reason_Code __hwasan_personality_wrapper(
-int version, _Unwind_Action actions, uint64_t exception_class,
-_Unwind_Exception* unwind_exception, _Unwind_Context* context,
-PersonalityFn* real_personality, GetGRFn* get_gr, GetCFAFn* get_cfa) {
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE _Unwind_Reason_Code
+__hwasan_personality_wrapper(int version, _Unwind_Action actions,
+ uint64_t exception_class,
+ _Unwind_Exception* unwind_exception,
+ _Unwind_Context* context,
+ PersonalityFn* real_personality, GetGRFn* get_gr,
+ GetCFAFn* get_cfa) {
   _Unwind_Reason_Code rc;
   if (real_personality)
 rc = real_personality(version, actions, exception_class, unwind_exception,

diff --git a/libsanitizer/hwasan/hwasan_exceptions.cpp 
b/libsanitizer/hwasan/hwasan_exceptions.cpp
index 
57a1438064cd28bb609359a4c841acf78337ebc3..169e7876cb58a9dafb70973ed9fb1dfd815a7ceb
 100644
--- a/libsanitizer/hwasan/hwasan_exceptions.cpp
+++ b/libsanitizer/hwasan/hwasan_exceptions.cpp
@@ -32,10 +32,13 @@ typedef _Unwind_Reason_Code PersonalityFn(int version, 
_Unwind_Action actions,
 typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index);
 typedef _Unwind_Word GetCFAFn(_Unwind_Context* context);
 
-extern "C" _Unwind_Reason_Code __hwasan_personality_wrapper(
-int version, _Unwind_Action actions, uint64_t exception_class,
-_Unwind_Exception* unwind_exception, _Unwind_Context* context,
-PersonalityFn* real_personality, GetGRFn* get_gr, GetCFAFn* get_cfa) {
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE _Unwind_Reason_Code
+__hwasan_personality_wrapper(int version, _Unwind_Action actions,
+ uint64_t exception_class,
+ _Unwind_Exception* unwind_exception,
+ _Unwind_Context* context,
+ PersonalityFn* real_personality, GetGRFn* get_gr,
+ GetCFAFn* get_cfa) {
   _Unwind_Reason_Code rc;
   if (real_personality)
 rc = real_personality(version, actions, exception_class, unwind_exception,



[PATCH 6/X] [libsanitizer] Add hwasan_exceptions.cpp file

2019-11-05 Thread Matthew Malcomson
This is needed for the hwasan_personality instrumentation I've added.
Backported from llvm-svn: 369721

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan_exceptions.cpp: New file.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan_exceptions.cpp 
b/libsanitizer/hwasan/hwasan_exceptions.cpp
new file mode 100644
index 
..57a1438064cd28bb609359a4c841acf78337ebc3
--- /dev/null
+++ b/libsanitizer/hwasan/hwasan_exceptions.cpp
@@ -0,0 +1,64 @@
+//===-- hwasan_exceptions.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file is a part of HWAddressSanitizer.
+//
+// HWAddressSanitizer runtime.
+//===--===//
+
+#include "hwasan_poisoning.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+#include <unwind.h>
+
+using namespace __hwasan;
+using namespace __sanitizer;
+
+typedef _Unwind_Reason_Code PersonalityFn(int version, _Unwind_Action actions,
+  uint64_t exception_class,
+  _Unwind_Exception* unwind_exception,
+  _Unwind_Context* context);
+
+// Pointers to the _Unwind_GetGR and _Unwind_GetCFA functions are passed in
+// instead of being called directly. This is to handle cases where the unwinder
+// is statically linked and the sanitizer runtime and the program are linked
+// against different unwinders. The _Unwind_Context data structure is opaque so
+// it may be incompatible between unwinders.
+typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index);
+typedef _Unwind_Word GetCFAFn(_Unwind_Context* context);
+
+extern "C" _Unwind_Reason_Code __hwasan_personality_wrapper(
+int version, _Unwind_Action actions, uint64_t exception_class,
+_Unwind_Exception* unwind_exception, _Unwind_Context* context,
+PersonalityFn* real_personality, GetGRFn* get_gr, GetCFAFn* get_cfa) {
+  _Unwind_Reason_Code rc;
+  if (real_personality)
+rc = real_personality(version, actions, exception_class, unwind_exception,
+  context);
+  else
+rc = _URC_CONTINUE_UNWIND;
+
+  // We only untag frames without a landing pad because landing pads are
+  // responsible for untagging the stack themselves if they resume.
+  //
+  // Here we assume that the frame record appears after any locals. This is not
+  // required by AAPCS but is a requirement for HWASAN instrumented functions.
+  if ((actions & _UA_CLEANUP_PHASE) && rc == _URC_CONTINUE_UNWIND) {
+#if defined(__x86_64__)
+uptr fp = get_gr(context, 6); // rbp
+#elif defined(__aarch64__)
+uptr fp = get_gr(context, 29); // x29
+#else
+#error Unsupported architecture
+#endif
+uptr sp = get_cfa(context);
+TagMemory(sp, fp - sp, 0);
+  }
+
+  return rc;
+}

diff --git a/libsanitizer/hwasan/hwasan_exceptions.cpp 
b/libsanitizer/hwasan/hwasan_exceptions.cpp
new file mode 100644
index 
..57a1438064cd28bb609359a4c841acf78337ebc3
--- /dev/null
+++ b/libsanitizer/hwasan/hwasan_exceptions.cpp
@@ -0,0 +1,64 @@
+//===-- hwasan_exceptions.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file is a part of HWAddressSanitizer.
+//
+// HWAddressSanitizer runtime.
+//===--===//
+
+#include "hwasan_poisoning.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+#include <unwind.h>
+
+using namespace __hwasan;
+using namespace __sanitizer;
+
+typedef _Unwind_Reason_Code PersonalityFn(int version, _Unwind_Action actions,
+  uint64_t exception_class,
+  _Unwind_Exception* unwind_exception,
+  _Unwind_Context* context);
+
+// Pointers to the _Unwind_GetGR and _Unwind_GetCFA functions are passed in
+// instead of being called directly. This is to handle cases where the unwinder
+// is statically linked and the sanitizer runtime and the program are linked
+// against different unwinders. The _Unwind_Context data structure is opaque so
+// it may be incompatible between unwinders.
+typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index);
+typedef _Unwind_Word GetCFAFn(_Unwind_Context* cont

[PATCH 8/X] [libsanitizer] Expose __hwasan_tag_mismatch_stub

2019-11-05 Thread Matthew Malcomson
Backport from llvm upstream (monorepo revision 612eadb).
This allows us to report tag mismatches without threading it through the
backend to generate assembly.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* hwasan/hwasan_interface_internal.h (__hwasan_tag_mismatch4):
New exported symbol.
* hwasan/hwasan_linux.cpp (__hwasan_tag_mismatch_stub): Rename
to ...
(__hwasan_tag_mismatch4): ... this.  Also add "size" argument.
* hwasan/hwasan_tag_mismatch_aarch64.S: Update function call to
use new name.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/hwasan/hwasan_interface_internal.h 
b/libsanitizer/hwasan/hwasan_interface_internal.h
index 
ca57f0fe437bfdbc3d490c1978985fc3ab64d4c5..aedda317497b61349050511a3d244f480fae5ba2
 100644
--- a/libsanitizer/hwasan/hwasan_interface_internal.h
+++ b/libsanitizer/hwasan/hwasan_interface_internal.h
@@ -112,6 +112,10 @@ SANITIZER_INTERFACE_ATTRIBUTE
 void __hwasan_tag_mismatch(uptr addr, u8 ts);
 
 SANITIZER_INTERFACE_ATTRIBUTE
+void __hwasan_tag_mismatch4(uptr addr, uptr access_info, uptr *registers_frame,
+size_t outsize);
+
+SANITIZER_INTERFACE_ATTRIBUTE
 u8 __hwasan_generate_tag();
 
 // Returns the offset of the first tag mismatch or -1 if the whole range is
diff --git a/libsanitizer/hwasan/hwasan_linux.cpp 
b/libsanitizer/hwasan/hwasan_linux.cpp
index 
948e40154fec9295a451a3bc4e6a6914f619d6e3..dfef11883a284dae0c96cfcc6a8fd1cc06c24d71
 100644
--- a/libsanitizer/hwasan/hwasan_linux.cpp
+++ b/libsanitizer/hwasan/hwasan_linux.cpp
@@ -460,21 +460,6 @@ static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, 
ucontext_t *uc) {
   return true;
 }
 
-// Entry point stub for interoperability between __hwasan_tag_mismatch (ASM) 
and
-// the rest of the mismatch handling code (C++).
-extern "C" void __hwasan_tag_mismatch_stub(uptr addr, uptr access_info,
-   uptr *registers_frame) {
-  AccessInfo ai;
-  ai.is_store = access_info & 0x10;
-  ai.recover = false;
-  ai.addr = addr;
-  ai.size = 1 << (access_info & 0xf);
-
-  HandleTagMismatch(ai, (uptr)__builtin_return_address(0),
-(uptr)__builtin_frame_address(0), nullptr, 
registers_frame);
-  __builtin_unreachable();
-}
-
 static void OnStackUnwind(const SignalContext &sig, const void *,
   BufferedStackTrace *stack) {
   stack->Unwind(StackTrace::GetNextInstructionPc(sig.pc), sig.bp, sig.context,
@@ -493,4 +478,24 @@ void HwasanOnDeadlySignal(int signo, void *info, void 
*context) {
 
 } // namespace __hwasan
 
+// Entry point for interoperability between __hwasan_tag_mismatch (ASM) and the
+// rest of the mismatch handling code (C++).
+void __hwasan_tag_mismatch4(uptr addr, uptr access_info, uptr *registers_frame,
+size_t outsize) {
+  __hwasan::AccessInfo ai;
+  ai.is_store = access_info & 0x10;
+  ai.is_load = !ai.is_store;
+  ai.recover = access_info & 0x20;
+  ai.addr = addr;
+  if ((access_info & 0xf) == 0xf)
+ai.size = outsize;
+  else
+ai.size = 1 << (access_info & 0xf);
+
+  __hwasan::HandleTagMismatch(ai, (uptr)__builtin_return_address(0),
+  (uptr)__builtin_frame_address(0), nullptr,
+  registers_frame);
+  __builtin_unreachable();
+}
+
 #endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/libsanitizer/hwasan/hwasan_tag_mismatch_aarch64.S 
b/libsanitizer/hwasan/hwasan_tag_mismatch_aarch64.S
index 
92f62748048682f2e762a91050232fd3c65d538f..d1e0654cf715149d9ce6e3d8863ccd4f33745c95
 100644
--- a/libsanitizer/hwasan/hwasan_tag_mismatch_aarch64.S
+++ b/libsanitizer/hwasan/hwasan_tag_mismatch_aarch64.S
@@ -89,12 +89,12 @@ __hwasan_tag_mismatch:
   stp x4,  x5,  [sp, #32]
   stp x2,  x3,  [sp, #16]
 
-  // Pass the address of the frame to __hwasan_tag_mismatch_stub, so that it 
can
+  // Pass the address of the frame to __hwasan_tag_mismatch4, so that it can
   // extract the saved registers from this frame without having to worry about
   // finding this frame.
   mov x2, sp
 
-  bl __hwasan_tag_mismatch_stub
+  bl __hwasan_tag_mismatch4
   CFI_ENDPROC
 
 .Lfunc_end0:

diff --git a/libsanitizer/hwasan/hwasan_interface_internal.h 
b/libsanitizer/hwasan/hwasan_interface_internal.h
index 
ca57f0fe437bfdbc3d490c1978985fc3ab64d4c5..aedda317497b61349050511a3d244f480fae5ba2
 100644
--- a/libsanitizer/hwasan/hwasan_interface_internal.h
+++ b/libsanitizer/hwasan/hwasan_interface_internal.h
@@ -112,6 +112,10 @@ SANITIZER_INTERFACE_ATTRIBUTE
 void __hwasan_tag_mismatch(uptr addr, u8 ts);
 
 SANITIZER_INTERFACE_ATTRIBUTE
+void __hwasan_tag_mismatch4(uptr addr, uptr access_info, uptr *registers_frame,
+size_t outsize);
+
+SANITIZER_INTERFACE_ATTRIBUTE
 u8 __hwasan_generate_tag();
 
 // Returns the offset of the first tag mismatch or -1 if the whole

[PATCH 10/X] [libsanitizer] Tie the hwasan library into our build system

2019-11-05 Thread Matthew Malcomson
This patch tries to tie libhwasan into the GCC build system in the
same way that the other sanitizer runtime libraries are handled.
libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* Makefile.am:  Build libhwasan.
* Makefile.in:  Build libhwasan.
* asan/Makefile.in:  Build libhwasan.
* configure:  Build libhwasan.
* configure.ac:  Build libhwasan.
* hwasan/Makefile.am: New file.
* hwasan/Makefile.in: New file.
* hwasan/libtool-version: New file.
* interception/Makefile.in: Build libhwasan.
* libbacktrace/Makefile.in: Build libhwasan.
* libsanitizer.spec.in: Build libhwasan.
* lsan/Makefile.in: Build libhwasan.
* merge.sh: Build libhwasan.
* sanitizer_common/Makefile.in: Build libhwasan.
* tsan/Makefile.in: Build libhwasan.
* ubsan/Makefile.in: Build libhwasan.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/Makefile.am b/libsanitizer/Makefile.am
index 
65ed1e712378ef453f820f86c4d3221f9dee5f2c..2a7e8e1debe838719db0f0fad218b2543cc3111b
 100644
--- a/libsanitizer/Makefile.am
+++ b/libsanitizer/Makefile.am
@@ -14,11 +14,12 @@ endif
 if LIBBACKTRACE_SUPPORTED
 SUBDIRS += libbacktrace
 endif
-SUBDIRS += lsan asan ubsan
+SUBDIRS += lsan asan ubsan hwasan
 nodist_saninclude_HEADERS += \
   include/sanitizer/lsan_interface.h \
   include/sanitizer/asan_interface.h \
-  include/sanitizer/tsan_interface.h
+  include/sanitizer/tsan_interface.h \
+  include/sanitizer/hwasan_interface.h
 if TSAN_SUPPORTED
 SUBDIRS += tsan
 endif
diff --git a/libsanitizer/Makefile.in b/libsanitizer/Makefile.in
index 
0d789b3a59d21ea2e5a23057ca3afe15425feec4..404ddcedde5428e0bc6d8ebc5f6568f99741ce2a
 100644
--- a/libsanitizer/Makefile.in
+++ b/libsanitizer/Makefile.in
@@ -92,7 +92,8 @@ target_triplet = @target@
 @SANITIZER_SUPPORTED_TRUE@am__append_1 = 
include/sanitizer/common_interface_defs.h \
 @SANITIZER_SUPPORTED_TRUE@ include/sanitizer/lsan_interface.h \
 @SANITIZER_SUPPORTED_TRUE@ include/sanitizer/asan_interface.h \
-@SANITIZER_SUPPORTED_TRUE@ include/sanitizer/tsan_interface.h
+@SANITIZER_SUPPORTED_TRUE@ include/sanitizer/tsan_interface.h \
+@SANITIZER_SUPPORTED_TRUE@ include/sanitizer/hwasan_interface.h
 @SANITIZER_SUPPORTED_TRUE@@USING_MAC_INTERPOSE_FALSE@am__append_2 = 
interception
 @LIBBACKTRACE_SUPPORTED_TRUE@@SANITIZER_SUPPORTED_TRUE@am__append_3 = 
libbacktrace
 @SANITIZER_SUPPORTED_TRUE@@TSAN_SUPPORTED_TRUE@am__append_4 = tsan
@@ -206,7 +207,7 @@ ETAGS = etags
 CTAGS = ctags
 CSCOPE = cscope
 DIST_SUBDIRS = sanitizer_common interception libbacktrace lsan asan \
-   ubsan tsan
+   ubsan hwasan tsan
 ACLOCAL = @ACLOCAL@
 ALLOC_FILE = @ALLOC_FILE@
 AMTAR = @AMTAR@
@@ -328,6 +329,7 @@ install_sh = @install_sh@
 libdir = @libdir@
 libexecdir = @libexecdir@
 link_libasan = @link_libasan@
+link_libhwasan = @link_libhwasan@
 link_liblsan = @link_liblsan@
 link_libtsan = @link_libtsan@
 link_libubsan = @link_libubsan@
@@ -341,6 +343,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@@ -361,7 +364,7 @@ sanincludedir = 
$(libdir)/gcc/$(target_alias)/$(gcc_version)/include/sanitizer
 nodist_saninclude_HEADERS = $(am__append_1)
 @SANITIZER_SUPPORTED_TRUE@SUBDIRS = sanitizer_common $(am__append_2) \
 @SANITIZER_SUPPORTED_TRUE@ $(am__append_3) lsan asan ubsan \
-@SANITIZER_SUPPORTED_TRUE@ $(am__append_4)
+@SANITIZER_SUPPORTED_TRUE@ hwasan $(am__append_4)
 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
 
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in
index 
00b6082da5372efd679ddc230f588bbc58161ef6..5ce6944a526418e484ee5925d2573248e168a736
 100644
--- a/libsanitizer/asan/Makefile.in
+++ b/libsanitizer/asan/Makefile.in
@@ -382,6 +382,7 @@ install_sh = @install_sh@
 libdir = @libdir@
 libexecdir = @libexecdir@
 link_libasan = @link_libasan@
+link_libhwasan = @link_libhwasan@
 link_liblsan = @link_liblsan@
 link_libtsan = @link_libtsan@
 link_libubsan = @link_libubsan@
@@ -395,6 +396,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
diff --git a/libsanitizer/configure b/libsanitizer/configure
index 
2d25147adbaaa3578388cf75763e6178302aa3dc..967efdb1f6f8ed30089ff73efc4e349a8bf9c1fc
 100755
--- a/libsanitizer/configure
+++ b/libsanitizer/configure
@@ -657,6 +657,7 @@ USING_MAC_INTERPOSE_TRUE
 link_liblsan
 link_libubsan
 link_libtsan
+link_libhwasan
 link_libasan
 LSAN_SUPPORTED_FALSE
 LSAN_SUPPORTED_TRUE
@@ -779,6 +780,7 @@ infodir
 docdir
 oldincludedir
 in

[PATCH 12/X] [libsanitizer] Add option to bootstrap using HWASAN

2019-11-05 Thread Matthew Malcomson
This is an analogous option to --bootstrap-asan to configure.  It allows
bootstrapping GCC using HWASAN.

For the same reasons as for ASAN we have to avoid using the HWASAN
sanitizer when compiling libiberty and the lto-plugin.

Also add a function to query whether -fsanitize=hwaddress has been
passed.

ChangeLog:

2019-08-29  Matthew Malcomson  

* configure: Regenerate.
* configure.ac: Add --bootstrap-hwasan option.

config/ChangeLog:

2019-11-05  Matthew Malcomson  

* bootstrap-hwasan.mk: New file.

libiberty/ChangeLog:

2019-11-05  Matthew Malcomson  

* configure: Regenerate.
* configure.ac: Avoid using sanitizer.

lto-plugin/ChangeLog:

2019-11-05  Matthew Malcomson  

* Makefile.am: Avoid using sanitizer.
* Makefile.in: Regenerate.



### Attachment also inlined for ease of reply###


diff --git a/config/bootstrap-hwasan.mk b/config/bootstrap-hwasan.mk
new file mode 100644
index 
..4f60bed3fd6e98b47a3a38aea6eba2a7c320da25
--- /dev/null
+++ b/config/bootstrap-hwasan.mk
@@ -0,0 +1,8 @@
+# This option enables -fsanitize=hwaddress for stage2 and stage3.
+
+STAGE2_CFLAGS += -fsanitize=hwaddress
+STAGE3_CFLAGS += -fsanitize=hwaddress
+POSTSTAGE1_LDFLAGS += -fsanitize=hwaddress -static-libhwasan \
+ -B$$r/prev-$(TARGET_SUBDIR)/libsanitizer/ \
+ -B$$r/prev-$(TARGET_SUBDIR)/libsanitizer/hwasan/ \
+ -B$$r/prev-$(TARGET_SUBDIR)/libsanitizer/hwasan/.libs
diff --git a/configure b/configure
index 
aec9186b2b0123d3088b69eb1ee541567654953e..c1a2b7ed25b6b4f8edfd44ed9049270b7eec2317
 100755
--- a/configure
+++ b/configure
@@ -754,6 +754,7 @@ infodir
 docdir
 oldincludedir
 includedir
+runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -920,6 +921,7 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE}'
@@ -1172,6 +1174,15 @@ do
   | -silent | --silent | --silen | --sile | --sil)
 silent=yes ;;
 
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+runstatedir=$ac_optarg ;;
+
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
 ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1309,7 +1320,7 @@ fi
 for ac_var in  exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-   libdir localedir mandir
+   libdir localedir mandir runstatedir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1469,6 +1480,7 @@ Fine tuning of the installation directories:
   --sysconfdir=DIRread-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIRmodifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+  --runstatedir=DIR   modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIRobject code libraries [EPREFIX/lib]
   --includedir=DIRC header files [PREFIX/include]
   --oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -7270,7 +7282,7 @@ fi
 # or bootstrap-ubsan, bootstrap it.
 if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1; then
   case "$BUILD_CONFIG" in
-*bootstrap-asan* | *bootstrap-ubsan* )
+*bootstrap-hwasan* | *bootstrap-asan* | *bootstrap-ubsan* )
   bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer,
   bootstrap_fixincludes=yes
   ;;
diff --git a/configure.ac b/configure.ac
index 
b8ce2ad20b9d03e42731252a9ec2a8417c13e566..16bfdf164555dad94c789f17b6a63ba1a2e3e9f4
 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2775,7 +2775,7 @@ fi
 # or bootstrap-ubsan, bootstrap it.
 if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1; then
   case "$BUILD_CONFIG" in
-*bootstrap-asan* | *bootstrap-ubsan* )
+*bootstrap-hwasan* | *bootstrap-asan* | *bootstrap-ubsan* )
   bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer,
   bootstrap_fixincludes=yes
   ;;
diff --git a/libiberty/configure b/libiberty/configure
index 
7a34dabec32b0b383bd33f07811757335f4dd39c..cb2dd4ff5295598343cc18b3a79a86a778f2261d
 100755
--- a/libiberty/configure
+++ b/libiberty/configure
@@ -5261,6 +5261,7 @@ fi
 NOASANFLAG=
 case " ${CFLAGS} " in
   *\ -fsanitize=address\ *) NOASANFLAG=-fno-sa

[PATCH 11/X] [libsanitizer] Only build libhwasan when targeting AArch64

2019-11-05 Thread Matthew Malcomson
Though the library has limited support for x86, we don't have any
support for generating code targeting x86 so there is no point building
for that target.

libsanitizer/ChangeLog:

2019-11-05  Matthew Malcomson  

* Makefile.am: Condition building hwasan directory.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: Set HWASAN_SUPPORTED based on target
architecture.
* configure.tgt: Likewise.



### Attachment also inlined for ease of reply###


diff --git a/libsanitizer/Makefile.am b/libsanitizer/Makefile.am
index 
2a7e8e1debe838719db0f0fad218b2543cc3111b..065a65e78d49f7689a01ecb64db1f07ca83aa987
 100644
--- a/libsanitizer/Makefile.am
+++ b/libsanitizer/Makefile.am
@@ -14,7 +14,7 @@ endif
 if LIBBACKTRACE_SUPPORTED
 SUBDIRS += libbacktrace
 endif
-SUBDIRS += lsan asan ubsan hwasan
+SUBDIRS += lsan asan ubsan
 nodist_saninclude_HEADERS += \
   include/sanitizer/lsan_interface.h \
   include/sanitizer/asan_interface.h \
@@ -23,6 +23,9 @@ nodist_saninclude_HEADERS += \
 if TSAN_SUPPORTED
 SUBDIRS += tsan
 endif
+if HWASAN_SUPPORTED
+SUBDIRS += hwasan
+endif
 endif
 
 ## May be used by toolexeclibdir.
diff --git a/libsanitizer/Makefile.in b/libsanitizer/Makefile.in
index 
404ddcedde5428e0bc6d8ebc5f6568f99741ce2a..3883c7d934884146763d2d751a7e88bdf31341fe
 100644
--- a/libsanitizer/Makefile.in
+++ b/libsanitizer/Makefile.in
@@ -97,6 +97,7 @@ target_triplet = @target@
 @SANITIZER_SUPPORTED_TRUE@@USING_MAC_INTERPOSE_FALSE@am__append_2 = 
interception
 @LIBBACKTRACE_SUPPORTED_TRUE@@SANITIZER_SUPPORTED_TRUE@am__append_3 = 
libbacktrace
 @SANITIZER_SUPPORTED_TRUE@@TSAN_SUPPORTED_TRUE@am__append_4 = tsan
+@HWASAN_SUPPORTED_TRUE@@SANITIZER_SUPPORTED_TRUE@am__append_5 = hwasan
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -207,7 +208,7 @@ ETAGS = etags
 CTAGS = ctags
 CSCOPE = cscope
 DIST_SUBDIRS = sanitizer_common interception libbacktrace lsan asan \
-   ubsan hwasan tsan
+   ubsan tsan hwasan
 ACLOCAL = @ACLOCAL@
 ALLOC_FILE = @ALLOC_FILE@
 AMTAR = @AMTAR@
@@ -364,7 +365,7 @@ sanincludedir = 
$(libdir)/gcc/$(target_alias)/$(gcc_version)/include/sanitizer
 nodist_saninclude_HEADERS = $(am__append_1)
 @SANITIZER_SUPPORTED_TRUE@SUBDIRS = sanitizer_common $(am__append_2) \
 @SANITIZER_SUPPORTED_TRUE@ $(am__append_3) lsan asan ubsan \
-@SANITIZER_SUPPORTED_TRUE@ hwasan $(am__append_4)
+@SANITIZER_SUPPORTED_TRUE@ $(am__append_4) $(am__append_5)
 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
 
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
diff --git a/libsanitizer/configure b/libsanitizer/configure
index 
967efdb1f6f8ed30089ff73efc4e349a8bf9c1fc..f44fdb9485e7e00c2f3f98fac3f0e93a3ca6abb4
 100755
--- a/libsanitizer/configure
+++ b/libsanitizer/configure
@@ -659,6 +659,8 @@ link_libubsan
 link_libtsan
 link_libhwasan
 link_libasan
+HWASAN_SUPPORTED_FALSE
+HWASAN_SUPPORTED_TRUE
 LSAN_SUPPORTED_FALSE
 LSAN_SUPPORTED_TRUE
 TSAN_SUPPORTED_FALSE
@@ -12347,7 +12349,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12350 "configure"
+#line 12352 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12453,7 +12455,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12456 "configure"
+#line 12458 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -15804,6 +15806,7 @@ fi
 # Get target configury.
 unset TSAN_SUPPORTED
 unset LSAN_SUPPORTED
+unset HWASAN_SUPPORTED
 . ${srcdir}/configure.tgt
  if test "x$TSAN_SUPPORTED" = "xyes"; then
   TSAN_SUPPORTED_TRUE=
@@ -15821,6 +15824,14 @@ else
   LSAN_SUPPORTED_FALSE=
 fi
 
+ if test "x$HWASAN_SUPPORTED" = "xyes"; then
+  HWASAN_SUPPORTED_TRUE=
+  HWASAN_SUPPORTED_FALSE='#'
+else
+  HWASAN_SUPPORTED_TRUE='#'
+  HWASAN_SUPPORTED_FALSE=
+fi
+
 
 # Check for functions needed.
 for ac_func in clock_getres clock_gettime clock_settime lstat readlink
@@ -16803,7 +16814,7 @@ ac_config_files="$ac_config_files Makefile 
libsanitizer.spec libbacktrace/backtr
 ac_config_headers="$ac_config_headers config.h"
 
 
-ac_config_files="$ac_config_files interception/Makefile 
sanitizer_common/Makefile libbacktrace/Makefile lsan/Makefile asan/Makefile 
hwasan/Makefile ubsan/Makefile"
+ac_config_files="$ac_config_files interception/Makefile 
sanitizer_common/Makefile libbacktrace/Makefile lsan/Makefile asan/Makefile 
ubsan/Makefile"
 
 
 if test "x$TSAN_SUPPORTED" = "xyes"; then
@@ -16811,6 +16822,11 @@ if test "x$TSAN_SUPPORTED" = "xyes"; then
 
 fi
 
+if test "x$HWASAN_SUPPORTED" = "xyes"; then
+  ac_config_files="$ac_config_files hwasan/Makefile"
+
+fi
+
 
 
 
@@ -17071,6 +17087,10 @@ if test -z "${LSAN_SUPPORTED_TRUE}" && test -z 
"${LSAN_SUPPORTED_FALSE}"; then
   as_fn_error $? "conditional \"LSAN_SUPPORTE

[PATCH 13/X] [libsanitizer][options] Add hwasan flags and argument parsing

2019-11-05 Thread Matthew Malcomson
These flags can't be used at the same time as any of the other
sanitizers.
We add an equivalent flag to -static-libasan in -static-libhwasan to
ensure static linking.

The -fsanitize=kernel-hwaddress option is for compiling targeting the
kernel.  This flag has defaults that allow compiling KASAN with tags as
it is currently implemented.
These defaults are that we do not sanitize variables on the stack and
always recover from a detected bug.
Stack tagging in the kernel is a future aim, stack instrumentation has
not yet been enabled for the kernel for clang either
(https://lists.infradead.org/pipermail/linux-arm-kernel/2019-October/687121.html).

We introduce a backend hook `targetm.memtag.can_tag_addresses` that
indicates to the mid-end whether a target has a feature like AArch64 TBI
where the top byte of an address is ignored.
Without this feature hwasan sanitization is not done.

NOTE:
--
I have defined a new macro of __SANITIZE_HWADDRESS__ that gets
automatically defined when compiling with hwasan.  This is analogous to
__SANITIZE_ADDRESS__ which is defined when compiling with asan.

Users in the kernel have expressed an interest in using
__SANITIZE_ADDRESS__ for both
(https://lists.infradead.org/pipermail/linux-arm-kernel/2019-October/690703.html).

One approach to do this could be to define __SANITIZE_ADDRESS__ with
different values depending on whether we are compiling with hwasan or
asan.

Using __SANITIZE_ADDRESS__ for both means that code like the kernel
which wants to treat the two sanitizers as alternate implementations of
the same thing gets that automatically.

My preference is to use __SANITIZE_HWADDRESS__ since that means any
existing code will not be predicated on this (and hence I guess fewer
surprises), but would appreciate feedback on this given the point above.
--

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* asan.c (memory_tagging_p): New.
* asan.h (memory_tagging_p): New.
* common.opt (flag_sanitize_recover): Default for kernel
hwaddress.
(static-libhwasan): New cli option.
* config/aarch64/aarch64.c (aarch64_can_tag_addresses): New.
(TARGET_MEMTAG_CAN_TAG_ADDRESSES): New.
* config/gnu-user.h (LIBHWASAN_EARLY_SPEC): hwasan equivalent of
asan command line flags.
* cppbuiltin.c (define_builtin_macros_for_compilation_flags):
Add hwasan equivalent of __SANITIZE_ADDRESS__.
* doc/tm.texi: Document new hook.
* doc/tm.texi.in: Document new hook.
* flag-types.h (enum sanitize_code): New sanitizer values.
* gcc.c (STATIC_LIBHWASAN_LIBS): New macro.
(LIBHWASAN_SPEC): New macro.
(LIBHWASAN_EARLY_SPEC): New macro.
(SANITIZER_EARLY_SPEC): Update to include hwasan.
(SANITIZER_SPEC): Update to include hwasan.
(sanitize_spec_function): Use hwasan options.
* opts.c (finish_options): Describe conflicts between address
sanitizers.
(sanitizer_opts): Introduce new sanitizer flags.
(common_handle_option): Add defaults for kernel sanitizer.
* params.def (PARAM_HWASAN_RANDOM_FRAME_TAG): New.
(PARAM_HWASAN_STACK): New.
* params.h (HWASAN_STACK): New.
(HWASAN_RANDOM_FRAME_TAG): New.
* target.def (HOOK_PREFIX): Add new hook.
* targhooks.c (default_memtag_can_tag_addresses): New.
* toplev.c (process_options): Ensure hwasan only on TBI
architectures.

gcc/c-family/ChangeLog:

2019-11-05  Matthew Malcomson  

* c-attribs.c (handle_no_sanitize_hwaddress_attribute): New
attribute.



### Attachment also inlined for ease of reply###


diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 
6500b998321419a1d8d57062534206c5909adb7a..2de94815f91da5a0fd06c30d0044f866084121b8
 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -54,6 +54,8 @@ static tree handle_cold_attribute (tree *, tree, tree, int, 
bool *);
 static tree handle_no_sanitize_attribute (tree *, tree, tree, int, bool *);
 static tree handle_no_sanitize_address_attribute (tree *, tree, tree,
  int, bool *);
+static tree handle_no_sanitize_hwaddress_attribute (tree *, tree, tree,
+   int, bool *);
 static tree handle_no_sanitize_thread_attribute (tree *, tree, tree,
 int, bool *);
 static tree handle_no_address_safety_analysis_attribute (tree *, tree, tree,
@@ -410,6 +412,8 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_no_sanitize_attribute, NULL },
   { "no_sanitize_address",0, 0, true, false, false, false,
  handle_no_sanitize_address_attribute, NULL },
+  { "no_sanitize_hwaddress",0, 0, true, false, false, false,
+ handle_no_sanitize_hwaddress_attribute, NU

[PATCH 15/X] [libsanitizer] Add hwasan pass and associated gimple changes

2019-11-05 Thread Matthew Malcomson
There are four main features to this change:

1) Check pointer tags match address tags.

In the new `hwasan` pass we put HWASAN_CHECK internal functions around
all memory accesses, to check that tags in the pointer being used match
the tag stored in shadow memory for the memory region being used.

These internal functions are expanded into actual checks in the sanopt
pass that happens just before expansion into RTL.

We use the same mechanism that currently inserts ASAN_CHECK internal
functions to insert the new HWASAN_CHECK functions.

2) Instrument known builtin function calls.

Handle all builtin functions that we know use memory accesses.
This commit uses the machinery added for ASAN to identify builtin
functions that access memory.

The main differences between the approaches for HWASAN and ASAN are:
 - libhwasan intercepts far fewer builtin functions.
 - Alloca needs to be transformed differently (instead of adding
   redzones it needs to colour shadow memory and return a tagged
   pointer).
 - stack_restore needs to uncolour the shadow stack between the current
   position and where it's going.
 - `noreturn` functions can not be handled by simply unpoisoning the
   entire shadow stack -- there is no "always valid" colour.
   (exceptions and things such as longjmp need to be handled in a
   different way).

For hardware implemented checking (such as AArch64's memory tagging
extension) alloca and stack_restore will need to be handled by hooks in
the backend rather than transformation at the gimple level.  This will
allow architecture specific handling of such stack modifications.

3) Introduce HWASAN block-scope poisoning

Here we use exactly the same mechanism as ASAN_MARK to poison/unpoison
variables on entry/exit of a block.

In order to simply use the exact same machinery we're using the same
internal functions until the SANOPT pass.  This means that all handling
of ASAN_MARK is the same.
This has the negative that the naming may be a little confusing, but a
positive that handling of the internal function doesn't have to be
duplicated for a function that behaves exactly the same but has a
different name.

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* asan.c (handle_builtin_stack_restore): Account for HWASAN.
(handle_builtin_alloca): Account for HWASAN.
(get_mem_refs_of_builtin_call): Special case strlen for HWASAN.
(report_error_func): Assert not HWASAN.
(build_check_stmt): Make HWASAN_CHECK instead of ASAN_CHECK.
(instrument_derefs): HWASAN does not tag globals.
(maybe_instrument_call): Don't instrument `noreturn` functions.
(initialize_sanitizer_builtins): Add new type.
(asan_expand_mark_ifn): Account for HWASAN.
(asan_expand_check_ifn): Assert never called by HWASAN.
(asan_expand_poison_ifn): Account for HWASAN.
(hwasan_instrument): New.
(hwasan_base): New.
(hwasan_emit_uncolour_frame): Free block-scope-var hash map.
(hwasan_check_func): New.
(hwasan_expand_check_ifn): New.
(hwasan_expand_mark_ifn): New.
(gate_hwasan): New.
(class pass_hwasan): New.
(make_pass_hwasan): New.
(class pass_hwasan_O0): New.
(make_pass_hwasan_O0): New.
* asan.h (hwasan_base): New decl.
(hwasan_expand_check_ifn): New decl.
(hwasan_expand_mark_ifn): New decl.
(gate_hwasan): New decl.
(enum hwasan_mark_flags): New.
(asan_intercepted_p): Always false for hwasan.
(asan_sanitize_use_after_scope): Account for HWASAN.
* builtin-types.def (BT_FN_PTR_CONST_PTR_UINT8): New.
* gimple-pretty-print.c (dump_gimple_call_args): Account for
HWASAN.
* gimplify.c (asan_poison_variable): Account for HWASAN.
(gimplify_function_tree): Remove requirement of
SANITIZE_ADDRESS, requiring asan or hwasan is accounted for in
`asan_sanitize_use_after_scope`.
* internal-fn.c (expand_HWASAN_CHECK): New.
(expand_HWASAN_CHOOSE_COLOUR): New.
(expand_HWASAN_MARK): New.
* internal-fn.def (HWASAN_CHOOSE_COLOUR): New.
(HWASAN_CHECK): New.
(HWASAN_MARK): New.
* passes.def: Add hwasan and hwasan_O0 passes.
* sanitizer.def (BUILT_IN_HWASAN_LOAD1): New.
(BUILT_IN_HWASAN_LOAD2): New.
(BUILT_IN_HWASAN_LOAD4): New.
(BUILT_IN_HWASAN_LOAD8): New.
(BUILT_IN_HWASAN_LOAD16): New.
(BUILT_IN_HWASAN_LOADN): New.
(BUILT_IN_HWASAN_STORE1): New.
(BUILT_IN_HWASAN_STORE2): New.
(BUILT_IN_HWASAN_STORE4): New.
(BUILT_IN_HWASAN_STORE8): New.
(BUILT_IN_HWASAN_STORE16): New.
(BUILT_IN_HWASAN_STOREN): New.
(BUILT_IN_HWASAN_LOAD1_NOABORT): New.
(BUILT_IN_HWASAN_LOAD2_NOABORT): New.
(BUILT_IN_HWASAN_LOAD4_NOABORT): New.
(BUILT_IN_HWASAN_LOAD8_NOABORT): New.
(BUILT_IN_HWASAN_LOAD16_NOABORT): New.
(BUI

[PATCH 14/X] [libsanitizer][mid-end] Introduce stack variable handling for HWASAN

2019-11-05 Thread Matthew Malcomson
Handling stack variables has three features.

1) Ensure HWASAN required alignment for stack variables

When colouring shadow memory, we need to ensure that each tag granule
is only used by one variable at a time.

This is done by ensuring that each coloured variable is aligned to the
tag granule representation size and also ensuring that the end of each
variable has an alignment boundary between it and the start of any
other data stored on the stack.

This patch ensures that by adding alignment requirements in
`align_local_variable` and forcing all stack variable allocation to be
deferred so that `expand_stack_vars` can ensure the stack pointer is
aligned before allocating any variable for the current frame.

2) Put tags into each stack variable pointer

Make sure that every pointer to a stack variable includes a tag of some
sort on it.

The way tagging works is:
  1) For every new stack frame, a random tag is generated.
  2) A base register is formed from the stack pointer value and this
 random tag.
  3) References to stack variables are now formed with RTL describing an
 offset from this base in both tag and value.

The random tag generation is handled by a backend hook.  This hook
decides whether to introduce a random tag or use the stack background
based on the parameter hwasan-random-frame-tag.  Using the stack
background is necessary for testing and bootstrap.  It is necessary
during bootstrap to avoid breaking the `configure` test program for
determining stack direction.

Using the stack background means that every stack frame has the initial
colour of zero and variables are coloured with incrementing tags from 1,
which also makes debugging a bit easier.

The tag&value offsets are also handled by a backend hook.

This patch also adds some macros defining how the HWASAN shadow memory
is stored and how a tag is stored in a pointer.

3) For each stack variable, colour and uncolour the shadow stack on
   function prologue and epilogue.

On entry to each function we colour the relevant shadow stack region for
each stack variable the colour to match the tag added to each pointer
for that variable.

This is the first patch where we use the HWASAN shadow space, so we need
to add in the libhwasan initialisation code that creates this shadow
memory region into the binary we produce.  This instrumentation is done
in `compile_file`.

When exiting a function we need to ensure the shadow stack for this
function has no remaining colour.  Without clearing the shadow stack
area for this stack frame, later function calls could get false
positives when those later function calls check untagged areas (such as
parameters passed on the stack) against a shadow stack area with
left-over colour.

Hence we ensure that the entire stack frame is cleared on function exit.

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* asan.c (hwasan_record_base): New function.
(hwasan_emit_uncolour_frame): New.
(hwasan_increment_tag): New function.
(hwasan_with_tag): New function.
(hwasan_tag_init): New function.
(initialize_sanitizer_builtins): Define new builtins.
(ATTR_NOTHROW_LIST): New macro.
(hwasan_current_tag): New.
(hwasan_emit_prologue): New.
(hwasan_create_untagged_base): New.
(hwasan_finish_file): New.
(hwasan_sanitize_stack_p): New.
(memory_tagging_p): New.
* asan.h (hwasan_record_base): New declaration.
(hwasan_emit_uncolour_frame): New.
(hwasan_increment_tag): New declaration.
(hwasan_with_tag): New declaration.
(hwasan_sanitize_stack_p): New declaration.
(hwasan_tag_init): New declaration.
(memory_tagging_p): New declaration.
(HWASAN_TAG_SIZE): New macro.
(HWASAN_TAG_GRANULE_SIZE):New macro.
(HWASAN_SHIFT):New macro.
(HWASAN_SHIFT_RTX):New macro.
(HWASAN_STACK_BACKGROUND):New macro.
(hwasan_finish_file): New.
(hwasan_current_tag): New.
(hwasan_create_untagged_base): New.
(hwasan_emit_prologue): New.
* cfgexpand.c (struct stack_vars_data): Add information to
record hwasan variable stack offsets.
(expand_stack_vars): Ensure variables are offset from a tagged
base. Record offsets for hwasan. Ensure alignment.
(expand_used_vars): Call function to emit prologue, and get
uncolouring instructions for function exit.
(align_local_variable): Ensure alignment.
(defer_stack_allocation): Ensure all variables are deferred so
they can be handled by `expand_stack_vars`.
(expand_one_stack_var_at): Account for tags in
variables when using HWASAN.
(expand_one_stack_var_1): Pass new argument to
expand_one_stack_var_at.
(init_vars_expansion): Initialise hwasan internal variables when
starting variable expansion.
* doc/tm.texi (TARGET_MEMTAG_GENTAG): Document.
* doc/tm.texi

[PATCH 17/X] [libsanitizer] Add hwasan Exception handling

2019-11-05 Thread Matthew Malcomson
When tagging the stack we need to ensure that any stack frames are
untagged during unwinding of any sort.  If this is not done, then
functions called later which re-use the stack can observe tag mismatches
on accesses to memory they have not tagged but that has been tagged
previously.

This applies equally to C++ exceptions as it does to longjmp and normal
function return.
The approach that LLVM has taken to account for C++ exceptions, is to
add a new personality routine on every function.  This new personality
routine calls the original personality routine, then untags the stack of
that function.

In order to untag the stack of the function currently being unwound, the
new personality wrapper needs to know the start and end of the current
stack frame.  The current implementation in libhwasan uses the frame
pointer to find the start of the stack frame.
https://github.com/llvm-mirror/compiler-rt/blob/69445f095c22aac2388f939bedebf224a6efcdaf/lib/hwasan/hwasan_exceptions.cpp#L58
This does not work for GCC, where the frame pointer is usually the same
as the stack pointer.

This patch demonstrates how adding C++ exception handling into GCC might
work, but currently does not work.  I intend to work on getting this
functionality, but do not consider it a blocker.

The current implementation ensures every function has a wrapped
personality function by modifying `get_personality_function` to check
for if we're handling exceptions and sanitizing the code with hwasan.
If so it returns a specially generated personality function that calls
`__hwasan_personality_wrapper` in libhwasan.


I've been testing the compiler instrumentation with a hack in libhwasan shown
below and things are mostly working, but I don't feel it's a good way forward.

I was wondering -- does anyone have any better ideas to keep this personality
method working for gcc?
I've also included the people that worked on exception handling in LLVM.  I
figure you may have tried other things before and hence have a good idea of
the pitfalls in this area.

My main aim is to get software tagging working for the kernel, so to me
exception handling is not a deal-breaker, but I would really like to get this
feature working.


Patch to libhwasan that I've been testing the instrumentation with.
##
diff --git a/libsanitizer/hwasan/hwasan_exceptions.cpp 
b/libsanitizer/hwasan/hwasan_exceptions.cpp
index 169e787..faec2e8 100644
--- a/libsanitizer/hwasan/hwasan_exceptions.cpp
+++ b/libsanitizer/hwasan/hwasan_exceptions.cpp
@@ -52,14 +52,16 @@ __hwasan_personality_wrapper(int version, _Unwind_Action 
actions,
   // Here we assume that the frame record appears after any locals. This is not
   // required by AAPCS but is a requirement for HWASAN instrumented functions.
   if ((actions & _UA_CLEANUP_PHASE) && rc == _URC_CONTINUE_UNWIND) {
+uptr sp = get_cfa(context);
 #if defined(__x86_64__)
 uptr fp = get_gr(context, 6); // rbp
 #elif defined(__aarch64__)
-uptr fp = get_gr(context, 29); // x29
+uptr fp = *(uptr *)sp;
+if (fp == 0)
+  return rc;
 #else
 #error Unsupported architecture
 #endif
-uptr sp = get_cfa(context);
 TagMemory(sp, fp - sp, 0);
   }
##



gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* asan.c (hwasan_create_personality_thunk): New.
* asan.h (hwasan_create_personality_thunk): New.
* expr.c (get_personality_function): Add special function if
using hwasan.



### Attachment also inlined for ease of reply###


diff --git a/gcc/asan.h b/gcc/asan.h
index 
c27d679c117a9cc7b15b7b4c6710cf0b46050089..ff6adf2391ee1602a3c15755312a04f82d6369ce
 100644
--- a/gcc/asan.h
+++ b/gcc/asan.h
@@ -36,6 +36,7 @@ extern void hwasan_emit_prologue (rtx *, rtx *, poly_int64 *, 
uint8_t *, size_t)
 extern rtx_insn *hwasan_emit_uncolour_frame (rtx, rtx, rtx_insn *);
 extern bool hwasan_expand_check_ifn (gimple_stmt_iterator *, bool);
 extern bool hwasan_expand_mark_ifn (gimple_stmt_iterator *);
+extern tree hwasan_create_personality_thunk (tree);
 extern bool memory_tagging_p (void);
 extern bool hwasan_sanitize_stack_p (void);
 extern bool gate_hwasan (void);
diff --git a/gcc/asan.c b/gcc/asan.c
index 
edfbf2048b67d3dc7be78a8e9961152c4fb44902..ef7c90e3358c8fa880b8e4002996f27541c26953
 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -260,6 +260,10 @@ hash_set  *asan_used_labels = NULL;
 
 static uint8_t tag_offset = 0;
 static rtx hwasan_base_ptr = NULL_RTX;
+static hash_map , uintptr_t> >
+   *hwasan_wrapped_personalities = NULL;
+static tree hwasan_gr_decl = NULL_TREE;
+static tree hwasan_cfa_decl = NULL_TREE;
 
 /* Sets shadow offset to value in string VAL.  */
 
@@ -3951,6 +3955,220 @@ hwasan_tag_init ()
   tag_offset = HWASAN_STACK_BACKGROUND + 1;
 }
 
+tree
+hwasan_create_personality_thunk (tree orig_personality_decl)
+{
+  /* Only works with DWARF2 debugging.  */
+  /* Create a function called 
__hwasan_personality_thunk.
+ (should be bare __hwasan_persona

[PATCH 18/X] [libsanitizer] Add in MTE stubs

2019-11-05 Thread Matthew Malcomson
This patch in the series is just for demonstration, here we add stubs
where MTE would be implemented.

We also add a new flag to request memory tagging as a sanitizer option.
The new flag for memory tagging is `-fsanitize=memtag`, which is in line
with the flag clang uses to request memory tagging.

At the moment all implementations are dummies of some sort, the assembly
generated uses `mov` instead of `irg`, `add` instead of `addg`, and
`sub` instead of `subg`.  This should mean the binaries behave the same
as MTE binaries but for ignoring tags.

For a hardware implementation of memory tagging checks are done
automatically so adding HWASAN_CHECK is not needed.  This means that the
`hwasan` pass is not needed.
Similarly, much of the `sanopt` pass is not needed when compiling for
hardware memory tagging -- though there is still need for handling
HWASAN_MARK.

This patch gives backends extra control over how a tag is stored in a
pointer and how many real-memory bytes is represented by each byte in
the shadow space.

One final difference between memtag and hwasan is that memtag can't use
the ASAN_POISON optimisation.
This optimisation replaces accesses to a variable that has just been
poisoned with an internal function that will be used to report an error
without needing to check the access.

This provides no benefit for memtag since there tend to be no
instructions allowing a report of a memory fault outside of mis-tagging
some memory and attempting to access it.

The optimisation is hence disabled for memory tagging since it provides
no benefit and would require all backends that wanted this feature to
implement a similar dummy hook.

gcc/ChangeLog:

2019-11-05  Matthew Malcomson  

* asan.c (hwasan_tag_init): Choose initialisation value based on
memtag vs hwasan.
(memory_tagging_p): Check for either hwaddress or memtag.
(hwasan_emit_prologue): Account for memtag.
(hwasan_emit_uncolour_frame): Account for memtag.
(hwasan_finish_file): Assert not called for memtag.
(hwasan_expand_check_ifn): Assert not called for memtag.
(gate_hwasan): Don't run when have memtag.
* asan.h (HWASAN_TAG_SIZE): Use backend hook if memtag.
(HWASAN_TAG_GRANULE_SIZE): Use backend hook if memtag.
(HWASAN_SHIFT): New.
(HWASAN_SHIFT_RTX): New.
(HWASAN_TAG_SHIFT_SIZE): New.
* builtins.c (expand_builtin_alloca): Extra TODO comment.
(expand_stack_restore): Extra TODO comment.
* cfgexpand.c (expand_stack_vars): Only bother untagging bases
for hwasan.
* config/aarch64/aarch64.c (aarch64_classify_address): Account
for addtag unspec marker.
(aarch64_has_memtag_isa): New hook.
(aarch64_tag_memory): Add dummy hook.
(aarch64_gentag): Add dummy hook.
(aarch64_addtag): New hook.
(aarch64_addtag_force_operand): New hook.
(TARGET_MEMTAG_HAS_MEMORY_TAGGING): New.
(TARGET_MEMTAG_TAG): New.
(TARGET_MEMTAG_GENTAG): New.
(TARGET_MEMTAG_ADDTAG): New.
(TARGET_MEMTAG_ADDTAG_FORCE_OPERAND): New.
* config/aarch64/aarch64.h (AARCH64_ISA_MEMTAG): New macro.
* config/aarch64/aarch64.md (random_tag, plain_offset_tagdi):
New.
(unspec enum): Add GENTAG and ADDTAG markers.
* config/aarch64/predicates.md (aarch64_MTE_add_temp,
aarch64_MTE_tag_offset, aarch64_MTE_value_offset): New.
* doc/tm.texi: Document new hooks.
* doc/tm.texi.in: Document new hooks.
* flag-types.h (enum sanitize_code): Add MEMTAG enum.
* gcc.c (sanitize_spec_function): Account for MEMTAG option.
* internal-fn.c (expand_HWASAN_MARK): Account for memtag.
* opts.c (finish_options): Ensure MEMTAG conflicts with ASAN,
HWASAN, and THREAD.
(finish_options): Turn on stack tagging for memtag.
(sanitizer_opts): Add MEMTAG option.
* target.def (targetm.memtag.has_memory_tagging): New.
(targetm.memtag.tag_size): New.
(targetm.memtag.granule_size): New.
(targetm.memtag.copy_tag): New.
(targetm.memtag.tag): New.
* targhooks.c (default_memtag_has_memory_tagging): New.
(default_memtag_tag_size): New.
(default_memtag_granule_size): New.
(default_memtag_copy_tag): New.
* targhooks.h (default_memtag_tag_size): New decl.
(default_memtag_granule_size): New decl.
(default_memtag_copy_tag): New decl.
* tree-ssa.c (execute_update_addresses_taken): Avoid ASAN_POISON
optimisation for memtag.

gcc/testsuite/ChangeLog:

2019-11-05  Matthew Malcomson  

* gcc.dg/hwasan/poly-int-stack-vars.c: New test.



### Attachment also inlined for ease of reply###


diff --git a/gcc/asan.h b/gcc/asan.h
index 
ff6adf2391ee1602a3c15755312a04f82d6369ce..71dbaee708d0e64911f568503655478b8720f494
 100644
--- a/gcc/asan.h
+++ b/gcc/asan.h
@@ -27,1

[PATCH 16/X] [libsanitizer] Add tests

2019-11-05 Thread Matthew Malcomson
Adding hwasan tests.

Frankly, these could be tidied up a little.
I will be tidying them up while getting feedback on the hwasan introduction.


gcc/testsuite/ChangeLog:

2019-11-05  Matthew Malcomson  

* c-c++-common/hwasan/arguments.c: New test.
* c-c++-common/hwasan/halt_on_error-1.c: New test.
* g++.dg/hwasan/rvo-handled.c: New test.
* g++.dg/hwasan/try-catch-0.cpp: New test.
* g++.dg/hwasan/try-catch-1.cpp: New test.
* gcc.dg/hwasan/aligned-alloc.c: New test.
* gcc.dg/hwasan/alloca-array-accessible.c: New test.
* gcc.dg/hwasan/alloca-gets-different-tag.c: New test.
* gcc.dg/hwasan/alloca-outside-caught.c: New test.
* gcc.dg/hwasan/bitfield-1.c: New test.
* gcc.dg/hwasan/bitfield-2.c: New test.
* gcc.dg/hwasan/builtin-special-handling.c: New test.
* gcc.dg/hwasan/check-interface.c: New test.
* gcc.dg/hwasan/hwasan-poison-optimisation.c: New test.
* gcc.dg/hwasan/hwasan-thread-access-parent.c: New test.
* gcc.dg/hwasan/hwasan-thread-basic-failure.c: New test.
* gcc.dg/hwasan/hwasan-thread-clears-stack.c: New test.
* gcc.dg/hwasan/hwasan-thread-success.c: New test.
* gcc.dg/hwasan/hwasan.exp: New file.
* gcc.dg/hwasan/kernel-defaults.c: New test.
* gcc.dg/hwasan/large-aligned-0.c: New test.
* gcc.dg/hwasan/large-aligned-1.c: New test.
* gcc.dg/hwasan/macro-definition.c: New test.
* gcc.dg/hwasan/nested-functions-0.c: New test.
* gcc.dg/hwasan/nested-functions-1.c: New test.
* gcc.dg/hwasan/nested-functions-2.c: New test.
* gcc.dg/hwasan/no-sanitize-attribute.c: New test.
* gcc.dg/hwasan/random-frame-tag.c: New test.
* gcc.dg/hwasan/setjmp-longjmp-0.c: New test.
* gcc.dg/hwasan/setjmp-longjmp-1.c: New test.
* gcc.dg/hwasan/stack-tagging-basic-0.c: New test.
* gcc.dg/hwasan/stack-tagging-basic-1.c: New test.
* gcc.dg/hwasan/stack-tagging-disable.c: New test.
* gcc.dg/hwasan/vararray-outside-caught.c: New test.
* gcc.dg/hwasan/very-large-objects.c: New test.
* lib/hwasan-dg.exp: New file.



### Attachment also inlined for ease of reply###


diff --git a/gcc/testsuite/c-c++-common/hwasan/arguments.c 
b/gcc/testsuite/c-c++-common/hwasan/arguments.c
new file mode 100644
index 
..2d563eb8541694d501b021babd9452fd7fd502a3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/hwasan/arguments.c
@@ -0,0 +1,7 @@
+/*
+   TODO
+   Somehow test the conflict of arguments
+   -fsanitize=hwaddress -fsanitize=kernel-address
+   -fsanitize=hwaddress -fsanitize=address
+   -fsanitize=hwaddress -fsanitize=thread
+ */
diff --git a/gcc/testsuite/c-c++-common/hwasan/halt_on_error-1.c 
b/gcc/testsuite/c-c++-common/hwasan/halt_on_error-1.c
new file mode 100644
index 
..118191e2e00bd07bd4839888d2fb29baec926c60
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/hwasan/halt_on_error-1.c
@@ -0,0 +1,25 @@
+/* Test recovery mode.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize-recover=hwaddress" } */
+/* { dg-set-target-env-var HWASAN_OPTIONS "halt_on_error=false" } */
+/* { dg-shouldfail "hwasan" } */
+
+#include 
+
+volatile int ten = 16;
+
+int main() {
+  char x[10];
+  __builtin_memset(x, 0, ten + 1);
+  asm volatile ("" : : : "memory");
+  volatile int res = x[ten];
+  x[ten] = res + 3;
+  res = x[ten];
+  return 0;
+}
+
+/* { dg-output "WRITE of size 17 at 0x\[0-9a-f\]+.*" } */
+/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+.*" } */
+/* { dg-output "WRITE of size 1 at 0x\[0-9a-f\]+.*" } */
+/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+.*" } */
+
diff --git a/gcc/testsuite/g++.dg/hwasan/rvo-handled.c 
b/gcc/testsuite/g++.dg/hwasan/rvo-handled.c
new file mode 100644
index 
..6e6934a0be1b0ce14c459555168f6a2590a8ec7f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/hwasan/rvo-handled.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* TODO Ensure this test has enough optimisation to get RVO. */
+
+#define assert(x) if (!(x)) __builtin_abort ()
+
+struct big_struct {
+int left;
+int right;
+void *ptr;
+int big_array[100];
+};
+
+/*
+   Tests for RVO (basically, checking -fsanitize=hwaddress has not broken RVO
+   in any way).
+
+   0) The value is accessible in both functions without a hwasan complaint.
+   1) RVO does happen.
+ */
+
+struct big_struct __attribute__ ((noinline))
+return_on_stack()
+{
+  struct big_struct x;
+  x.left = 100;
+  x.right = 20;
+  x.big_array[10] = 30;
+  x.ptr = &x;
+  return x;
+}
+
+struct big_struct __attribute__ ((noinline))
+unnamed_return_on_stack()
+{
+  return (struct big_struct){
+  .left = 100,
+  .right = 20,
+  .ptr = __builtin_frame_address (0),
+  .big_array = {0}
+  };
+}
+
+int main()
+{
+  struct big_struct x;
+  x = return_on_stack();
+  /

Re: Add obstack for canonical file name hash table

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Jan Hubicka wrote:

> > On Tue, 5 Nov 2019, Jan Hubicka wrote:
> > 
> > > Hi,
> > > looking into malloc overhead I noticed that we do a lot of small
> > > allocations to hold file names comming from location info. This patch
> > > puts it into an obstack so it interleaves memory allocated by scc_hash
> > > less frequently.
> > > (Still we end up interleaving 64k pages which are permanent - in fact
> > > this table seems to leak from WPA and temporary during stream in)
> > > 
> > > Bootstrapped/regtested x86_64-linux. OK?
> > 
> > I think the obstack deserves a big fat comment that it cannot be
> > reclaimed since the linemap retains permanent pointers into it.
> > That also suggests to put the string_slot into a separate obstack
> 
> The hasher is sort of eternal, too, since at any time we want to be
> able to load more from input streams, so we can not really free it.
> Well, I guess just prior streaming we can, so I will split it.
> > or better, make the hasher (and other string_slot hashers)
> > embed the string_slot struct in the hash?  We'd save an allocation
> > everywhere.
> 
> Well, if we want to free hasher, then we want to keep string separate +
> comment on obstack, right?  I will update the patch tonight.

Yes, we want to have an obstack with just the strings and a comment
that we have to keep that.

Richard.

> Honza
> > 
> > Richard.
> > 
> > > Honza
> > > 
> > >   * lto-streamer-in.c (file_name_obstack): New obstack.
> > >   (canon_file_name): Use it.
> > >   (lto_reader_init): Initialize it.
> > > Index: lto-streamer-in.c
> > > ===
> > > --- lto-streamer-in.c (revision 277796)
> > > +++ lto-streamer-in.c (working copy)
> > > @@ -57,6 +57,7 @@ freeing_string_slot_hasher::remove (valu
> > >  
> > >  /* The table to hold the file names.  */
> > >  static hash_table *file_name_hash_table;
> > > +static struct obstack file_name_obstack;
> > >  
> > >  
> > >  /* Check that tag ACTUAL has one of the given values.  NUM_TAGS is the
> > > @@ -113,8 +114,9 @@ canon_file_name (const char *string)
> > >char *saved_string;
> > >struct string_slot *new_slot;
> > >  
> > > -  saved_string = (char *) xmalloc (len + 1);
> > > -  new_slot = XCNEW (struct string_slot);
> > > +  saved_string = XOBNEWVEC (&file_name_obstack, char, len + 1);
> > > +  new_slot = XOBNEWVAR (&file_name_obstack,
> > > + struct string_slot, sizeof (struct string_slot));
> > >memcpy (saved_string, string, len + 1);
> > >new_slot->s = saved_string;
> > >new_slot->len = len;
> > > @@ -1723,6 +1725,7 @@ lto_reader_init (void)
> > >lto_streamer_init ();
> > >file_name_hash_table
> > >  = new hash_table (37);
> > > +  gcc_obstack_init (&file_name_obstack);
> > >  }
> > >  
> > >  
> > > 
> > 
> > -- 
> > Richard Biener 
> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> > Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [aarch64] Allocate enough space for err_str in aarch64_handle_attr_branch_protection

2019-11-05 Thread Kyrylo Tkachov
Hi Matthew,

On 11/5/19 11:33 AM, Matthew Malcomson wrote:
> -fsanitize=hwaddress found a one-byte overwrite when running the
> testsuite here.  aarch64_handle_attr_branch_protection allocates
> `strlen(str)` bytes for an error string, which is populated by
> `strcpy(..., str)` in the case where the branch protection string is
> completely invalid.
>
> Tested on aarch64 with hwasan (though not a full bootstrap since it's
> obvious).
>
Nice to see hwasan catching these things!

Ok.

Thanks,

Kyrill



> gcc/ChangeLog:
>
> 2019-11-05  Matthew Malcomson 
>
>     * config/aarch64/aarch64.c (aarch64_handle_attr_cpu): Allocate
>     enough bytes for the NULL character.
>
>
>
> ### Attachment also inlined for ease of reply    
> ###
>
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 232317d4a5a4a16529f573eef5a8d7a068068207..fc03faa8f8d459a84024d4394fff375b72d31264
>  
> 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -13298,7 +13298,7 @@ aarch64_handle_attr_cpu (const char *str)
>   static bool
>   aarch64_handle_attr_branch_protection (const char* str)
>   {
> -  char *err_str = (char *) xmalloc (strlen (str));
> +  char *err_str = (char *) xmalloc (strlen (str) + 1);
>    enum aarch64_parse_opt_result res = aarch64_parse_branch_protection 
> (str,
> &err_str);
>    bool success = false;
>


Re: [PATCH 1/X][mid-end] Fix declared type of personality functions

2019-11-05 Thread Richard Biener
On Tue, 5 Nov 2019, Matthew Malcomson wrote:

> `build_personality_function` generates a declaration for a personality
> function.  The type it declares for these functions doesn't match the
> type of the actual personality functions that are defined by the C++
> unwinding ABI.
> 
> This doesn't cause any crashes since the compiler never generates a call
> to these decl's, and hence the type of the function is never used.
> Nonetheless, for the sake of consistency and readability we update the
> type of this declaration.
> 
> (See libstdc++-v3/libsupc++/unwind-cxx.h for declaration of 
> _gxx_personality_v0
> to compare types).

OK.  I believe _Unwind_Personality_Fn in libgcc/unwind-generic.h is the
correct reference.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> 2019-11-05  Matthew Malcomson  
> 
>   * expr.c (build_personality_function): Fix generated type to
>   match actual personality functions.
> 
> 
> 
> ### Attachment also inlined for ease of reply
> ###
> 
> 
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 
> 2f2b53f8b6905013b4214eea137d67c666b0c795..7dc37a288ebffb99c990442cf339b848c5fa9d2e
>  100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -12525,7 +12525,8 @@ build_personality_function (const char *lang)
>  
>name = ACONCAT (("__", lang, "_personality", unwind_and_version, NULL));
>  
> -  type = build_function_type_list (integer_type_node, integer_type_node,
> +  type = build_function_type_list (unsigned_type_node,
> +integer_type_node, integer_type_node,
>  long_long_unsigned_type_node,
>  ptr_type_node, ptr_type_node, NULL_TREE);
>decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-05 Thread Kyrylo Tkachov
Hi Andrew, Anton,

On 11/1/19 11:22 PM, Andrew Pinski wrote:
> On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
>  wrote:
> >
> > Hello,
> >
> > Here is the one-liner that fixes the incorrect
> > vec_perm cost for thunderx2t99 chip.
> > With the patch applied 526.blender of CPU2017
> > gets ~5% improvement with no measurable changes
> > for other benchmarks.
> >
> > Bootstrapped OK on aarch64-linux-gnu.
> >
> > OK for trunk?
>
> Maybe the big question is vec_perm used for both 1 input and 2 input
> cases?  If so maybe splitting the two cases would be important too.
> Otherwise this is ok from my point of view but I can't approve it.
>
I'd be interested to see a testcase/demonstration where this would
be beneficial.

In the meantime this patch is ok if it helps thunderx2t99 performance.

2019-11-01 Anton Youdkevitch 

     * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
     change vec_perm field

ChangeLog nits:

* Two spaces between name and date+email

* No gcc/ prefix as the relevant ChangeLog file lives in gcc/

* End entry with full stop.

Anton, do you need someone to commit this for you?

Thanks,

Kyrill



>
> Thanks,
> Andrew Pinski
>
> >
> > 2019-11-01 Anton Youdkevitch 
> >
> > * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
> > change vec_perm field
> >
> > --
> >   Thanks,
> >   Anton


Re: r272976 - in /trunk/gcc/ada: ChangeLog ali.adb ...

2019-11-05 Thread Arnaud Charlet
> That should be @uref{prerequisites.html#GNAT-prerequisite,,GNAT 
> prerequisites}.

Thanks. I wouldn't have been able to guess that!

Corresponding patch installed.

2019-11-05  Arnaud Charlet  

* doc/install.texi: Further fix syntax for html generation.

Index: doc/install.texi
===
--- doc/install.texi(revision 277823)
+++ doc/install.texi(working copy)
@@ -2731,7 +2731,7 @@
 @ref{GNAT-prerequisite}.
 @end ifnothtml
 @ifhtml
-@uref{GNAT-prerequisite}.
+@uref{prerequisites.html#GNAT-prerequisite,,GNAT prerequisites}.
 @end ifhtml

 @section Building with profile feedback



Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-05 Thread Anton Youdkevitch

Andrew,

On 02.11.2019 2:22, Andrew Pinski wrote:

On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
 wrote:


Hello,

Here is the one-liner that fixes the incorrect
vec_perm cost for thunderx2t99 chip.
With the patch applied 526.blender of CPU2017
gets ~5% improvement with no measurable changes
for other benchmarks.

Bootstrapped OK on aarch64-linux-gnu.

OK for trunk?


Maybe the big question is vec_perm used for both 1 input and 2 input
cases?  If so maybe splitting the two cases would be important too.

It is as there is no per-number-of-operands distinction while
computing the vector permutation cost.
However, since 1-operand permutes are rare this would be a good
approximation (statistically).


Otherwise this is ok from my point of view but I can't approve it.





Thanks,
Andrew Pinski



2019-11-01 Anton Youdkevitch 

 * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
 change vec_perm field

--
   Thanks,
   Anton


Re: [C++ PATCH] Allow [[likely]] and [[unlikely]] in constexpr functions (PR c++/92343)

2019-11-05 Thread Jakub Jelinek
On Tue, Nov 05, 2019 at 08:19:17AM +, Jason Merrill wrote:
> OK.

Thanks, committed.

>  I wonder why we're returning false for EMPTY_CLASS_EXPR?

No idea.  Seems it has been added in http://gcc.gnu.org/r227296 on
c++-delayed-folding branch by Kai, but there was no testsuite coverage nor
I can find any mail on gcc-patches for it.

Jakub



Re: [PATCH 0/X] Introduce HWASAN sanitizer to GCC

2019-11-05 Thread Matthew Malcomson
On 05/11/2019 11:32, Matthew Malcomson wrote:
> 
> Testing done:
> Full bootstrap and regtest on x86_64 (no difference -- hwasan not used).
> 
> Full bootstrap and regtest on AArch64 sanitizing with hwasan and running
> on recent kernel.
> Regressions all accounted for:
>1) tests under plugin/
>   testism where hwasan library is not linked in.
>   (same appears to happen for asan)
>2) branch-protection-attr.c
>   New bug found by hwasan, fix in this patch series.
>3) pr88597.c
>   timeout, can run manually and everything works (but is very slow)
>4) aarch64/long_branch_1.c
>   timeout, as above.
>5) gfortran/class_61.f90
>   bug already caught by ASAN and reported upstream
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89661
>6) gfortran/dec_type_print_2.f03
>   bug already caught by ASAN and reported upstream
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86657
>7) gfortran/minlocval_3.f90
>   timeout, can run manually and passes (but is very slow)
> 

Oh, and I also tested by compiling the kernel for software memory 
tagging (after a quick hack to use __SANITIZE_HWADDRESS__ and pass the 
correct flags for gcc) then using the kernel sanitizer testing module 
test_kasan.

The kernel was tested without stack-tagging, and we caught the tests 
which didn't access within a valid 16 byte granule (i.e. caught those 
that were expected).

I've been running this sanitized kernel on my test machine without 
problems for a week and a bit.

> 
> Entire patch series attached to cover letter.
> 



Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-05 Thread Anton Youdkevitch

Kyrill,

On 05.11.2019 14:43, Kyrylo Tkachov wrote:

Hi Andrew, Anton,

On 11/1/19 11:22 PM, Andrew Pinski wrote:

On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
 wrote:


Hello,

Here is the one-liner that fixes the incorrect
vec_perm cost for thunderx2t99 chip.
With the patch applied 526.blender of CPU2017
gets ~5% improvement with no measurable changes
for other benchmarks.

Bootstrapped OK on aarch64-linux-gnu.

OK for trunk?


Maybe the big question is vec_perm used for both 1 input and 2 input
cases?  If so maybe splitting the two cases would be important too.
Otherwise this is ok from my point of view but I can't approve it.


> I'd be interested to see a testcase/demonstration where this would
be beneficial.

Well, since I measured this on SPEC 2017, so, the result is
the overall benchmark score. I can try to extract the relevant
pieces of code that get compiled differently to see if they can
be made into a standalone testcase. I didn't try this yet, though.



In the meantime this patch is ok if it helps thunderx2t99 performance.

2019-11-01 Anton Youdkevitch 

      * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
      change vec_perm field

ChangeLog nits:

* Two spaces between name and date+email

* No gcc/ prefix as the relevant ChangeLog file lives in gcc/

* End entry with full stop.

Thanks, will do like this next time.



Anton, do you need someone to commit this for you?

Yes, it would be nice if you can do this for me.



Thanks,

Kyrill





Thanks,
Andrew Pinski



2019-11-01 Anton Youdkevitch 

  * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
  change vec_perm field

--
    Thanks,
    Anton


Re: Add object allocators to symbol and call summaries

2019-11-05 Thread Martin Liška

On 11/5/19 12:01 PM, Jan Hubicka wrote:

On 11/5/19 11:36 AM, Jan Hubicka wrote:

Hi,
this patch adds object allocators to manage IPA summaries. This reduces
malloc overhead and fragmentation.  I now get peak memory use 7.5GB instead
of 10GB for firefox WPA because reduced fragmentation leads to less COWs after
forks.


That sounds promising.


Additional bonus is that we now have statistics gathered by mem-reports
which makes my life easier, too.


What's currently bad with the detailed memory statistics? I updated the
code that one should see the allocation for the underlying hash_map and
vec?


I currently get:


Pool name   Allocation pool 
  Pools   LeakPeakTimesElt size

tree_scclto/lto-common.c:2709 (read_cgraph_and_symbols) 
 1 0 :  0.0%   99M 3169k: 43.7%  32
IPA histogram   ipa-profile.c:77 
(__static_initialization_and_de 116 :  0.0%   16 1 :  
0.0%  16
IPA-PROP ref descriptions   ipa-prop.c:170 
(__static_initialization_and_dest 1   226k:  0.3%  226k 9670 :  
0.1%  24
function summaryipa-fnsummary.c:557 (ipa_fn_summary_alloc)  
 1  6145k:  7.0% 6257k  391k:  5.4%  16
function summaryipa-pure-const.c:136 (__base_ctor ) 
 1  6863k:  7.9% 9449k  590k:  8.1%  16
edge predicates ipa-fnsummary.c:93 
(__static_initialization_and_ 1  8327k:  9.5% 8385k  209k:  
2.9%  40
call summaryipa-sra.c:436 (__base_ctor )
 118M: 21.3%   21M 1393k: 19.2%  16
call summaryipa-fnsummary.h:276 (__base_ctor )  
 146M: 54.0%   46M 1483k: 20.5%  32

Pool name   Allocation pool 
  Pools   LeakPeakTimesElt size

Total   
  9 85M


This is quite readable, though we may give them different names and
update constructors. Not a big deal IMO.

For GGC statistics I see:

varpool.c:137 (create_empty)  7924k:  0.4%0 :  
0.0% 3214k:  0.2%0 :  0.0%   87k
cgraph.c:939 (cgraph_allocate_init_indirect_info  8566k:  0.4%0 :  
0.0% 1395k:  0.1%0 :  0.0%  113k
alias.c:1170 (record_alias_subset)  12M:  0.6%0 :  
0.0%   12k:  0.0%   99k:  0.1%   12k
ipa-sra.c:2717 (isra_read_node_info)12M:  0.6%0 :  
0.0% 4179k:  0.2%   21k:  0.0%  376k
toplev.c:904 (realloc_for_line_map) 16M:  0.8%0 :  
0.0%   15M:  0.9%  144 :  0.0%   12
ipa-prop.c:278 (ipa_alloc_node_params)  16M:  0.8%  266k:  
0.4%0 :  0.0%   22k:  0.0%  366k
symbol-summary.h:555 (allocate_new) 18M:  0.9%0 :  
0.0%  119k:  0.0%0 :  0.0% 1171k
  ^^^ here we should point the caller of get_create

ipa-fnsummary.c:3877 (inline_read_section)  28M:  1.4%0 :  
0.0%  552k:  0.0%  392k:  0.3%  261k
lto-section-in.c:388 (lto_new_in_decl_state)29M:  1.4%0 :  
0.0%   11M:  0.7%0 :  0.0%  587k
symtab.c:582 (create_reference) 35M:  1.7%0 :  
0.0%   50M:  2.9% 1199k:  0.9%  541k
symbol-summary.h:64 (allocate_new)  46M:  2.2%0 :  
0.0% 2445k:  0.1%0 :  0.0% 1168k
  ^^^ same here.

stringpool.c:63 (alloc_node)47M:  2.3%0 :  
0.0%0 :  0.0%0 :  0.0% 1217k
ipa-prop.c:4480 (ipa_read_edge_info)51M:  2.4%0 :  
0.0%  260k:  0.0%  404k:  0.3%  531k
hash-table.h:801 (expand)   81M:  3.9%0 :  
0.0%   80M:  4.7%   88k:  0.1% 3349
  ^^^ some of memory comes here which ought to be accounted to caller of
  expand.


Yes, these all com

Re: [PR47785] COLLECT_AS_OPTIONS

2019-11-05 Thread Richard Biener
On Tue, Nov 5, 2019 at 12:17 AM Kugan Vivekanandarajah
 wrote:
>
> Hi,
> Thanks for the review.
>
> On Tue, 5 Nov 2019 at 03:57, H.J. Lu  wrote:
> >
> > On Sun, Nov 3, 2019 at 6:45 PM Kugan Vivekanandarajah
> >  wrote:
> > >
> > > Thanks for the reviews.
> > >
> > >
> > > On Sat, 2 Nov 2019 at 02:49, H.J. Lu  wrote:
> > > >
> > > > On Thu, Oct 31, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > >  wrote:
> > > > >
> > > > > On Wed, 30 Oct 2019 at 03:11, H.J. Lu  wrote:
> > > > > >
> > > > > > On Sun, Oct 27, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > > > >  wrote:
> > > > > > >
> > > > > > > Hi Richard,
> > > > > > >
> > > > > > > Thanks for the review.
> > > > > > >
> > > > > > > On Wed, 23 Oct 2019 at 23:07, Richard Biener 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Mon, Oct 21, 2019 at 10:04 AM Kugan Vivekanandarajah
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > Hi Richard,
> > > > > > > > >
> > > > > > > > > Thanks for the pointers.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > On Fri, 11 Oct 2019 at 22:33, Richard Biener 
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > On Fri, Oct 11, 2019 at 6:15 AM Kugan Vivekanandarajah
> > > > > > > > > >  wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Richard,
> > > > > > > > > > > Thanks for the review.
> > > > > > > > > > >
> > > > > > > > > > > On Wed, 2 Oct 2019 at 20:41, Richard Biener 
> > > > > > > > > > >  wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > On Wed, Oct 2, 2019 at 10:39 AM Kugan Vivekanandarajah
> > > > > > > > > > > >  wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi,
> > > > > > > > > > > > >
> > > > > > > > > > > > > As mentioned in the PR, attached patch adds 
> > > > > > > > > > > > > COLLECT_AS_OPTIONS for
> > > > > > > > > > > > > passing assembler options specified with -Wa, to the 
> > > > > > > > > > > > > link-time driver.
> > > > > > > > > > > > >
> > > > > > > > > > > > > The proposed solution only works for uniform -Wa 
> > > > > > > > > > > > > options across all
> > > > > > > > > > > > > TUs. As mentioned by Richard Biener, supporting 
> > > > > > > > > > > > > non-uniform -Wa flags
> > > > > > > > > > > > > would require either adjusting partitioning according 
> > > > > > > > > > > > > to flags or
> > > > > > > > > > > > > emitting multiple object files  from a single LTRANS 
> > > > > > > > > > > > > CU. We could
> > > > > > > > > > > > > consider this as a follow up.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Bootstrapped and regression tests on  arm-linux-gcc. 
> > > > > > > > > > > > > Is this OK for trunk?
> > > > > > > > > > > >
> > > > > > > > > > > > While it works for your simple cases it is unlikely to 
> > > > > > > > > > > > work in practice since
> > > > > > > > > > > > your implementation needs the assembler options be 
> > > > > > > > > > > > present at the link
> > > > > > > > > > > > command line.  I agree that this might be the way for 
> > > > > > > > > > > > people to go when
> > > > > > > > > > > > they face the issue but then it needs to be documented 
> > > > > > > > > > > > somewhere
> > > > > > > > > > > > in the manual.
> > > > > > > > > > > >
> > > > > > > > > > > > That is, with COLLECT_AS_OPTION (why singular?  I'd 
> > > > > > > > > > > > expected
> > > > > > > > > > > > COLLECT_AS_OPTIONS) available to cc1 we could stream 
> > > > > > > > > > > > this string
> > > > > > > > > > > > to lto_options and re-materialize it at link time (and 
> > > > > > > > > > > > diagnose mismatches
> > > > > > > > > > > > even if we like).
> > > > > > > > > > > OK. I will try to implement this. So the idea is if we 
> > > > > > > > > > > provide
> > > > > > > > > > > -Wa,options as part of the lto compile, this should be 
> > > > > > > > > > > available
> > > > > > > > > > > during link time. Like in:
> > > > > > > > > > >
> > > > > > > > > > > arm-linux-gnueabihf-gcc -march=armv7-a -mthumb -O2 -flto
> > > > > > > > > > > -Wa,-mimplicit-it=always,-mthumb -c test.c
> > > > > > > > > > > arm-linux-gnueabihf-gcc  -flto  test.o
> > > > > > > > > > >
> > > > > > > > > > > I am not sure where should we stream this. Currently, 
> > > > > > > > > > > cl_optimization
> > > > > > > > > > > has all the optimization flag provided for compiler and 
> > > > > > > > > > > it is
> > > > > > > > > > > autogenerated and all the flags are integer values. Do 
> > > > > > > > > > > you have any
> > > > > > > > > > > preference or example where this should be done.
> > > > > > > > > >
> > > > > > > > > > In lto_write_options, I'd simply append the contents of 
> > > > > > > > > > COLLECT_AS_OPTIONS
> > > > > > > > > > (with -Wa, prepended to each of them), then recover them in 
> > > > > > > > > > lto-wrapper
> > > > > > > > > > for each TU and pass them down to the LTRANS compiles (if 
> > > > > > > > > > they agree
> > > > > > > > > > for all TUs, otherwise I'd warn and drop them).
> > > > > > > > >
> > > > > > > > > Attached pa

Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-05 Thread Kyrylo Tkachov

On 11/5/19 11:54 AM, Anton Youdkevitch wrote:
Kyrill,

On 05.11.2019 14:43, Kyrylo Tkachov wrote:
> Hi Andrew, Anton,
>
> On 11/1/19 11:22 PM, Andrew Pinski wrote:
>> On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
>>  wrote:
>>>
>>> Hello,
>>>
>>> Here is the one-liner that fixes the incorrect
>>> vec_perm cost for thunderx2t99 chip.
>>> With the patch applied 526.blender of CPU2017
>>> gets ~5% improvement with no measurable changes
>>> for other benchmarks.
>>>
>>> Bootstrapped OK on aarch64-linux-gnu.
>>>
>>> OK for trunk?
>>
>> Maybe the big question is vec_perm used for both 1 input and 2 input
>> cases?  If so maybe splitting the two cases would be important too.
>> Otherwise this is ok from my point of view but I can't approve it.
>>
> I'd be interested to see a testcase/demonstration where this would
> be beneficial.
Well, since I measured this on SPEC 2017, so, the result is
the overall benchmark score. I can try to extract the relevant
pieces of code that get compiled differently to see if they can
be made into a standalone testcase. I didn't try this yet, though.


Sorry, I was referring to Andrew's suggestion about splitting the costs rather 
than your change.


>
> In the meantime this patch is ok if it helps thunderx2t99 performance.
>
> 2019-11-01 Anton Youdkevitch 
> 
>
>   * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
>   change vec_perm field
>
> ChangeLog nits:
>
> * Two spaces between name and date+email
>
> * No gcc/ prefix as the relevant ChangeLog file lives in gcc/
>
> * End entry with full stop.
Thanks, will do like this next time.

>
> Anton, do you need someone to commit this for you?
Yes, it would be nice if you can do this for me.


Committed as r277826 with the following adjusted ChangeLog:

2019-11-05  Anton Youdkevitch  


* config/aarch64/aarch64.c (thunderx2t99_vector_cost):
Change vec_perm field to 10.

Thanks for the patch. If you intend to make more contributions in the future it 
would be worth sorting a copyright assignment if you haven't done so already.

Kyrill



>
> Thanks,
>
> Kyrill
>
>
>
>>
>> Thanks,
>> Andrew Pinski
>>
>>>
>>> 2019-11-01 Anton Youdkevitch 
>>> 
>>>
>>>   * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
>>>   change vec_perm field
>>>
>>> --
>>> Thanks,
>>> Anton


[RFC PATCH] Extend the simd function attribute

2019-11-05 Thread Szabolcs Nagy
(sorry for the resend, i forgot to add the mailing list)

GCC currently supports two ways to declare the availability of vector
variants of a scalar function:

  #pragma omp declare simd
  void f (void);

and

  __attribute__ ((simd))
  void f (void);

However neither can declare unambiguously a single vector variant, only
a set of vector variants which may change in the future as new vector
architectures or vector call ABIs are introduced. So these mechanisms
are not suitable for libraries which must not declare more than what
they provide.

One solution is to use the omp declare variant feature of OpenMP 5,
but that seems overcomplicated and still does not provide a reliable
mechanism (requires gcc or vendor specific extensions for unambiguous
declarations). And the omp pragma only works with -fopenmp-simd or
-fopenmp.

A simpler approach is to extend the gcc specific simd attribute such
that it can specify a single vector variant of simple scalar functions.
Where simple scalar functions are ones that only take or return scalar
integer or floating type values. I believe this can be achieved by

  __attribute__ ((simd (mask, simdlen, simdabi

where mask is "inbranch" or "notinbranch" like now, simdlen is an int
with the same meaning as in omp declare simd and simdabi is a string
specifying the call ABI (which the intel vector ABI calls ISA), can be
the same single letter string as the one used for name mangling.

A library may not want to use the ABI symbol name or provide multiple
implementations for the same vector function so it might make sense to
extend the syntax to

  __attribute__ ((simd (mask, simdlen, simdabi, name

when the same vector function is declared with different names then
it's unspecified which one the compiler picks.

The simd attribute currently can be used for both declarations and
definitions, in the latter case the simd variants of the function are
generated, which should work with the extended simd attribute too.

The implementation of the simd attribute relied on openmp related
infrastructure in the compiler, i extended that to cover the new
attribute parameters, but I'm not sure if the new OMP_*_CLAUSEs may
break somewhere.

The simd attribute has some issues i did not try to address:

- incompatible vector prototypes are not checked:

  int _ZGVbN4_foo(int);
  __attribute__((simd("notinbranch", 4, "b")))
  void foo(void);

- incompatible symbol definitions are not checked:

  int _ZGVbN4_foo(int x) {return 2*x;}
  __attribute__((simd("notinbranch", 4, "b")))
  void foo(void) {}

- symbol redirection with asm does not work:

  int _ZGVbN4_foo(int) __asm__("bar");
  __attribute__((simd("notinbranch", 4, "b")))
  void foo(void);

Remaining work:

- extend the fortran builtin directive that's currently

  !GCC$ builtin (func) attributes simd FLAGS if(target)

I assume it can be

  !GCC$ builtin (func) attributes simd (mask, len, abi) if(target)

but i don't know how to handle old compilers in fortran.

- syntax for scalable vector functions: simdlen == 0 should
  work but requires internal changes,

- update documentation and add tests,

- and there are various TODOs left in the code.

Tested on aarch64-linux-gnu and x86_64-linux-gnu, I would like to see
some feedback on the approach.
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 1c9f28587fbb2348cc30e302e889a5a22906901a..ae73b88a882cbe7fed1bade48d3d111d255433c5 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -448,7 +448,7 @@ const struct attribute_spec c_common_attribute_table[] =
 			  handle_omp_declare_variant_attribute, NULL },
   { "omp declare variant variant", 0, -1, true,  false, false, false,
 			  handle_omp_declare_variant_attribute, NULL },
-  { "simd",		  0, 1, true,  false, false, false,
+  { "simd",		  0, 4, true,  false, false, false,
 			  handle_simd_attribute, NULL },
   { "omp declare target", 0, -1, true, false, false, false,
 			  handle_omp_declare_target_attribute, NULL },
@@ -3094,6 +3094,15 @@ handle_simd_attribute (tree *node, tree name, tree args, int, bool *no_add_attrs
 {
   tree t = get_identifier ("omp declare simd");
   tree attr = NULL_TREE;
+
+  /* Allow
+	  simd
+	  simd (mask)
+	  simd (mask, simdlen)
+	  simd (mask, simdlen, simdabi)
+	  simd (mask, simdlen, simdabi, name)
+	 forms.  */
+
   if (args)
 	{
 	  tree id = TREE_VALUE (args);
@@ -3118,8 +3127,73 @@ handle_simd_attribute (tree *node, tree name, tree args, int, bool *no_add_attrs
 	  *no_add_attrs = true;
 	  return NULL_TREE;
 	}
+
+	  args = TREE_CHAIN (args);
 	}
 
+  if (args)
+	{
+	  tree arg = TREE_VALUE (args);
+
+	  // TODO: simdlen == 0 means scalable ?
+	  arg = c_fully_fold (arg, false, NULL);
+	  if (TREE_CODE (arg) != INTEGER_CST
+	  || !INTEGRAL_TYPE_P (TREE_TYPE (arg))
+	  || tree_int_cst_sgn (arg) != 1)
+	{
+	  error ("second argument of attribute %qE must be positive "
+		 "con

[PATCH 2/5] Update Makefile.am.

2019-11-05 Thread Martin Liska

libsanitizer/ChangeLog:

2019-11-05  Martin Liska  

* tsan/Makefile.am: Rename tsan_interceptors.cpp to
tsan_interceptors_posix.
* tsan/Makefile.in: Regenerate.
---
 libsanitizer/tsan/Makefile.am | 2 +-
 libsanitizer/tsan/Makefile.in | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am
index 1ca9b68a3c5..5d37abd20de 100644
--- a/libsanitizer/tsan/Makefile.am
+++ b/libsanitizer/tsan/Makefile.am
@@ -20,7 +20,7 @@ tsan_files = \
 	tsan_fd.cpp \
 	tsan_flags.cpp \
 	tsan_ignoreset.cpp \
-	tsan_interceptors.cpp \
+	tsan_interceptors_posix.cpp \
 	tsan_interceptors_mac.cpp \
 	tsan_interface_ann.cpp \
 	tsan_interface_atomic.cpp \
diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in
index cae00ab45ad..3d1d9565e47 100644
--- a/libsanitizer/tsan/Makefile.in
+++ b/libsanitizer/tsan/Makefile.in
@@ -146,7 +146,7 @@ LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
 am__DEPENDENCIES_1 =
 am__objects_1 = tsan_clock.lo tsan_debugging.lo tsan_external.lo \
 	tsan_fd.lo tsan_flags.lo tsan_ignoreset.lo \
-	tsan_interceptors.lo tsan_interceptors_mac.lo \
+	tsan_interceptors_posix.lo tsan_interceptors_mac.lo \
 	tsan_interface_ann.lo tsan_interface_atomic.lo \
 	tsan_interface.lo tsan_interface_java.lo tsan_malloc_mac.lo \
 	tsan_md5.lo tsan_mman.lo tsan_mutex.lo tsan_mutexset.lo \
@@ -421,7 +421,7 @@ tsan_files = \
 	tsan_fd.cpp \
 	tsan_flags.cpp \
 	tsan_ignoreset.cpp \
-	tsan_interceptors.cpp \
+	tsan_interceptors_posix.cpp \
 	tsan_interceptors_mac.cpp \
 	tsan_interface_ann.cpp \
 	tsan_interface_atomic.cpp \
@@ -585,8 +585,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_fd.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_flags.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_ignoreset.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interceptors.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interceptors_mac.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interceptors_posix.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interface.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interface_ann.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_interface_atomic.Plo@am__quote@


[PATCH 4/5] Set print_summary for UBSAN.

2019-11-05 Thread Martin Liska

libsanitizer/ChangeLog:

2019-11-05  Martin Liska  

* ubsan/ubsan_flags.cpp (InitializeFlags): Trunk decided to print
summary for all sanitizers, but we want to have UBSAN without it.
---
 libsanitizer/ubsan/ubsan_flags.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libsanitizer/ubsan/ubsan_flags.cpp b/libsanitizer/ubsan/ubsan_flags.cpp
index 721c2273f13..80de2a6d101 100644
--- a/libsanitizer/ubsan/ubsan_flags.cpp
+++ b/libsanitizer/ubsan/ubsan_flags.cpp
@@ -54,6 +54,7 @@ void InitializeFlags() {
   {
 CommonFlags cf;
 cf.CopyFrom(*common_flags());
+cf.print_summary = false;
 cf.external_symbolizer_path = GetFlag("UBSAN_SYMBOLIZER_PATH");
 OverrideCommonFlags(cf);
   }


[PATCH 0/5] libsanitizer: merge from trunk

2019-11-05 Thread Martin Liska
Hi.

I've just done merge from trunk for libsanitizer.

I bootstrapped the patch set on x86_64-linux-gnu and run
asan/ubsan/tsan tests on x86_64, ppc64le (power8) and
aarch64.

Plus I run ubsan and asan boostrap on ppc64le-linux-gnu and
abidiff is fine comparing the current trunk with the merged
libsanitizer.

I'll install the patches if there are no objections.

Martin

Martin Liska (5):
  Libsanitizer: merge from trunk with merge.sh.
  Update Makefile.am.
  Reapply all revisions mentioned in LOCAL_PATCHES.
  Set print_summary for UBSAN.
  Update scanned patterns in a test-case.

 .../c-c++-common/ubsan/ptr-overflow-2.c   |   10 +-
 libsanitizer/BlocksRuntime/Block.h|   59 +
 libsanitizer/BlocksRuntime/Block_private.h|  179 ++
 libsanitizer/MERGE|2 +-
 libsanitizer/asan/asan_allocator.cpp  |2 +-
 libsanitizer/asan/asan_debugging.cpp  |8 +-
 libsanitizer/asan/asan_descriptions.h |2 +-
 libsanitizer/asan/asan_errors.cpp |5 +-
 libsanitizer/asan/asan_errors.h   |3 +-
 libsanitizer/asan/asan_flags.inc  |   13 +-
 libsanitizer/asan/asan_globals.cpp|8 +-
 libsanitizer/asan/asan_globals_win.cpp|8 +-
 libsanitizer/asan/asan_interceptors.cpp   |  121 +-
 libsanitizer/asan/asan_interceptors.h |   12 +
 .../asan/asan_interceptors_memintrinsics.cpp  |2 +-
 libsanitizer/asan/asan_mac.cpp|2 +-
 libsanitizer/asan/asan_malloc_linux.cpp   |2 +-
 libsanitizer/asan/asan_malloc_win.cpp |   11 +-
 libsanitizer/asan/asan_new_delete.cpp |2 +-
 libsanitizer/asan/asan_poisoning.cpp  |2 +-
 libsanitizer/asan/asan_report.cpp |   10 +-
 libsanitizer/asan/asan_rtems.cpp  |4 +-
 libsanitizer/asan/asan_rtl.cpp|6 +-
 libsanitizer/asan/asan_scariness_score.h  |2 +-
 libsanitizer/asan/asan_shadow_setup.cpp   |3 +-
 libsanitizer/asan/asan_stats.cpp  |2 +-
 libsanitizer/asan/asan_suppressions.cpp   |2 +-
 libsanitizer/asan/asan_thread.cpp |5 +-
 libsanitizer/asan/asan_win.cpp|   10 +-
 libsanitizer/asan/asan_win_dll_thunk.cpp  |8 +-
 .../asan/asan_win_dynamic_runtime_thunk.cpp   |   12 +-
 .../include/sanitizer/dfsan_interface.h   |2 +-
 .../include/sanitizer/tsan_interface_atomic.h |8 +-
 .../include/sanitizer/ubsan_interface.h   |   32 +
 libsanitizer/interception/interception.h  |4 +-
 .../interception/interception_win.cpp |8 +-
 libsanitizer/lsan/lsan.cpp|4 +-
 libsanitizer/lsan/lsan_common.cpp |   16 +-
 libsanitizer/lsan/lsan_common.h   |5 +-
 libsanitizer/lsan/lsan_common_linux.cpp   |   12 +-
 libsanitizer/lsan/lsan_common_mac.cpp |6 +-
 libsanitizer/lsan/lsan_interceptors.cpp   |   55 +
 libsanitizer/lsan/lsan_mac.cpp|2 +-
 .../sanitizer_allocator_checks.h  |2 +-
 .../sanitizer_allocator_report.cpp|9 +-
 libsanitizer/sanitizer_common/sanitizer_asm.h |4 +-
 .../sanitizer_common/sanitizer_atomic_msvc.h  |   63 +-
 .../sanitizer_common/sanitizer_common.cpp |2 +-
 .../sanitizer_common/sanitizer_common.h   |   25 +-
 .../sanitizer_common_interceptors.inc |  118 +-
 .../sanitizer_common_interface.inc|1 +
 .../sanitizer_coverage_fuchsia.cpp|6 +-
 .../sanitizer_coverage_libcdep_new.cpp|4 +-
 .../sanitizer_coverage_win_sections.cpp   |   12 +-
 .../sanitizer_common/sanitizer_file.cpp   |2 +-
 .../sanitizer_flag_parser.cpp |3 +-
 .../sanitizer_common/sanitizer_flag_parser.h  |4 +-
 .../sanitizer_common/sanitizer_flags.cpp  |4 +-
 .../sanitizer_common/sanitizer_fuchsia.cpp|2 +-
 .../sanitizer_common/sanitizer_getauxval.h|   30 +-
 .../sanitizer_glibc_version.h |   26 +
 .../sanitizer_interceptors_ioctl_netbsd.inc   |2 +-
 .../sanitizer_internal_defs.h |   75 +-
 .../sanitizer_common/sanitizer_libc.cpp   |   11 +-
 .../sanitizer_common/sanitizer_linux.cpp  |   39 +-
 .../sanitizer_linux_libcdep.cpp   |7 +-
 .../sanitizer_common/sanitizer_mac.cpp|  124 +-
 .../sanitizer_platform_interceptors.h |   24 +-
 .../sanitizer_platform_limits_freebsd.h   | 1090 -
 .../sanitizer_platform_limits_posix.cpp   |   15 +-
 .../sanitizer_platform_limits_posix.h | 2168 +
 .../sanitizer_platform_limits_solaris.h   |5 +-
 .../sanitizer_common/sanitizer_posix.cpp  |2 +
 .../sanitizer_common/sanitizer_posix.h|2 +-
 .../sanitizer_posix_libcdep.cpp   |   26 +-
 .../sanitizer_common/sanitizer_printf.cpp |6 +-
 .../sanitizer_common/sanitizer_procmaps.h |2 +-
 .../sa

[PATCH 3/5] Reapply all revisions mentioned in LOCAL_PATCHES.

2019-11-05 Thread Martin Liska

libsanitizer/ChangeLog:

2019-11-05  Martin Liska  

* asan/asan_globals.cpp (CheckODRViolationViaIndicator): Reapply from
LOCAL_PATCHES.
(CheckODRViolationViaPoisoning): Likewise.
(RegisterGlobal): Likewise.
* asan/asan_interceptors.h 
(ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION): Likewise.
(defined): Likewise.
* asan/asan_mapping.h: Likewise.
* sanitizer_common/sanitizer_linux_libcdep.cpp (defined): Likewise.
* sanitizer_common/sanitizer_mac.cpp (defined): Likewise.
* sanitizer_common/sanitizer_platform_limits_linux.cpp (defined): 
Likewise.
* sanitizer_common/sanitizer_platform_limits_posix.h: Likewise.
* sanitizer_common/sanitizer_stacktrace.cpp (GetCanonicFrame): Likewise.
* tsan/tsan_rtl_ppc64.S: Likewise.
* ubsan/ubsan_handlers.cpp (__ubsan::__ubsan_handle_cfi_bad_icall): 
Likewise.
(__ubsan::__ubsan_handle_cfi_bad_icall_abort): Likewise.
* ubsan/ubsan_handlers.h (struct CFIBadIcallData): Likewise.
(struct CFICheckFailData): Likewise.
(RECOVERABLE): Likewise.
* ubsan/ubsan_platform.h: Likewise.
---
 libsanitizer/asan/asan_globals.cpp| 19 ---
 libsanitizer/asan/asan_interceptors.h |  7 ++-
 libsanitizer/asan/asan_mapping.h  |  2 +-
 .../sanitizer_linux_libcdep.cpp   |  4 
 .../sanitizer_common/sanitizer_mac.cpp|  2 +-
 .../sanitizer_platform_limits_linux.cpp   |  7 +--
 .../sanitizer_platform_limits_posix.h |  2 +-
 .../sanitizer_common/sanitizer_stacktrace.cpp | 17 -
 libsanitizer/tsan/tsan_rtl_ppc64.S|  1 +
 libsanitizer/ubsan/ubsan_handlers.cpp | 15 +++
 libsanitizer/ubsan/ubsan_handlers.h   |  8 
 libsanitizer/ubsan/ubsan_platform.h   |  2 ++
 12 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/libsanitizer/asan/asan_globals.cpp b/libsanitizer/asan/asan_globals.cpp
index 9d7dbc6f264..e045c31cd1c 100644
--- a/libsanitizer/asan/asan_globals.cpp
+++ b/libsanitizer/asan/asan_globals.cpp
@@ -154,23 +154,6 @@ static void CheckODRViolationViaIndicator(const Global *g) {
   }
 }
 
-// Check ODR violation for given global G by checking if it's already poisoned.
-// We use this method in case compiler doesn't use private aliases for global
-// variables.
-static void CheckODRViolationViaPoisoning(const Global *g) {
-  if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {
-// This check may not be enough: if the first global is much larger
-// the entire redzone of the second global may be within the first global.
-for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
-  if (g->beg == l->g->beg &&
-  (flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
-  !IsODRViolationSuppressed(g->name))
-ReportODRViolation(g, FindRegistrationSite(g),
-   l->g, FindRegistrationSite(l->g));
-}
-  }
-}
-
 // Clang provides two different ways for global variables protection:
 // it can poison the global itself or its private alias. In former
 // case we may poison same symbol multiple times, that can help us to
@@ -216,8 +199,6 @@ static void RegisterGlobal(const Global *g) {
 // where two globals with the same name are defined in different modules.
 if (UseODRIndicator(g))
   CheckODRViolationViaIndicator(g);
-else
-  CheckODRViolationViaPoisoning(g);
   }
   if (CanPoisonMemory())
 PoisonRedZones(*g);
diff --git a/libsanitizer/asan/asan_interceptors.h b/libsanitizer/asan/asan_interceptors.h
index 344a64bd83d..b7a85fedbdf 100644
--- a/libsanitizer/asan/asan_interceptors.h
+++ b/libsanitizer/asan/asan_interceptors.h
@@ -80,7 +80,12 @@ void InitializePlatformInterceptors();
 #if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && !SANITIZER_SOLARIS && \
 !SANITIZER_NETBSD
 # define ASAN_INTERCEPT___CXA_THROW 1
-# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
+# if ! defined(ASAN_HAS_CXA_RETHROW_PRIMARY_EXCEPTION) \
+ || ASAN_HAS_CXA_RETHROW_PRIMARY_EXCEPTION
+#   define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
+# else
+#   define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 0
+# endif
 # if defined(_GLIBCXX_SJLJ_EXCEPTIONS) || (SANITIZER_IOS && defined(__arm__))
 #  define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 1
 # else
diff --git a/libsanitizer/asan/asan_mapping.h b/libsanitizer/asan/asan_mapping.h
index 41fb49ee46d..09be904270c 100644
--- a/libsanitizer/asan/asan_mapping.h
+++ b/libsanitizer/asan/asan_mapping.h
@@ -163,7 +163,7 @@ static const u64 kDefaultShort64bitShadowOffset =
 static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
 static const u64 kMIPS32_ShadowOffset32 = 0x0aaa;
 static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37;
-static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
+static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;

[PATCH 5/5] Update scanned patterns in a test-case.

2019-11-05 Thread Martin Liska

gcc/testsuite/ChangeLog:

2019-11-05  Martin Liska  

* c-c++-common/ubsan/ptr-overflow-2.c: Update based on changed
run-time reporting format.
---
 gcc/testsuite/c-c++-common/ubsan/ptr-overflow-2.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-2.c b/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-2.c
index a1110a2ddbc..9e72401d792 100644
--- a/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-2.c
+++ b/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-2.c
@@ -93,16 +93,16 @@ main ()
   return 0;
 }
 
-/* { dg-output ":5:6\[79]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+ overflowed to (0\[xX])?0\+(\n|\r\n|\r)" } */
-/* { dg-output "\[^\n\r]*:6:6\[79]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?0\+ overflowed to (0\[xX])?\[fF]\+(\n|\r\n|\r)" } */
-/* { dg-output "\[^\n\r]*:7:7\[46]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+9 overflowed to (0\[xX])?0\+(\n|\r\n|\r)" } */
+/* { dg-output ":5:6\[79]\[^\n\r]*runtime error: applying non-zero offset to non-null pointer (0\[xX])?\[fF]\+ produced null pointer(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*:6:6\[79]\[^\n\r]*runtime error: applying non-zero offset \[0-9]\+ to null pointer(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*:7:7\[46]\[^\n\r]*runtime error: applying non-zero offset to non-null pointer (0\[xX])?\[fF]\+9 produced null pointer(\n|\r\n|\r)" } */
 /* { dg-output "\[^\n\r]*:8:7\[46]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?0\+3 overflowed to (0\[xX])?\[fF]\+(\n|\r\n|\r)" } */
 /* { dg-output "\[^\n\r]*:9:7\[46]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?0\+17 overflowed to (0\[xX])?\[fF]\+\[cC](\n|\r\n|\r)" } */
-/* { dg-output "\[^\n\r]*:10:7\[46]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+1 overflowed to (0\[xX])?0\+(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*:10:7\[46]\[^\n\r]*runtime error: applying non-zero offset to non-null pointer (0\[xX])?\[fF]\+1 produced null pointer(\n|\r\n|\r)" } */
 /* { dg-output "\[^\n\r]*:11:\[89]\[80]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+\[eE]3 overflowed to (0\[xX])?0\+2(\n|\r\n|\r)" } */
 /* { dg-output "\[^\n\r]*:13:\[89]\[80]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?0\+17 overflowed to (0\[xX])?\[fF]\+(\n|\r\n|\r)" } */
 /* { dg-output "\[^\n\r]*:12:\[89]\[80]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?0\+7 overflowed to (0\[xX])?\[fF]\+(\n|\r\n|\r)" } */
-/* { dg-output "\[^\n\r]*:14:\[89]\[91]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+\[eE]7 overflowed to (0\[xX])?0\+" } */
+/* { dg-output "\[^\n\r]*:14:\[89]\[91]\[^\n\r]*runtime error: applying non-zero offset to non-null pointer (0\[xX])?\[fF]\+\[eE]7 produced null pointer" } */
 /* { dg-output "(\n|\r\n|\r)" { target int32 } } */
 /* { dg-output "\[^\n\r]*:17:\[67]\[82]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+ overflowed to (0\[xX])?0\+3(\n|\r\n|\r)" { target int32 } } */
 /* { dg-output "\[^\n\r]*:18:\[67]\[86]\[^\n\r]*runtime error: pointer index expression with base (0\[xX])?\[fF]\+ overflowed to (0\[xX])?0\+107(\n|\r\n|\r)" { target int32 } } */


Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-05 Thread Anton Youdkevitch

On 05.11.2019 15:09, Kyrylo Tkachov wrote:


On 11/5/19 11:54 AM, Anton Youdkevitch wrote:

Kyrill,

On 05.11.2019 14:43, Kyrylo Tkachov wrote:
> Hi Andrew, Anton,
> 
> On 11/1/19 11:22 PM, Andrew Pinski wrote:

>> On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
>>  wrote:
>>>
>>> Hello,
>>>
>>> Here is the one-liner that fixes the incorrect
>>> vec_perm cost for thunderx2t99 chip.
>>> With the patch applied 526.blender of CPU2017
>>> gets ~5% improvement with no measurable changes
>>> for other benchmarks.
>>>
>>> Bootstrapped OK on aarch64-linux-gnu.
>>>
>>> OK for trunk?
>>
>> Maybe the big question is vec_perm used for both 1 input and 2 input
>> cases?  If so maybe splitting the two cases would be important too.
>> Otherwise this is ok from my point of view but I can't approve it.
>>
> I'd be interested to see a testcase/demonstration where this would would
> be beneficial.
Well, since I measured this on SPEC 2017, so, the result is
the overall benchmark score. I can try to extract the relevant
pieces of code that get compiled differently to see if they can
be made into a standalone testcase. I didn't try this yet, though.

Sorry, I was referring to Andrew's suggestion about splitting the costs 
rather than your change.



> 
> In the meantime this patch is ok if it helps thunderx2t99 performance.
> 
> 2019-11-01 Anton Youdkevitch 
> 
>       * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):

>       change vec_perm field
> 
> ChangeLog nits:
> 
> * Two spaces between name and date+email
> 
> * No gcc/ prefix as the relevant ChangeLog file lives in gcc/
> 
> * End entry with full stop.

Thanks, will do like this next time.

> 
> Anton, do you need someone to commit this for you?

Yes, it would be nice if you can do this for me.


Committed as r277826 with the following adjusted ChangeLog:

2019-11-05  Anton Youdkevitch 

     * config/aarch64/aarch64.c (thunderx2t99_vector_cost):
     Change vec_perm field to 10.

Thanks for the patch. If you intend to make more contributions in the 
future it would be worth sorting a copyright assignment if you haven't 
done so already.

Thanks a lot for pushing it.
I will figure that out.



Kyrill



> 
> Thanks,
> 
> Kyrill
> 
> 
> 
>>

>> Thanks,
>> Andrew Pinski
>>
>>>
>>> 2019-11-01 Anton Youdkevitch 
>>>
>>>   * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
>>>   change vec_perm field
>>>
>>> --
>>>     Thanks,
>>>     Anton


Re: [PATCH 1/5] Libsanitizer: merge from trunk with merge.sh.

2019-11-05 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 04:10:27PM +0100, Martin Liska wrote:
> 
> libsanitizer/ChangeLog:
> 
> 2019-11-05  Martin Liska  
> 
>   * all source files: Merge from upstream r375507.
> ---
>  libsanitizer/BlocksRuntime/Block.h|   59 +
>  libsanitizer/BlocksRuntime/Block_private.h|  179 ++

Do we really need this?

> --- a/libsanitizer/tsan/tsan_libdispatch.cpp
> +++ b/libsanitizer/tsan/tsan_interceptors_libdispatch.cpp
> @@ -1,4 +1,4 @@
> -//===-- tsan_libdispatch.cpp 
> --===//
> +//===-- tsan_interceptors_libdispatch.cpp 
> -===//
>  //
>  // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
> Exceptions.
>  // See https://llvm.org/LICENSE.txt for license information.
> @@ -16,6 +16,7 @@
>  #include "tsan_interceptors.h"
>  #include "tsan_rtl.h"
>  
> +#include "BlocksRuntime/Block.h"
>  #include "tsan_dispatch_defs.h"
>  
>  namespace __tsan {

I mean, couldn't we wrap this Block.h include with #ifdef __BLOCKS__ or so
as a local patch (at least for now)?

Otherwise the patch series LGTM.

Jakub



Re: [PATCH v2] PR85678: Change default to -fno-common

2019-11-05 Thread Richard Biener
On Mon, Nov 4, 2019 at 3:39 PM Wilco Dijkstra  wrote:
>
> Hi Richard,
>
> >> > Please don't add -fcommon in lto.exp.
> >>
> >> So what is the best way to add an extra option to lto.exp?
> >> Note dg-lto-options completely overrides the options from lto.exp, so I 
> >> can't
> >> use that except in tests which already use it.
> >
> > On what testcases do you need it at all?
>
> These need it in order to run over the original set of LTO options. A 
> possibility
> would be to select one of the set of options and just run that using 
> dg-lto-options
> (assuming it's safe to use -flto-partition and/or -flinker-plugin on all 
> targets).
>
> PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_0.C line 3)
> PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_0.C line 3)
> PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_1.c line 1)
> PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_1.c line 1)

Please investigate those - C++ has -fno-common already so it might be a mix
of C/C++ required here.  Note that secondary files can use dg-options
with the same behavior as dg-additional-options (they append to dg-lto-options),
so here in _1.c add { dg-options "-fcommon" }

> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 
> -flto -flto-partition=1to1 -fno-use-linker-plugin
> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 
> -flto -flto-partition=none -fuse-linker-plugin
> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 
> -flto -fuse-linker-plugin -fno-fat-lto-objects
> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 
> -flto -flto-partition=1to1 -fno-use-linker-plugin
> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 
> -flto -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects
> PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 
> -flto -fuse-linker-plugin
>
>
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
> -flto -flto-partition=1to1 -fno-use-linker-plugin
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
> -flto -flto-partition=none -fuse-linker-plugin
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
> -flto -fuse-linker-plugin -fno-fat-lto-objects
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
> -flto -flto-partition=1to1 -fno-use-linker-plugin
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
> -flto -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects
> PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
> -flto -fuse-linker-plugin

This is a testcase relying on -fcommon to link (it has two
definitions), I believe you can
add dg-options "-fcommon" to the secondary file here as well, one
COMMON is enough
to make the link work.

Richard.

>
> Wilco


Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-05 Thread Richard Biener
On Mon, Nov 4, 2019 at 3:49 PM Jakub Jelinek  wrote:
>
> On Mon, Nov 04, 2019 at 03:23:20PM +0100, Martin Liška wrote:
> > The patch adds a new pass that identifies a series of if-elseif
> > statements and transform then into a GIMPLE switch (if possible).
> > The pass runs right after tree-ssa pass and I decided to implement
> > matching of various forms that are introduced by folder (fold_range_test):
>
> Not a review, just a few questions:

Likewise - please do not name switches -ftree-*, 'tree' doesn't add anything
but confusion to users.  Thus use -fif-to-switch or -fconvert-if-to-switch

+The transformation can help to produce a faster code for
+the switch statement.

produce faster code.

Doesn't it also produce smaller code eventually?

Please do not put code transform passes into build_ssa_passes (why did
you choose this place)?  The pass should go into pass_all_early_optimizations
instead, and I'm quite sure you want to run _after_ CSE.  I'd even say
that the pass should run as part of switch-conversion, so we build
a representation of a switch internally and then code-generate the optimal
form directly.  For now just put the pass before switch-conversion.

There are functions without comments in the patch and you copied
from DSE which shows in confusing comments left over from the original.

+  mark_virtual_operands_for_renaming (cfun);

if you did nothing renaming all vops is expensive.

I'm missing an overall comment - you are using a dominator walk
but do nothing in the after hook which means you are not really
gathering any data?  You're also setting visited bits on BBs which
means you are visiting alternate BBs during the DOM walk.

> 1) what does it do if __builtin_expect* has been used, does it preserve
>the probabilities and if in the end decides to expand as ifs, are those
>probabilities retained through it?
> 2) for the reassoc-*.c testcases, do you get identical or better code
>with the patch?
> 3) shouldn't it be gimple-if-to-switch.c instead?
> 4) what code size effect does the patch have say on cc1plus (if you don't
>count the code changes of the patch itself, i.e. revert the patch in the
>stage3 and rebuild just the stage3)?
>
> > +struct case_range
> > +{
> > +  /* Default constructor.  */
> > +  case_range ():
> > +m_min (NULL_TREE), m_max (NULL_TREE)
>
> I admit I'm never sure about coding conventions for C++,
> but shouldn't there be a space before :, or even better :
> be on the next line before m_min ?
>
> Jakub
>


[PATCH][OBVIOUS] Use more ggc_delete.

2019-11-05 Thread Martin Liška

Hi.

It's a small refactoring patch that I've just tested.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

I'm going to install the patch.
Thanks,
Martin

gcc/ChangeLog:

2019-11-05  Martin Liska  

* symbol-summary.h: Use ggc_delete.
---
 gcc/symbol-summary.h | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)


diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
index 8aedcfe9143..a5e20e547a1 100644
--- a/gcc/symbol-summary.h
+++ b/gcc/symbol-summary.h
@@ -69,10 +69,7 @@ protected:
   void release (T *item)
   {
 if (is_ggc ())
-  {
-	item->~T ();
-	ggc_free (item);
-  }
+  ggc_delete (item);
 else
   m_allocator.remove (item);
   }
@@ -562,10 +559,7 @@ protected:
   void release (T *item)
   {
 if (is_ggc ())
-  {
-	item->~T ();
-	ggc_free (item);
-  }
+  ggc_delete (item);
 else
   m_allocator.remove (item);
   }



Re: [9/n] Replace vec_info::vector_size with vec_info::vector_mode

2019-11-05 Thread Richard Biener
On Fri, Oct 25, 2019 at 2:39 PM Richard Sandiford
 wrote:
>
> This patch replaces vec_info::vector_size with vec_info::vector_mode,
> but for now continues to use it as a way of specifying a single
> vector size.  This makes it easier for later patches to use
> related_vector_mode instead.

OK.

>
> 2019-10-24  Richard Sandiford  
>
> gcc/
> * tree-vectorizer.h (vec_info::vector_size): Replace with...
> (vec_info::vector_mode): ...this new field.
> * tree-vect-loop.c (vect_update_vf_for_slp): Update accordingly.
> (vect_analyze_loop, vect_transform_loop): Likewise.
> * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
> (vect_make_slp_decision, vect_slp_bb_region): Likewise.
> * tree-vect-stmts.c (get_vectype_for_scalar_type): Likewise.
> * tree-vectorizer.c (try_vectorize_loop_1): Likewise.
>
> gcc/testsuite/
> * gcc.dg/vect/vect-tail-nomask-1.c: Update expected epilogue
> vectorization message.
>
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h   2019-10-25 13:26:59.093879082 +0100
> +++ gcc/tree-vectorizer.h   2019-10-25 13:27:19.317736181 +0100
> @@ -329,9 +329,9 @@ typedef std::pair vec_object
>/* Cost data used by the target cost model.  */
>void *target_cost_data;
>
> -  /* The vector size for this loop in bytes, or 0 if we haven't picked
> - a size yet.  */
> -  poly_uint64 vector_size;
> +  /* If we've chosen a vector size for this vectorization region,
> + this is one mode that has such a size, otherwise it is VOIDmode.  */
> +  machine_mode vector_mode;
>
>  private:
>stmt_vec_info new_stmt_vec_info (gimple *stmt);
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c2019-10-25 13:27:15.525762975 +0100
> +++ gcc/tree-vect-loop.c2019-10-25 13:27:19.309736237 +0100
> @@ -1414,8 +1414,8 @@ vect_update_vf_for_slp (loop_vec_info lo
> dump_printf_loc (MSG_NOTE, vect_location,
>  "Loop contains SLP and non-SLP stmts\n");
>/* Both the vectorization factor and unroll factor have the form
> -loop_vinfo->vector_size * X for some rational X, so they must have
> -a common multiple.  */
> +GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
> +so they must have a common multiple.  */
>vectorization_factor
> = force_common_multiple (vectorization_factor,
>  LOOP_VINFO_SLP_UNROLLING_FACTOR 
> (loop_vinfo));
> @@ -2341,7 +2341,7 @@ vect_analyze_loop (class loop *loop, loo
> " loops cannot be vectorized\n");
>
>unsigned n_stmts = 0;
> -  poly_uint64 autodetected_vector_size = 0;
> +  machine_mode autodetected_vector_mode = VOIDmode;
>opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
>machine_mode next_vector_mode = VOIDmode;
>while (1)
> @@ -2357,7 +2357,7 @@ vect_analyze_loop (class loop *loop, loo
>   gcc_checking_assert (first_loop_vinfo == NULL);
>   return loop_vinfo;
> }
> -  loop_vinfo->vector_size = GET_MODE_SIZE (next_vector_mode);
> +  loop_vinfo->vector_mode = next_vector_mode;
>
>bool fatal = false;
>
> @@ -2366,7 +2366,7 @@ vect_analyze_loop (class loop *loop, loo
>
>opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
>if (mode_i == 0)
> -   autodetected_vector_size = loop_vinfo->vector_size;
> +   autodetected_vector_mode = loop_vinfo->vector_mode;
>
>if (res)
> {
> @@ -2401,21 +2401,21 @@ vect_analyze_loop (class loop *loop, loo
>
>if (mode_i < vector_modes.length ()
>   && known_eq (GET_MODE_SIZE (vector_modes[mode_i]),
> -  autodetected_vector_size))
> +  GET_MODE_SIZE (autodetected_vector_mode)))
> mode_i += 1;
>
>if (mode_i == vector_modes.length ()
> - || known_eq (autodetected_vector_size, 0U))
> + || autodetected_vector_mode == VOIDmode)
> {
>   if (first_loop_vinfo)
> {
>   loop->aux = (loop_vec_info) first_loop_vinfo;
>   if (dump_enabled_p ())
> {
> + machine_mode mode = first_loop_vinfo->vector_mode;
>   dump_printf_loc (MSG_NOTE, vect_location,
> -  "* Choosing vector size ");
> - dump_dec (MSG_NOTE, first_loop_vinfo->vector_size);
> - dump_printf (MSG_NOTE, "\n");
> +  "* Choosing vector mode %s\n",
> +  GET_MODE_NAME (mode));
> }
>   return first_loop_vinfo;
> }
> @@ -8238,12 +8238,9 @@ vect_transform_loop (loop_vec_info loop_
>   dump_printf (MSG_NOTE, "\n");
>   

Re: [10/n] Make less use of get_same_sized_vectype

2019-11-05 Thread Richard Biener
On Fri, Oct 25, 2019 at 2:41 PM Richard Sandiford
 wrote:
>
> Some callers of get_same_sized_vectype were dealing with operands that
> are constant or defined externally, and so have no STMT_VINFO_VECTYPE
> available.  Under the current model, using get_same_sized_vectype for
> that case is equivalent to using get_vectype_for_scalar_type, since
> get_vectype_for_scalar_type always returns vectors of the same size,
> once a size is fixed.
>
> Using get_vectype_for_scalar_type is arguably more obvious though:
> if we're using the same scalar type as we would for internal
> definitions, we should use the same vector type too.  (Constant and
> external definitions sometimes let us change the original scalar type
> to a "nicer" scalar type, but that isn't what's happening here.)
>
> This is a prerequisite to supporting multiple vector sizes in the same
> vec_info.

This might change the actual type we get back, IIRC we mass-changed
it in the opposite direction from your change in the past, because it's
more obvious to relate the type used to another vector type on the
stmt.  So isn't it better to use the new related_vector_type thing here?

Richard.

>
> 2019-10-24  Richard Sandiford  
>
> gcc/
> * tree-vect-stmts.c (vectorizable_call): If an operand is
> constant or external, use get_vectype_for_scalar_type
> rather than get_same_sized_vectype to get its vector type.
> (vectorizable_conversion, vectorizable_shift): Likewise.
> (vectorizable_operation): Likewise.
>
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   2019-10-25 13:27:19.313736209 +0100
> +++ gcc/tree-vect-stmts.c   2019-10-25 13:27:22.985710263 +0100
> @@ -3308,10 +3308,10 @@ vectorizable_call (stmt_vec_info stmt_in
>   return false;
> }
>  }
> -  /* If all arguments are external or constant defs use a vector type with
> - the same size as the output vector type.  */
> +  /* If all arguments are external or constant defs, infer the vector type
> + from the scalar type.  */
>if (!vectype_in)
> -vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
> +vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
>if (vec_stmt)
>  gcc_assert (vectype_in);
>if (!vectype_in)
> @@ -4800,10 +4800,10 @@ vectorizable_conversion (stmt_vec_info s
> }
>  }
>
> -  /* If op0 is an external or constant defs use a vector type of
> - the same size as the output vector type.  */
> +  /* If op0 is an external or constant def, infer the vector type
> + from the scalar type.  */
>if (!vectype_in)
> -vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
> +vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
>if (vec_stmt)
>  gcc_assert (vectype_in);
>if (!vectype_in)
> @@ -5564,10 +5564,10 @@ vectorizable_shift (stmt_vec_info stmt_i
>   "use not simple.\n");
>return false;
>  }
> -  /* If op0 is an external or constant def use a vector type with
> - the same size as the output vector type.  */
> +  /* If op0 is an external or constant def, infer the vector type
> + from the scalar type.  */
>if (!vectype)
> -vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
> +vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
>if (vec_stmt)
>  gcc_assert (vectype);
>if (!vectype)
> @@ -5666,7 +5666,7 @@ vectorizable_shift (stmt_vec_info stmt_i
>   "vector/vector shift/rotate found.\n");
>
>if (!op1_vectype)
> -   op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
> +   op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
>incompatible_op1_vectype_p
> = (op1_vectype == NULL_TREE
>|| maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
> @@ -5997,8 +5997,8 @@ vectorizable_operation (stmt_vec_info st
>   "use not simple.\n");
>return false;
>  }
> -  /* If op0 is an external or constant def use a vector type with
> - the same size as the output vector type.  */
> +  /* If op0 is an external or constant def, infer the vector type
> + from the scalar type.  */
>if (!vectype)
>  {
>/* For boolean type we cannot determine vectype by
> @@ -6018,7 +6018,7 @@ vectorizable_operation (stmt_vec_info st
>   vectype = vectype_out;
> }
>else
> -   vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
> +   vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
>  }
>if (vec_stmt)
>  gcc_assert (vectype);


Re: [PATCH 1/5] Libsanitizer: merge from trunk with merge.sh.

2019-11-05 Thread Martin Liška

On 11/5/19 1:23 PM, Jakub Jelinek wrote:

On Mon, Nov 04, 2019 at 04:10:27PM +0100, Martin Liska wrote:


libsanitizer/ChangeLog:

2019-11-05  Martin Liska  

* all source files: Merge from upstream r375507.
---
  libsanitizer/BlocksRuntime/Block.h|   59 +
  libsanitizer/BlocksRuntime/Block_private.h|  179 ++


Do we really need this?


No, as we do not use tsan_interceptors_libdispatch.cpp file.
Originally I included the file so that I needed libsanitizer/BlocksRuntime/*
files. That is resolved now.

Martin




--- a/libsanitizer/tsan/tsan_libdispatch.cpp
+++ b/libsanitizer/tsan/tsan_interceptors_libdispatch.cpp
@@ -1,4 +1,4 @@
-//===-- tsan_libdispatch.cpp 
--===//
+//===-- tsan_interceptors_libdispatch.cpp 
-===//
  //
  // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
  // See https://llvm.org/LICENSE.txt for license information.
@@ -16,6 +16,7 @@
  #include "tsan_interceptors.h"
  #include "tsan_rtl.h"
  
+#include "BlocksRuntime/Block.h"

  #include "tsan_dispatch_defs.h"
  
  namespace __tsan {


I mean, couldn't we wrap this Block.h include with #ifdef __BLOCKS__ or so
as a local patch (at least for now)?

Otherwise the patch series LGTM.

Jakub





Re: [11/n] Support vectorisation with mixed vector sizes

2019-11-05 Thread Richard Biener
On Fri, Oct 25, 2019 at 2:43 PM Richard Sandiford
 wrote:
>
> After previous patches, it's now possible to make the vectoriser
> support multiple vector sizes in the same vector region, using
> related_vector_mode to pick the right vector mode for a given
> element mode.  No port yet takes advantage of this, but I have
> a follow-on patch for AArch64.
>
> This patch also seemed like a good opportunity to add some more dump
> messages: one to make it clear which vector size/mode was being used
> when analysis passed or failed, and another to say when we've decided
> to skip a redundant vector size/mode.

OK.

I wonder if, when we requested a specific size previously, we now
have to verify we got that constraint satisfied after the change.
Esp. the epilogue vectorization cases want to get V2DI
from V4DI.

  sz /= 2;
- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
+ vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+ scalar_type,
+ sz / scalar_bytes);

doesn't look like an improvement in readability to me there.  Maybe
re-formulating
the whole code in terms of lanes instead of size would make it easier to follow?

Thanks,
Richard.

>
> 2019-10-24  Richard Sandiford  
>
> gcc/
> * machmode.h (opt_machine_mode::operator==): New function.
> (opt_machine_mode::operator!=): Likewise.
> * tree-vectorizer.h (vec_info::vector_mode): Update comment.
> (get_related_vectype_for_scalar_type): Delete.
> (get_vectype_for_scalar_type_and_size): Declare.
> * tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say
> whether analysis passed or failed, and with what vector modes.
> Use related_vector_mode to check whether trying a particular
> vector mode would be redundant with the autodetected mode,
> and print a dump message if we decide to skip it.
> * tree-vect-loop.c (vect_analyze_loop): Likewise.
> (vect_create_epilog_for_reduction): Use
> get_related_vectype_for_scalar_type instead of
> get_vectype_for_scalar_type_and_size.
> * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace
> with...
> (get_related_vectype_for_scalar_type): ...this new function.
> Take a starting/"prevailing" vector mode rather than a vector size.
> Take an optional nunits argument, with the same meaning as for
> related_vector_mode.  Use related_vector_mode when not
> auto-detecting a mode, falling back to mode_for_vector if no
> target mode exists.
> (get_vectype_for_scalar_type): Update accordingly.
> (get_same_sized_vectype): Likewise.
> * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise.
>
> Index: gcc/machmode.h
> ===
> --- gcc/machmode.h  2019-10-25 13:26:59.053879364 +0100
> +++ gcc/machmode.h  2019-10-25 13:27:26.201687539 +0100
> @@ -258,6 +258,9 @@ #define CLASS_HAS_WIDER_MODES_P(CLASS)
>bool exists () const;
>template bool exists (U *) const;
>
> +  bool operator== (const T &m) const { return m_mode == m; }
> +  bool operator!= (const T &m) const { return m_mode != m; }
> +
>  private:
>machine_mode m_mode;
>  };
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h   2019-10-25 13:27:19.317736181 +0100
> +++ gcc/tree-vectorizer.h   2019-10-25 13:27:26.209687483 +0100
> @@ -329,8 +329,9 @@ typedef std::pair vec_object
>/* Cost data used by the target cost model.  */
>void *target_cost_data;
>
> -  /* If we've chosen a vector size for this vectorization region,
> - this is one mode that has such a size, otherwise it is VOIDmode.  */
> +  /* The argument we should pass to related_vector_mode when looking up
> + the vector mode for a scalar mode, or VOIDmode if we haven't yet
> + made any decisions about which vector modes to use.  */
>machine_mode vector_mode;
>
>  private:
> @@ -1595,8 +1596,9 @@ extern dump_user_location_t find_loop_lo
>  extern bool vect_can_advance_ivs_p (loop_vec_info);
>
>  /* In tree-vect-stmts.c.  */
> +extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
> +poly_uint64 = 0);
>  extern tree get_vectype_for_scalar_type (vec_info *, tree);
> -extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64);
>  extern tree get_mask_type_for_scalar_type (vec_info *, tree);
>  extern tree get_same_sized_vectype (tree, tree);
>  extern bool vect_get_loop_mask_type (loop_vec_info);
> Index: gcc/tree-vect-slp.c
> ===
> --- gcc/tree-vect-slp.c 2019-10-25 13:27:19.313736209 +0100
> +++ gcc/tree-vect-sl

Re: [13/n] Allow mixed vector sizes within a single vectorised stmt

2019-11-05 Thread Richard Biener
On Fri, Oct 25, 2019 at 2:49 PM Richard Sandiford
 wrote:
>
> Although a previous patch allowed mixed vector sizes within a vector
> region, we generally still required equal vector sizes within a vector
> stmt.  Specifically, vect_get_vector_types_for_stmt computes two vector
> types: the vector type corresponding to STMT_VINFO_VECTYPE and the
> vector type that determines the minimum vectorisation factor for the
> stmt ("nunits_vectype").  It then required these two types to be
> the same size.
>
> There doesn't seem to be any need for that restriction though.  AFAICT,
> all vectorizable_* functions either do their own compatibility checks
> or don't need to do them (because gimple guarantees that the scalar
> types are compatible).
>
> It should always be the case that nunits_vectype has at least as many
> elements as the other vectype, but that's something we can assert for.
>
> I couldn't resist a couple of other tweaks while there:
>
> - there's no need to compute nunits_vectype if its element type is
>   the same as STMT_VINFO_VECTYPE's.
>
> - it's useful to distinguish the nunits_vectype from the main vectype
>   in dump messages
>
> - when reusing the existing STMT_VINFO_VECTYPE, it's useful to say so
>   in the dump, and say what the type is

OK.

Thanks,
Richard.

>
> 2019-10-24  Richard Sandiford  
>
> gcc/
> * tree-vect-stmts.c (vect_get_vector_types_for_stmt): Don't
> require vectype and nunits_vectype to have the same size;
> instead assert that nunits_vectype has at least as many
> elements as vectype.  Don't compute a separate nunits_vectype
> if the scalar type is obviously the same as vectype's.
> Tweak dump messages.
>
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   2019-10-25 13:27:26.205687511 +0100
> +++ gcc/tree-vect-stmts.c   2019-10-25 13:27:32.877640367 +0100
> @@ -11973,7 +11973,12 @@ vect_get_vector_types_for_stmt (stmt_vec
>tree vectype;
>tree scalar_type = NULL_TREE;
>if (STMT_VINFO_VECTYPE (stmt_info))
> -*stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
> +{
> +  *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  if (dump_enabled_p ())
> +   dump_printf_loc (MSG_NOTE, vect_location,
> +"precomputed vectype: %T\n", vectype);
> +}
>else
>  {
>gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
> @@ -12005,7 +12010,7 @@ vect_get_vector_types_for_stmt (stmt_vec
>
>if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> -"get vectype for scalar type:  %T\n", scalar_type);
> +"get vectype for scalar type: %T\n", scalar_type);
>vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
>if (!vectype)
> return opt_result::failure_at (stmt,
> @@ -12022,42 +12027,38 @@ vect_get_vector_types_for_stmt (stmt_vec
>
>/* Don't try to compute scalar types if the stmt produces a boolean
>   vector; use the existing vector type instead.  */
> -  tree nunits_vectype;
> -  if (VECTOR_BOOLEAN_TYPE_P (vectype))
> -nunits_vectype = vectype;
> -  else
> +  tree nunits_vectype = vectype;
> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype)
> +  && *stmt_vectype_out != boolean_type_node)
>  {
>/* The number of units is set according to the smallest scalar
>  type (or the largest vector size, but we only support one
>  vector size per vectorization).  */
> -  if (*stmt_vectype_out != boolean_type_node)
> +  HOST_WIDE_INT dummy;
> +  scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, 
> &dummy);
> +  if (scalar_type != TREE_TYPE (vectype))
> {
> - HOST_WIDE_INT dummy;
> - scalar_type = vect_get_smallest_scalar_type (stmt_info,
> -  &dummy, &dummy);
> + if (dump_enabled_p ())
> +   dump_printf_loc (MSG_NOTE, vect_location,
> +"get vectype for smallest scalar type: %T\n",
> +scalar_type);
> + nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
> + if (!nunits_vectype)
> +   return opt_result::failure_at
> + (stmt, "not vectorized: unsupported data-type %T\n",
> +  scalar_type);
> + if (dump_enabled_p ())
> +   dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
> +nunits_vectype);
> }
> -  if (dump_enabled_p ())
> -   dump_printf_loc (MSG_NOTE, vect_location,
> -"get vectype for scalar type:  %T\n", scalar_type);
> -  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
>  }
> -  if (!nunits_vectype)
> -return opt_result::failure_at (stmt,
> -  "not 

Re: [14/n] Vectorise conversions between differently-sized integer vectors

2019-11-05 Thread Richard Biener
On Fri, Oct 25, 2019 at 2:51 PM Richard Sandiford
 wrote:
>
> This patch adds AArch64 patterns for converting between 64-bit and
> 128-bit integer vectors, and makes the vectoriser and expand pass
> use them.

So on GIMPLE we'll see

v4si _1;
v4di _2;

 _1 = (v4si) _2;

then, correct?  Likewise for float conversions.

I think that's "new", can you add to tree-cfg.c:verify_gimple_assign_unary
verification that the number of lanes of the LHS and the RHS match please?

OK with that change.
Thanks,
Richard.

>
> 2019-10-24  Richard Sandiford  
>
> gcc/
> * tree-vect-stmts.c (vectorizable_conversion): Extend the
> non-widening and non-narrowing path to handle standard
> conversion codes, if the target supports them.
> * expr.c (convert_move): Try using the extend and truncate optabs
> for vectors.
> * optabs-tree.c (supportable_convert_operation): Likewise.
> * config/aarch64/iterators.md (Vnarroqw): New iterator.
> * config/aarch64/aarch64-simd.md (2)
> (trunc2): New patterns.
>
> gcc/testsuite/
> * gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
> on aarch64 targets.
> * gcc.dg/vect/vect-double-reduc-5.c: Likewise.
> * gcc.dg/vect/vect-outer-4e.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
> * gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
> * gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
>
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   2019-10-25 13:27:32.877640367 +0100
> +++ gcc/tree-vect-stmts.c   2019-10-25 13:27:36.197616908 +0100
> @@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info s
>switch (modifier)
>  {
>  case NONE:
> -  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
> +  if (code != FIX_TRUNC_EXPR
> + && code != FLOAT_EXPR
> + && !CONVERT_EXPR_CODE_P (code))
> return false;
>if (supportable_convert_operation (code, vectype_out, vectype_in,
>  &decl1, &code1))
> Index: gcc/expr.c
> ===
> --- gcc/expr.c  2019-10-22 08:46:57.359355939 +0100
> +++ gcc/expr.c  2019-10-25 13:27:36.193616936 +0100
> @@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsi
>
>if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
>  {
> +  if (GET_MODE_UNIT_PRECISION (to_mode)
> + > GET_MODE_UNIT_PRECISION (from_mode))
> +   {
> + optab op = unsignedp ? zext_optab : sext_optab;
> + insn_code icode = convert_optab_handler (op, to_mode, from_mode);
> + if (icode != CODE_FOR_nothing)
> +   {
> + emit_unop_insn (icode, to, from,
> + unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
> + return;
> +   }
> +   }
> +
> +  if (GET_MODE_UNIT_PRECISION (to_mode)
> + < GET_MODE_UNIT_PRECISION (from_mode))
> +   {
> + insn_code icode = convert_optab_handler (trunc_optab,
> +  to_mode, from_mode);
> + if (icode != CODE_FOR_nothing)
> +   {
> + emit_unop_insn (icode, to, from, TRUNCATE);
> + return;
> +   }
> +   }
> +
>gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
> GET_MODE_BITSIZE (to_mode)));
>
> Index: gcc/optabs-tree.c
> ===
> --- gcc/optabs-tree.c   2019-10-08 09:23:31.894529571 +0100
> +++ gcc/optabs-tree.c   2019-10-25 13:27:36.193616936 +0100
> @@ -303,6 +303,20 @@ supportable_convert_operation (enum tree
>return true;
>  }
>
> +  if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
> +  && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
> +{
> +  *code1 = code;
> +  return true;
> +}
> +
> +  if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
> +  && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
> +{
> +  *code1 = code;
> +  return true;
> +}
> +
>/* Now check for builtin.  */
>if (targetm.vectorize.builtin_conversion
>&& targetm.vectorize.builtin_conversion (code, vectype_out, 
> vectype_in))
> Index: gcc/config/aarch64/iterators.md
> ===
> --- gcc/config/aarch64/iterators.md 2019-10-17 14:23:07.71142 +

Re: PR92163

2019-11-05 Thread Christophe Lyon
On Tue, 5 Nov 2019 at 05:46, Prathamesh Kulkarni
 wrote:
>
> On Mon, 4 Nov 2019 at 18:37, Christophe Lyon  
> wrote:
> >
> > On Mon, 28 Oct 2019 at 16:03, Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Mon, 28 Oct 2019 at 07:18, Richard Biener  
> > > wrote:
> > > >
> > > > On Fri, Oct 25, 2019 at 9:58 PM Prathamesh Kulkarni
> > > >  wrote:
> > > > >
> > > > > On Fri, 25 Oct 2019 at 13:19, Richard Biener 
> > > > >  wrote:
> > > > > >
> > > > > > On Wed, Oct 23, 2019 at 11:45 PM Prathamesh Kulkarni
> > > > > >  wrote:
> > > > > > >
> > > > > > > Hi,
> > > > > > > The attached patch tries to fix PR92163 by calling
> > > > > > > gimple_purge_dead_eh_edges from ifcvt_local_dce if we need eh 
> > > > > > > cleanup.
> > > > > > > Does it look OK ?
> > > > > >
> > > > > > Hmm.  I think it shows an issue with the return value of 
> > > > > > remove_stmt_from_eh_lp
> > > > > > which is true if the LP index is -1 (externally throwing).  We don't
> > > > > > need to purge
> > > > > > any edges in that case.  That is, if-conversion should never need to
> > > > > > do EH purging
> > > > > > since that would be wrong-code.
> > > > > >
> > > > > > As of the segfault can you please instead either pass down 
> > > > > > need_eh_cleanup
> > > > > > as function parameter (and NULL from ifcvt) or use the return value 
> > > > > > in DSE
> > > > > > to set the bit in the caller.
> > > > > Hi Richard,
> > > > > Thanks for the suggestions, does the attached patch look OK ?
> > > > > Bootstrap+test in progress on x86_64-unknown-linux-gnu.
> > > >
> > > > OK.
> > > Thanks, committed to trunk in r277525 after bootstrap+test on
> > > x86_64-unknown-linux-gnu.
> > >
> >
> > Hi Prathamesh,
> >
> > There's a problem with the new test you added: if uses -fopenacc which
> > is not supported by arm-eabi or aarch64-elf targets for instance.
> > You probably want to move the test to gcc.dg/goacc or add
> > dg-require-effective-target fopenacc.
> Oops, sorry about that. Could you please confirm if attached patch
> fixes the issue ?
> I added dg-require-effective-target fopenacc.
>

Yes that works. Maybe you can commit it as obvious?

Thanks,

Christophe

> Thanks,
> Prathamesh
> >
> > Thanks,
> >
> > Christophe
> >
> > > Thanks,
> > > Prathamesh
> > > >
> > > > Richard.
> > > >
> > > > > Thanks,
> > > > > Prathamesh
> > > > > >
> > > > > > Thanks,
> > > > > > Richard.
> > > > > >
> > > > > > > Thanks,
> > > > > > > Prathamesh


Re: [15/n] Consider building nodes from scalars in vect_slp_analyze_node_operations

2019-11-05 Thread Richard Biener
On Tue, Oct 29, 2019 at 6:04 PM Richard Sandiford
 wrote:
>
> If the statements in an SLP node aren't similar enough to be vectorised,
> or aren't something the vectoriser has code to handle, the BB vectoriser
> tries building the vector from scalars instead.  This patch does the
> same thing if we're able to build a viable-looking tree but fail later
> during the analysis phase, e.g. because the target doesn't support a
> particular vector operation.
>
> This is needed to avoid regressions with a later patch.

OK.

Thanks,
Richard.

>
> 2019-10-29  Richard Sandiford  
>
> gcc/
> * tree-vect-slp.c (vect_contains_pattern_stmt_p): New function.
> (vect_slp_convert_to_external): Likewise.
> (vect_slp_analyze_node_operations): If analysis fails, try building
> the node from scalars instead.
>
> gcc/testsuite/
> * gcc.dg/vect/bb-slp-div-2.c: New test.
>
> Index: gcc/tree-vect-slp.c
> ===
> --- gcc/tree-vect-slp.c 2019-10-29 17:01:46.0 +
> +++ gcc/tree-vect-slp.c 2019-10-29 17:02:06.355512105 +
> @@ -225,6 +225,19 @@ vect_free_oprnd_info (vec  }
>
>
> +/* Return true if STMTS contains a pattern statement.  */
> +
> +static bool
> +vect_contains_pattern_stmt_p (vec stmts)
> +{
> +  stmt_vec_info stmt_info;
> +  unsigned int i;
> +  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
> +if (is_pattern_stmt_p (stmt_info))
> +  return true;
> +  return false;
> +}
> +
>  /* Find the place of the data-ref in STMT_INFO in the interleaving chain
> that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
> of the chain.  */
> @@ -2630,6 +2643,39 @@ vect_slp_analyze_node_operations_1 (vec_
>return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, 
> cost_vec);
>  }
>
> +/* Try to build NODE from scalars, returning true on success.
> +   NODE_INSTANCE is the SLP instance that contains NODE.  */
> +
> +static bool
> +vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
> + slp_instance node_instance)
> +{
> +  stmt_vec_info stmt_info;
> +  unsigned int i;
> +
> +  if (!is_a  (vinfo)
> +  || node == SLP_INSTANCE_TREE (node_instance)
> +  || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
> +return false;
> +
> +  if (dump_enabled_p ())
> +dump_printf_loc (MSG_NOTE, vect_location,
> +"Building vector operands from scalars instead\n");
> +
> +  /* Don't remove and free the child nodes here, since they could be
> + referenced by other structures.  The analysis and scheduling phases
> + (need to) ignore child nodes of anything that isn't vect_internal_def.  
> */
> +  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
> +  SLP_TREE_DEF_TYPE (node) = vect_external_def;
> +  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
> +  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> +{
> +  tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
> +  SLP_TREE_SCALAR_OPS (node)[i] = lhs;
> +}
> +  return true;
> +}
> +
>  /* Analyze statements contained in SLP tree NODE after recursively analyzing
> the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
>
> @@ -2656,6 +2702,13 @@ vect_slp_analyze_node_operations (vec_in
>  {
>SLP_TREE_NUMBER_OF_VEC_STMTS (node)
> = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
> +  /* Cope with cases in which we made a late decision to build the
> +node from scalars.  */
> +  if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def
> + && vect_slp_convert_to_external (vinfo, node, node_instance))
> +   ;
> +  else
> +   gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader));
>return true;
>  }
>
> @@ -2715,6 +2768,11 @@ vect_slp_analyze_node_operations (vec_in
>  if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
>STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
>
> +  /* If this node can't be vectorized, try pruning the tree here rather
> + than felling the whole thing.  */
> +  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
> +res = true;
> +
>return res;
>  }
>
> Index: gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
> ===
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c2019-10-29 17:02:06.351512133 
> +
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +
> +int x[4], y[4], z[4];
> +
> +void
> +f (void)
> +{
> +  x[0] += y[0] / z[0] * 2;
> +  x[1] += y[1] / z[1] * 2;
> +  x[2] += y[2] / z[2] * 2;
> +  x[3] += y[3] / z[3] * 2;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target 
> vect_int } } } */


Re: [PATCH 13/X] [libsanitizer][options] Add hwasan flags and argument parsing

2019-11-05 Thread Andrey Konovalov via gcc-patches
On Tue, Nov 5, 2019 at 12:34 PM Matthew Malcomson
 wrote:
>
> These flags can't be used at the same time as any of the other
> sanitizers.
> We add an equivalent flag to -static-libasan in -static-libhwasan to
> ensure static linking.
>
> The -fsanitize=kernel-hwaddress option is for compiling targeting the
> kernel.  This flag has defaults that allow compiling KASAN with tags as
> it is currently implemented.
> These defaults are that we do not sanitize variables on the stack and
> always recover from a detected bug.
> Stack tagging in the kernel is a future aim, stack instrumentation has
> not yet been enabled for the kernel for clang either
> (https://lists.infradead.org/pipermail/linux-arm-kernel/2019-October/687121.html).
>
> We introduce a backend hook `targetm.memtag.can_tag_addresses` that
> indicates to the mid-end whether a target has a feature like AArch64 TBI
> where the top byte of an address is ignored.
> Without this feature hwasan sanitization is not done.
>
> NOTE:
> --
> I have defined a new macro of __SANITIZE_HWADDRESS__ that gets
> automatically defined when compiling with hwasan.  This is analogous to
> __SANITIZE_ADDRESS__ which is defined when compiling with asan.
>
> Users in the kernel have expressed an interest in using
> __SANITIZE_ADDRESS__ for both
> (https://lists.infradead.org/pipermail/linux-arm-kernel/2019-October/690703.html).
>
> One approach to do this could be to define __SANITIZE_ADDRESS__ with
> different values depending on whether we are compiling with hwasan or
> asan.
>
> Using __SANITIZE_ADDRESS__ for both means that code like the kernel
> which wants to treat the two sanitizers as alternate implementations of
> the same thing gets that automatically.
>
> My preference is to use __SANITIZE_HWADDRESS__ since that means any
> existing code will not be predicated on this (and hence I guess less
> surprises), but would appreciate feedback on this given the point above.

+Evgenii Stepanov

(A repost from my answer from the mentioned thread):

> Similarly, I'm thinking I'll add no_sanitize_hwaddress as the hwasan
> equivalent of no_sanitize_address, which will require an update in the
> kernel given it seems you want KASAN to be used the same whether using
> tags or not.

We have intentionally reused the same macros to simplify things. Is
there any reason to use separate macros for GCC? Are there places
where we need to use specifically no_sanitize_hwaddress and
__SANITIZE_HWADDRESS__, but not no_sanitize_address and
__SANITIZE_ADDRESS__?


> --
>
> gcc/ChangeLog:
>
> 2019-11-05  Matthew Malcomson  
>
> * asan.c (memory_tagging_p): New.
> * asan.h (memory_tagging_p): New.
> * common.opt (flag_sanitize_recover): Default for kernel
> hwaddress.
> (static-libhwasan): New cli option.
> * config/aarch64/aarch64.c (aarch64_can_tag_addresses): New.
> (TARGET_MEMTAG_CAN_TAG_ADDRESSES): New.
> * config/gnu-user.h (LIBHWASAN_EARLY_SPEC): hwasan equivalent of
> asan command line flags.
> * cppbuiltin.c (define_builtin_macros_for_compilation_flags):
> Add hwasan equivalent of __SANITIZE_ADDRESS__.
> * doc/tm.texi: Document new hook.
> * doc/tm.texi.in: Document new hook.
> * flag-types.h (enum sanitize_code): New sanitizer values.
> * gcc.c (STATIC_LIBHWASAN_LIBS): New macro.
> (LIBHWASAN_SPEC): New macro.
> (LIBHWASAN_EARLY_SPEC): New macro.
> (SANITIZER_EARLY_SPEC): Update to include hwasan.
> (SANITIZER_SPEC): Update to include hwasan.
> (sanitize_spec_function): Use hwasan options.
> * opts.c (finish_options): Describe conflicts between address
> sanitizers.
> (sanitizer_opts): Introduce new sanitizer flags.
> (common_handle_option): Add defaults for kernel sanitizer.
> * params.def (PARAM_HWASAN_RANDOM_FRAME_TAG): New.
> (PARAM_HWASAN_STACK): New.
> * params.h (HWASAN_STACK): New.
> (HWASAN_RANDOM_FRAME_TAG): New.
> * target.def (HOOK_PREFIX): Add new hook.
> * targhooks.c (default_memtag_can_tag_addresses): New.
> * toplev.c (process_options): Ensure hwasan only on TBI
> architectures.
>
> gcc/c-family/ChangeLog:
>
> 2019-11-05  Matthew Malcomson  
>
> * c-attribs.c (handle_no_sanitize_hwaddress_attribute): New
> attribute.
>
>
>
> ### Attachment also inlined for ease of reply
> ###
>
>
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 
> 6500b998321419a1d8d57062534206c5909adb7a..2de94815f91da5a0fd06c30d0044f866084121b8
>  100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -54,6 +54,8 @@ static tree handle_cold_attribute (tree *, tree, tree, int, 
> bool *);
>  static tree handle_no_sanitize_attribute (tree *, tree, tree, int, bool *);
>  static tree handle_no_sanitize_address_attribute (tree *, tree, tree,
>

make range_int_cst_p work with any numeric range (VR_ANTI_RANGE, etc)

2019-11-05 Thread Aldy Hernandez
The function range_int_cst_p only works with VR_RANGE's at the moment. 
This is silly because VR_ANTI_RANGE and even VR_VARYING can contain 
numeric bounds.  I have fixed this oversight and have made the function 
return the bounds in MIN/MAX.  This simplifies a lot of code, because 
there is no longer a need to special case VR_VARYING and VR_ANTI_RANGE, 
as well as pick at the individual range components outside of the API.


The patch has the pleasant side-effect of bringing more things into the 
API fold.  Basically, any access to either value_range::min(), max(), or 
kind(), is suspect and a big hint that the code should be rewritten to 
use the API (contains_p, varying_p, zero_p, etc).


One of the primary culprits of API noncompliance is the sprintf and 
strlen warning code.  Mind you, not due to negligence on the author's part, but 
because we had no value-range API when Martin added the passes.  I 
realize it's nobody's responsibility to fix older value-range code, and 
I'll probably end up doing it myself (next cycle??), but I could 
definitely use a hand from the experts, as it's intricate and delicate code.


Speaking of which, in converting dump_strlen_info() to use the new 
range_int_cst_p, I noticed a lot of the code disappeared if we used the 
API.  Martin, if you'd prefer not to dump varying, undefined, etc, let 
me know and we can gate that call to vr.dump().  I took the liberty 
because it was simple, clean, and hidden away in an internal debugging 
helper.


OK for trunk?
commit cba4b59ef2e0e6821d63cfa959d201f22534eb69
Author: Aldy Hernandez 
Date:   Tue Nov 5 10:54:22 2019 +0100

Make range_int_cst_p work with any numeric range, not just VR_RANGE.
This includes VR_ANTI_RANGE as well as VR_VARYING, as they can also
have numeric bounds.  Add two new arguments to return the MIN/MAX
bounds.

Remove the now redundant range_has_numeric_bounds_p.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 562b69d1aab..5f12d166176 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -48,6 +48,30 @@
 	* fold-const.c (operand_compare::hash_operand): Remove
 	FIELD_DECL handling.
 
+2019-11-05  Aldy Hernandez  
+
+	* gimple-ssa-sprintf.c (get_int_range): Call range_int_cst_p with
+	min/max arguments.
+	(format_integer): Same.
+	(handle_printf_call): Same.
+	* tree-ssa-strlen.c (compare_nonzero_chars): Same.
+	(dump_strlen_info): Same.
+	(get_range_strlen_dynamic): Same.
+	(count_nonzero_bytes): Same.
+	* tree-vrp.c (range_has_numeric_bounds_p): Remove.
+	(extract_range_from_pointer_plus_expr): Call range_int_cst_p with
+	min/max arguments.
+	(value_range_base::normalize_addresses): Use range_int_cst_p
+	instead of removed range_has_numeric_bounds_p.
+	(range_int_cst_p): New MIN/MAX arguments.
+	* tree-vrp.h (range_int_cst_p): Add two new arguments.
+	* vr-values.c (r_values::check_for_binary_op_overflow): Call
+	range_int_cst_p with min/max arguments.
+	(vr_values::simplify_div_or_mod_using_ranges): Same.
+	(vr_set_zero_nonzero_bits): Same.
+	(range_fits_type_p): Use value_range_base::supports_type_p instead
+	of open-coding the test.
+
 2019-11-05  Aldy Hernandez  
 
 	* tree-vrp.h (vrp_bitmap_equal_p): Remove.
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index b548bbd95e3..0029cfc258c 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -1023,18 +1023,12 @@ get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
 	  /* Try to determine the range of values of the integer argument.  */
 	  const value_range *vr
 	= CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
+	  tree min, max;
 
-	  if (range_int_cst_p (vr))
+	  if (!vr->varying_p () && range_int_cst_p (vr, &min, &max))
 	{
-	  HOST_WIDE_INT type_min
-		= (TYPE_UNSIGNED (argtype)
-		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
-		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
-
-	  HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
-
-	  *pmin = TREE_INT_CST_LOW (vr->min ());
-	  *pmax = TREE_INT_CST_LOW (vr->max ());
+	  *pmin = TREE_INT_CST_LOW (min);
+	  *pmax = TREE_INT_CST_LOW (max);
 
 	  if (*pmin < *pmax)
 		{
@@ -1044,8 +1038,12 @@ get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
 		 and its upper bound is in excess of TYPE_MAX.  In
 		 that (invalid) case disregard the range and use that
 		 of the expected type instead.  */
+		  HOST_WIDE_INT type_min
+		= (TYPE_UNSIGNED (argtype)
+		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
+		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
+		  HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
 		  knownrange = type_min < *pmin || *pmax < type_max;
-
 		  unknown = false;
 		}
 	}
@@ -1326,11 +1324,8 @@ format_integer (const directive &dir, tree arg, const vr_values *vr_values)
   const value_range *vr
 	= CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
 
-  if (range_int_cst_p (vr))
+  

Re: [PATCH 1/2] [ARM,testsuite] Skip tests incompatible with -mpure-code

2019-11-05 Thread Christophe Lyon
On Mon, 4 Nov 2019 at 17:54, Kyrill Tkachov  wrote:
>
> Hi Christophe,
>
> On 10/18/19 2:18 PM, Christophe Lyon wrote:
> > Hi,
> >
> > All these tests fail when using -mpure-code:
> > * some force A or R profile
> > * some use Neon
> > * some use -fpic/-fPIC
> > all of which are not supported by this option.
> >
> > OK?
>
>
> Hmm... I'm tempted to ask if it would be simpler to add a check for
> -mpure-code in the effective target checks for the above features but a
> lot of those tests don't use effective target checks consistently anyway...
>
> So this is ok, though I'm not a big fan of adding so many dg-skip-if
> directives.
>
I'm not a big fan either, that's why I asked
https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01281.html
but finally decided it was easier for me to go with the dg-skip-if approach.

I supposed that all the existing dg-skip-if related to float-abi are used
by the Arm team when running various types of validations?

Committed as r277828.

Thanks

Christophe

> Thanks,
>
> Kyrill
>
>
> >
> > Thanks,
> >
> > Christophe


Re: [16/n] Apply maximum nunits for BB SLP

2019-11-05 Thread Richard Biener
On Tue, Oct 29, 2019 at 6:05 PM Richard Sandiford
 wrote:
>
> The BB vectoriser picked vector types in the same way as the loop
> vectoriser: it picked a vector mode/size for the region and then
> based all the vector types off that choice.  This meant we could
> end up trying to use vector types that had too many elements for
> the group size.
>
> The main part of this patch is therefore about passing the SLP
> group size down to routines like get_vectype_for_scalar_type and
> ensuring that each vector type in the SLP tree is chosen wrt the
> group size.  That part in itself is pretty easy and mechanical.
>
> The main warts are:
>
> (1) We normally pick a STMT_VINFO_VECTYPE for data references at an
> early stage (vect_analyze_data_refs).  However, nothing in the
> BB vectoriser relied on this, or on the min_vf calculated from it.
> I couldn't see anything other than vect_recog_bool_pattern that
> tried to access the vector type before the SLP tree is built.

So can you not set STMT_VINFO_VECTYPE for data refs with BB vectorization
then?

> (2) It's possible for the same statement to be used in the groups of
> different sizes.  Taking the group size into account meant that
> we could try to pick different vector types for the same statement.

That only happens when we have multiple SLP instances though
(entries into the shared SLP graph).  It probably makes sense to
keep handling SLP instances sharing stmts together for costing
reasons but one issue is that for disjunct pieces (in the same BB)
disqualifying one cost-wise disqualifies all.  So at some point
during analysis (which should eventually cover more than a single
BB) we want to split the graph.  It probably doesn't help the above
case.

> This problem should go away with the move to doing everything on
> SLP trees, where presumably we would attach the vector type to the
> SLP node rather than the stmt_vec_info.  Until then, the patch just
> uses a first-come, first-served approach.

Yeah, I ran into not having vectype on SLP trees with invariants/externals
as well.  I suppose you didn't try simply adding that to the SLP tree
and pushing/popping it like we push/pop the def type?

Assigning the vector types should really happen in vectorizable_*
and not during SLP build itself btw.

Your update-all-shared-vectypes thing looks quadratic to me :/

> (3) A similar problem exists for grouped data references, where
> different statements in the same dataref group could be used
> in SLP nodes that have different group sizes.  The patch copes
> with that by making sure that all vector types in a dataref
> group remain consistent.
>
> The patch means that:
>
> void
> f (int *x, short *y)
> {
>   x[0] += y[0];
>   x[1] += y[1];
>   x[2] += y[2];
>   x[3] += y[3];
> }
>
> now produces:
>
> ldr q0, [x0]
> ldr d1, [x1]
> saddw   v0.4s, v0.4s, v1.4h
> str q0, [x0]
> ret
>
> instead of:
>
> ldrsh   w2, [x1]
> ldrsh   w3, [x1, 2]
> fmovs0, w2
> ldrsh   w2, [x1, 4]
> ldrsh   w1, [x1, 6]
> ins v0.s[1], w3
> ldr q1, [x0]
> ins v0.s[2], w2
> ins v0.s[3], w1
> add v0.4s, v0.4s, v1.4s
> str q0, [x0]
> ret

Nice.

> Unfortunately it also means we start to vectorise
> gcc.target/i386/pr84101.c for -m32.  That seems like a target
> cost issue though; see PR92265 for details.
>
>
> 2019-10-29  Richard Sandiford  
>
> gcc/
> * tree-vectorizer.h (vect_get_vector_types_for_stmt): Take an
> optional maximum nunits.
> (get_vectype_for_scalar_type): Likewise.  Also declare a form that
> takes an slp_tree.
> (get_mask_type_for_scalar_type): Take an optional slp_tree.
> (vect_get_mask_type_for_stmt): Likewise.
> * tree-vect-data-refs.c (vect_analyze_data_refs): Don't store
> the vector type in STMT_VINFO_VECTYPE for BB vectorization.
> * tree-vect-patterns.c (vect_recog_bool_pattern): Use
> vect_get_vector_types_for_stmt instead of STMT_VINFO_VECTYPE
> to get an assumed vector type for data references.
> * tree-vect-slp.c (vect_update_shared_vectype): New function.
> (vect_update_all_shared_vectypes): Likewise.
> (vect_build_slp_tree_1): Pass the group size to
> vect_get_vector_types_for_stmt.  Use vect_update_shared_vectype
> for BB vectorization.
> (vect_build_slp_tree_2): Call vect_update_all_shared_vectypes
> before building the vectof from scalars.
> (vect_analyze_slp_instance): Pass the group size to
> get_vectype_for_scalar_type.
> (vect_slp_analyze_node_operations_1): Don't recompute the vector
> types for BB vectorization here; just handle the case in which
> we deferred the choice for booleans.
> (vect_get_constant_vectors):

Re: CPUID Patch for IDT Winchip

2019-11-05 Thread tedheadster
On Tue, May 21, 2019 at 11:20 AM Uros Bizjak  wrote:
>
> 2019-05-21  Uroš Bizjak  
>
> * config/i386/cpuid.h (__cpuid): For 32bit targets, zero
> %ebx and %ecx bafore calling cpuid with leaf 1 or
> non-constant leaf argument.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
>
> Committed to mainline SVN, will be backported to all active branches.

Has this been backported to active branches? I could only find it in
the 9.2.0 release using the git repositories.

- Matthew


[PATCH] Fix PR92280

2019-11-05 Thread Richard Biener


This avoids folding

_1 = { _2, _3, _4, _5 };
_2 = BIT_FIELD_REF <_1, ..>;

to

_1 = { _2, _3, _4, _5 };
_2 = { _4, _5 };

when the first CTOR doesn't become dead.  This in turn makes FRE
handle partial loads from 

mem = _1;

by inserting BIT_FIELD_REFs on _1 (instead of CTORs which FRE refuses
to insert).  For gcc.target/i386/pr83008.c this means eliding the
memory and producing quite optimal code.

simplify_bitfield_ref used to need GENERIC folding for
BIT_FIELD_REF of CTOR but we since moved the folding to match.pd
and leaving the GENERIC call in defeats the single_use check.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-11-05  Richard Biener  

PR tree-optimization/92280
* match.pd (BIT_FIELD_REF of CTOR): Unless the original CTOR
had a single use do not create a new CTOR.
* tree-ssa-forwprop.c (simplify_bitfield_ref): Do not re-fold
BIT_FIELD_REF of a CTOR via GENERIC.

Index: gcc/match.pd
===
--- gcc/match.pd(revision 277813)
+++ gcc/match.pd(working copy)
@@ -5565,15 +5565,19 @@ (define_operator_list COND_TERNARY
 (if (elt < CONSTRUCTOR_NELTS (ctor))
  (view_convert { CONSTRUCTOR_ELT (ctor, elt)->value; })
  { build_zero_cst (type); })
-{
-  vec *vals;
-  vec_alloc (vals, count);
-  for (unsigned i = 0;
-   i < count && elt + i < CONSTRUCTOR_NELTS (ctor); ++i)
-CONSTRUCTOR_APPEND_ELT (vals, NULL_TREE,
-CONSTRUCTOR_ELT (ctor, elt + i)->value);
-  build_constructor (type, vals);
-})))
+/* We don't want to emit new CTORs unless the old one goes away.
+   ???  Eventually allow this if the CTOR ends up constant or
+   uniform.  */
+(if (single_use (@0))
+ {
+   vec *vals;
+   vec_alloc (vals, count);
+   for (unsigned i = 0;
+i < count && elt + i < CONSTRUCTOR_NELTS (ctor); ++i)
+ CONSTRUCTOR_APPEND_ELT (vals, NULL_TREE,
+ CONSTRUCTOR_ELT (ctor, elt + i)->value);
+   build_constructor (type, vals);
+ }
   /* The bitfield references a single constructor element.  */
   (if (k.is_constant (&const_k)
   && idx + n <= (idx / const_k + 1) * const_k)
Index: gcc/tree-ssa-forwprop.c
===
--- gcc/tree-ssa-forwprop.c (revision 277813)
+++ gcc/tree-ssa-forwprop.c (working copy)
@@ -1786,7 +1786,7 @@ simplify_bitfield_ref (gimple_stmt_itera
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op0, op1, op2;
+  tree op, op0, op1;
   tree elem_type;
   unsigned idx, size;
   enum tree_code code;
@@ -1804,20 +1804,7 @@ simplify_bitfield_ref (gimple_stmt_itera
 return false;
 
   op1 = TREE_OPERAND (op, 1);
-  op2 = TREE_OPERAND (op, 2);
   code = gimple_assign_rhs_code (def_stmt);
-
-  if (code == CONSTRUCTOR)
-{
-  tree tem = fold_ternary (BIT_FIELD_REF, TREE_TYPE (op),
-  gimple_assign_rhs1 (def_stmt), op1, op2);
-  if (!tem || !valid_gimple_rhs_p (tem))
-   return false;
-  gimple_assign_set_rhs_from_tree (gsi, tem);
-  update_stmt (gsi_stmt (*gsi));
-  return true;
-}
-
   elem_type = TREE_TYPE (TREE_TYPE (op0));
   if (TREE_TYPE (op) != elem_type)
 return false;


[Committed 0/4] IBM Z: Fix a few testsuite problems

2019-11-05 Thread Andreas Krebbel
Andreas Krebbel (4):
  IBM Z: Use tree_fits_uhwi_p in vector_alignment hook
  IBM Z: Fix testsuite useable_hw check
  IBM Z: gen-vect-11/32: Set min-vect-loop-bound param back to default
  IBM Z: gen-vect-26/28: Vectorizing without peeling is ok for Z

 gcc/config/s390/s390.c  |  8 +++-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c |  6 +-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c |  5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c |  5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c |  4 
 gcc/testsuite/gcc.target/s390/s390.exp  | 22 -
 6 files changed, 35 insertions(+), 15 deletions(-)

-- 
2.23.0



[PATCH 2/4] IBM Z: Fix testsuite useable_hw check

2019-11-05 Thread Andreas Krebbel
This fixes various issues with the useable_hw check in s390.exp.  The
check is supposed to verify whether a testcase can be run on the
current hardware.

- the test never returned true for -m31 because vzero is not available
  in ESA mode and -m31 defaults to -mesa
- the missing v0 clobber on the vzero instruction made the check fail
  if the stack pointer got saved in f0
- the lcbb instruction used for checking whether we are on a z13
  also requires vx.  Replace it with an instruction from the generic
  instruction set extensions.
- no support for z14 and z15 so far

gcc/testsuite/ChangeLog:

2019-11-05  Andreas Krebbel  

* gcc.target/s390/s390.exp
(check_effective_target_s390_useable_hw): Add inline asm for z14
and z15. Replace instruction for z13 with lochiz. Add register
clobbers. Check also for __zarch__ when doing the __VX__ test.
---
 gcc/testsuite/gcc.target/s390/s390.exp | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/s390.exp 
b/gcc/testsuite/gcc.target/s390/s390.exp
index 925eb568832..b4057b00f14 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -87,18 +87,22 @@ proc check_effective_target_s390_useable_hw { } {
int main (void)
{
asm (".machinemode zarch" : : );
-   #if __ARCH__ >= 11
-   asm ("lcbb %%r2,0(%%r15),0" : : );
+   #if __ARCH__ >= 13
+   asm ("ncrk %%r2,%%r2,%%r2" : : : "r2");
+   #elif __ARCH__ >= 12
+   asm ("agh %%r2,0(%%r15)" : : : "r2");
+   #elif __ARCH__ >= 11
+   asm ("lochiz %%r2,42" : : : "r2");
#elif __ARCH__ >= 10
-   asm ("risbgn %%r2,%%r2,0,0,0" : : );
+   asm ("risbgn %%r2,%%r2,0,0,0" : : : "r2");
#elif __ARCH__ >= 9
-   asm ("sgrk %%r2,%%r2,%%r2" : : );
+   asm ("sgrk %%r2,%%r2,%%r2" : : : "r2");
#elif __ARCH__ >= 8
-   asm ("rosbg %%r2,%%r2,0,0,0" : : );
+   asm ("rosbg %%r2,%%r2,0,0,0" : : : "r2");
#elif __ARCH__ >= 7
-   asm ("nilf %%r2,0" : : );
+   asm ("nilf %%r2,0" : : : "r2");
#elif __ARCH__ >= 6
-   asm ("lay %%r2,0(%%r15)" : : );
+   asm ("lay %%r2,0(%%r15)" : : : "r2");
#elif __ARCH__ >= 5
asm ("tam" : : );
#endif
@@ -108,8 +112,8 @@ proc check_effective_target_s390_useable_hw { } {
asm ("etnd %0" : "=d" (nd));
  }
#endif
-   #ifdef __VX__
-   asm ("vzero %%v0" : : );
+   #if defined (__VX__) && defined (__zarch__)
+   asm ("vzero %%v0" : : : "v0");
#endif
  return 0;
}
-- 
2.23.0



[PATCH 3/4] IBM Z: gen-vect-11/32: Set min-vect-loop-bound param back to default

2019-11-05 Thread Andreas Krebbel
In the Z backend we still set min-vect-loop-bound to 2 to work around
corner cases where awkward epilogue code gets generated in the
vectorizer.  This has a particular bad impact when vectorizing loops
with a low iteration count.  Due to this we do not vectorize the loop
in gen-vect-11/32 - what actually is a pity.

The patch sets min-vect-loop-bound back to the default value of 0 in
order to enable vectorization.

2019-11-05  Andreas Krebbel  

* gcc.dg/tree-ssa/gen-vect-11.c: Add --param min-vect-loop-bound=0
for IBM Z.
	* gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c | 6 +-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c | 4 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c 
b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
index 650e73a5ee8..dd1c0ac3eba 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
@@ -1,6 +1,10 @@
 /* { dg-do run { target vect_cmdline_needed } } */
 /* { dg-options "-O2 -ftree-vectorize -fwrapv -fdump-tree-vect-details 
-fvect-cost-model=dynamic" } */
-/* { dg-options "-O2 -ftree-vectorize -fwrapv -fdump-tree-vect-details 
-fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* The IBM Z backend sets the min-vect-loop-bound param to 2 to avoid
+   awkward epilogue code generation in some cases.  This line needs to
+   be removed after finding an alternate way to fix this.  */
+/* { dg-additional-options "--param min-vect-loop-bound=0" { target { 
s390*-*-* } } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c 
b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
index c4bee19b75a..378dd0b831c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
@@ -1,6 +1,10 @@
 /* { dg-do run { target vect_cmdline_needed } } */
 /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize 
-fdump-tree-vect-details -fno-vect-cost-model" } */
 /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* The IBM Z backend sets the min-vect-loop-bound param to 2 to avoid
+   awkward epilogue code generation in some cases.  This line needs to
+   be removed after finding an alternate way to fix this.  */
+/* { dg-additional-options "--param min-vect-loop-bound=0" { target { 
s390*-*-* } } } */
 
 #include 
 
-- 
2.23.0



[PATCH 1/4] IBM Z: Use tree_fits_uhwi_p in vector_alignment hook

2019-11-05 Thread Andreas Krebbel
This fixes an ICE in gcc.dg/attr-vector_size.c testcase.

gcc/ChangeLog:

2019-11-05  Andreas Krebbel  

* config/s390/s390.c (s390_vector_alignment): Check if the value
fits into uhwi before using it.
---
 gcc/config/s390/s390.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 151b80da0b3..ff0b43c2c29 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16075,13 +16075,19 @@ s390_support_vector_misalignment (machine_mode mode 
ATTRIBUTE_UNUSED,
 static HOST_WIDE_INT
 s390_vector_alignment (const_tree type)
 {
+  tree size = TYPE_SIZE (type);
+
   if (!TARGET_VX_ABI)
 return default_vector_alignment (type);
 
   if (TYPE_USER_ALIGN (type))
 return TYPE_ALIGN (type);
 
-  return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
+  if (tree_fits_uhwi_p (size)
+  && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
+return tree_to_uhwi (size);
+
+  return BIGGEST_ALIGNMENT;
 }
 
 /* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
-- 
2.23.0



[PATCH 4/4] IBM Z: gen-vect-26/28: Vectorizing without peeling is ok for Z

2019-11-05 Thread Andreas Krebbel
These tests check if loop peeling has been applied to avoid
having to vectorize unaligned loops.  On Z we do not have any
alignment requirements for vectorization, so we also don't want
the loop peeling here.

2019-11-05  Andreas Krebbel  

* gcc.dg/tree-ssa/gen-vect-26.c: Disable loop peeling check for
IBM Z.
* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c | 5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c 
b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
index 242316893c0..6f3c2b7d88a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
@@ -30,5 +30,6 @@ int main ()
 
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
! avr-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" { target { ! avr-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using 
peeling" 1 "vect" { target { ! avr-*-* } } } } */
+/* IBM Z does not require special alignment for vectorization.  */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" { target { ! { avr-*-* s390*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using 
peeling" 1 "vect" { target { ! { avr-*-* s390*-*-* } } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c 
b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
index 24853e0e0db..7b26bbdc70c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
@@ -38,5 +38,6 @@ int main (void)
 
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
! avr-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" { target { ! avr-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using 
peeling" 1 "vect" { target { ! avr-*-* } } } } */
+/* IBM Z does not require special alignment for vectorization.  */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" { target { ! { avr-*-* s390*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using 
peeling" 1 "vect" { target { ! { avr-*-* s390*-*-* } } } } } */
-- 
2.23.0



Re: [PATCH V4] Loop split upon semi-invariant condition (PR tree-optimization/89134)

2019-11-05 Thread Richard Biener
On Thu, Oct 31, 2019 at 3:38 PM Feng Xue OS  wrote:
>
> Hi, Richard
>
>This is a new patch to support more generalized semi-invariant condition, 
> which uses
> control dependence analysis.

Uh.  Note it's not exactly helpful to change algorithms between
reviews, that makes it
just harder :/

Btw, I notice you use post-dominance info.  Note that we generally do
not keep that
up-to-date with CFG manipulations (and for dominators fast queries are
disabled).
Probably the way we walk & transform loops makes this safe but it's something to
remember when extending that.  Possibly doing analysis of all candidates first
and then applying the transform for all wanted cases would avoid this (and maybe
also can reduce the number of update_ssa calls).  I guess this can be done as
followup.

The patch is OK.

Thanks,
Richard.



> Thanks,
> Feng
>
> 
> From: Feng Xue OS 
> Sent: Friday, October 25, 2019 11:43 AM
> To: Richard Biener
> Cc: Michael Matz; Philipp Tomsich; gcc-patches@gcc.gnu.org; Christoph 
> Müllner; erick.oc...@theobroma-systems.com
> Subject: Re: [PATCH V3] Loop split upon semi-invariant condition (PR 
> tree-optimization/89134)
>
> Richard,
>
> Thanks for your comments.
>
> >+  /* For PHI node that is not in loop header, its source operands should
> >+be defined inside the loop, which are seen as loop variant.  */
> >+  if (def_bb != loop->header || !skip_head)
> >+   return false;
>
> > so if we have
> >
> > for (;;)
> >  {
> > if (x)
> >   a = ..;
> > else
> >   a = ...;
> > if (cond-to-split-on dependent on a)
> > ...
> >  }
> >
> > the above is too restrictive in case 'x' is semi-invariant as well, correct?
> In above case, cond-on-a will not be identified as semi-invariant, in that
> a is defined by PHI with real multi-sources. To handle it,  besides each
> source value, we should add extra check on each source's control
> dependence node (x in the case), which might have not a little code expansion.
> Anyway, I'll have a try.
>
>
> >+ /* A new value comes from outside of loop.  */
> >+ if (!bb || !flow_bb_inside_loop_p (loop, bb))
> >+   return false;
>
> > but that means starting from the second iteration the value is invariant.
> No. Traversal direction is reverse to loop execution. In the following,
> start from "x_1 = ", extract latch value x_3, and get x_3 definition, and
> finally reach "x_1 =".
>
> Loop:
>   x_1 = PHI (x_0, x_3)
>   ...
>   x_3 =
>   ...
>   goto Loop;
>
>
> >+ /* Don't consider redefinitions in excluded basic blocks.  
> >*/
> >+ if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> >+   {
> >+ /* There are more than one source operands that can
> >+provide value to the SSA name, it is variant.  */
> >+ if (from)
> >+   return false;
> >
> > they might be the same though, for PHIs with > 2 arguments.
> OK. Will add value equivalence check.
>
>
> > In the cycle handling you are not recursing via stmt_semi_invariant_p
> > but only handle SSA name copies - any particular reason for that?
> The cycle handling is specified for ssa that crosses iteration. It is
> semi-invariant if it remains unchanged after a certain iteration, which
> means its value in the previous iteration (coming from the latch edge) is just
> a copy of itself, nothing else. So, recursion via stmt_semi_invariant_p
> is unnecessary.
>
> Loop:
>   x_1 = PHI (x_0, x_3);
>   x_2 = PHI(x_1, value defined in excluded branch);
>   x_3 = x_2;
>   goto Loop;
>
>
> >+static bool
> >+branch_removable_p (basic_block branch_bb)
> >+{
> >+  if (single_pred_p (branch_bb))
> >+return true;
> >
> > I'm not sure what this function tests - at least the single_pred_p check
> > looks odd to me given the dominator checks later.  The single predecessor
> > could simply be a forwarder.  I wonder if you are looking for branches 
> > forming
> > an irreducible loop?  I think you can then check EDGE_IRREDUCIBLE_LOOP
> > or BB_IRREDUCIBLE_LOOP on the condition block (btw, I don't see
> > testcases covering the appearant special-cases in the patch - refering to
> > existing ones via a comment often helps understanding the code).
>
> This function tests whether a branch is reachable from some place other
> than its conditional statement. This ensures that when the branch is not
> selected upon condition evaluation, the trace path led by the branch will
> never be executed, so that it can be excluded during semi-invariantness
> analysis.
>
> If single_pred_p, only condition statement can reach the branch.
>
> If not, consider a half diamond condition control graph, with a back-edge to
> true branch.
>
> condition
>|  \
>|   \
>|  false branch
>.--->.

Re: [16/n] Apply maximum nunits for BB SLP

2019-11-05 Thread Richard Sandiford
Richard Biener  writes:
> On Tue, Oct 29, 2019 at 6:05 PM Richard Sandiford
>  wrote:
>>
>> The BB vectoriser picked vector types in the same way as the loop
>> vectoriser: it picked a vector mode/size for the region and then
>> based all the vector types off that choice.  This meant we could
>> end up trying to use vector types that had too many elements for
>> the group size.
>>
>> The main part of this patch is therefore about passing the SLP
>> group size down to routines like get_vectype_for_scalar_type and
>> ensuring that each vector type in the SLP tree is chosen wrt the
>> group size.  That part in itself is pretty easy and mechanical.
>>
>> The main warts are:
>>
>> (1) We normally pick a STMT_VINFO_VECTYPE for data references at an
>> early stage (vect_analyze_data_refs).  However, nothing in the
>> BB vectoriser relied on this, or on the min_vf calculated from it.
>> I couldn't see anything other than vect_recog_bool_pattern that
>> tried to access the vector type before the SLP tree is built.
>
> So can you not set STMT_VINFO_VECTYPE for data refs with BB vectorization
> then?

Yeah, the patch stops us from setting it during vect_analyze_data_refs.
We still need to set it later when building the SLP tree, just like
we do for other statements.

>> (2) It's possible for the same statement to be used in the groups of
>> different sizes.  Taking the group size into account meant that
>> we could try to pick different vector types for the same statement.
>
> That only happens when we have multiple SLP instances though
> (entries into the shared SLP graph).

Yeah.

> It probably makes sense to keep handling SLP instances sharing stmts
> together for costing reasons but one issue is that for disjunct pieces
> (in the same BB) disqualifying one cost-wise disqualifies all.  So at
> some point during analysis (which should eventually cover more than a
> single BB) we want to split the graph.  It probably doesn't help the
> above case.

Yeah, sounds like there are two issues: one with sharing stmt_vec_infos
between multiple SLP nodes, and one with sharing SLP child nodes between
multiple parent nodes.  (2) comes from the first, but I guess failing
based on costs is more about the second.

>> This problem should go away with the move to doing everything on
>> SLP trees, where presumably we would attach the vector type to the
>> SLP node rather than the stmt_vec_info.  Until then, the patch just
>> uses a first-come, first-served approach.
>
> Yeah, I ran into not having vectype on SLP trees with invariants/externals
> as well.  I suppose you didn't try simply adding that to the SLP tree
> and pushing/popping it like we push/pop the def type?

No, didn't try that.  Maybe it would be worth a go, but it seems like it
could be a rabbit hole.

> Assigning the vector types should really happen in vectorizable_*
> and not during SLP build itself btw.

Agree we need to improve the way this is handled, but delaying it
to vectorizable_* sounds quite late.  Maybe it should be a more global
decision, since the vector types for each vectorizable_* have to be
compatible and it's not obvious which routine should get first choice.

> Your update-all-shared-vectypes thing looks quadratic to me :/

Should be amortised linear.  The statements in a DR group always
have the same vectype.  When we want to change the vector type
of one statement, we change it for all statements if possible
or fail if we can't.

Thanks,
Richard


Re: [PATCH][RFC] Param to options conversion (demo).

2019-11-05 Thread Richard Biener
On Thu, Oct 31, 2019 at 2:17 PM Martin Liška  wrote:
>
> On 10/31/19 2:16 PM, Martin Liška wrote:
> > On 10/31/19 2:01 PM, Martin Liška wrote:
> >> Hi.
> >>
> >> Based on the discussion with Honza and Richard I'm sending a proposal
> >> for conversion of param machinery into the existing option machinery.
> >> Our motivation for the change is to provide per function param values,
> >> similarly what 'Optimization' keyword does for options.
> >>
> >> Right now, we support the following format:
> >> gcc --param=lto-partitions=4 /tmp/main.c -c
> >>
> >> And so that I decided to name newly the params like:
> >>
> >> -param=ipa-sra-ptr-growth-factor=
> >> Common Joined UInteger Var(param_ipa_sra_ptr_growth_factor) Init(2) Param 
> >> Optimization
> >> Maximum allowed growth of number and total size of new parameters
> >> that ipa-sra replaces a pointer to an aggregate with.
> >>
> >> And I learnt decoder to parse '--param' 'name=value' as 
> >> '--param=name=value'. Doing that
> >> the transformation works. Help provides reasonable output as well:
> >>
> >> $ ./xgcc -B. --param predictable-branch-outcome=5  /tmp/main.c -c -Q 
> >> --help=param
> >> The --param option recognizes the following as parameters:
> >>   --param=ipa-sra-ptr-growth-factor= 2
> >>   --param=predictable-branch-outcome=<0,50>  5
> >>
> >> Thoughts?
> >> Thanks,
> >> Martin
> >>
> >> ---
> >>  gcc/common.opt| 18 +++---
> >>  gcc/ipa-sra.c |  3 +--
> >>  gcc/opt-functions.awk |  3 ++-
> >>  gcc/opts-common.c |  9 +
> >>  gcc/opts.c| 36 
> >>  gcc/params.def| 10 --
> >>  gcc/predict.c |  4 ++--
> >>  7 files changed, 25 insertions(+), 58 deletions(-)
> >>
> >>
> >
> > I forgot to add gcc-patches to To.
> >
> > Martin
> >
>
> + the patch.

Nice.

I wonder if we can auto-generate params.h so that
PARAM_VALUE (...) can continue to "work"?  But maybe that's too much
and against making them first-class (but "unsupported") options.  At least
it would make the final patch _much_ smaller... (one could think of
auto-generating an enum and using an array of params for the storage
again - but then possibly split for [non-]Optimization - ugh).  If we
(auto-)name
the variables all-uppercase like PARAM_IPA_SRA_PTR_GROWTH_FACTOR
we could have

#define PARAM_VALUE (x) x

... (that said, everything that helps making the transition hit GCC 10
is appreciated ;))

For

+-param=ipa-sra-ptr-growth-factor=
+Common Joined UInteger Var(param_ipa_sra_ptr_growth_factor) Init(2)
Param Optimization

I wonder if both Var(...) and Param can be "autodetected" (aka
actually required)?

At least the core of the patch looks nicely small!  How do the OPT_ enum values
for a --param look like?

Thanks,
Richard.

> Martin


Re: [PATCH] Report errors on inconsistent OpenACC nested reduction clauses

2019-11-05 Thread Thomas Schwinge
Hi Frederik!

On 2019-10-29T13:20:53+0100, "Harwath, Frederik"  
wrote:
> On 24.10.19 16:31, Thomas Schwinge wrote:
>> So just C/C++ testing, no Fortran at all.  This is not ideal, but
>> probably (hopefully) acceptable given that this is working on the middle
>> end representation shared between all front ends.
>
> Thanks to Tobias, we now also have Fortran tests.

Indeed, thanks, Tobias.  I have not reviewed these in great detail, but
they certainly do look plausible.


>>> --- a/gcc/testsuite/c-c++-common/goacc/reduction-6.c
>>> +++ b/gcc/testsuite/c-c++-common/goacc/reduction-6.c
>>> @@ -16,17 +16,6 @@ int foo (int N)
>>> }
>>> }
>>>   
>>> -  #pragma acc parallel
>>> -  {
>>> -#pragma acc loop reduction(+:b)
>>> -for (int i = 0; i < N; i++)
>>> -  {
>>> -#pragma acc loop
>>> -   for (int j = 0; j < N; j++)
>>> - b += 1;
>>> -  }
>>> -  }
>>> -
>>> #pragma acc parallel
>>> {
>>>   #pragma acc loop reduction(+:c)
>> 
>> That one stays in, but gets a 'dg-warning'.
>
> What warning would you expect to see here? I do not get any warnings.

What I meant was that you should re-instantiate the code removed here,
and then add the expected 'dg-warning'.

..., but upon having a look myself, I notice that there actually is no
"nested loop in reduction needs reduction clause" diagnostic printed
here, huh.  Should there be?  (OK to address separately, later on.)


Similar for the libgomp execution test cases: undo the 'reduction' clause
additions, and instead add the expected 'dg-warning's (here, they're
really necessary), for the reason I had given at the end of my email.

Sorry if that was unclear.

For the same reason, please also leave out Tobias' translated
'libgomp.oacc-fortran/par-loop-comb-reduction-1.f90' -- we shall later
consider that one, separately.


For your convenience, I'm attaching an incremental patch, to be merged
into yours.


> From 22f45d4c2c11febce171272f9289c487aed4f9d7 Mon Sep 17 00:00:00 2001
> From: Frederik Harwath 
> Date: Tue, 29 Oct 2019 12:39:23 +0100
> Subject: [PATCH] Warn about inconsistent OpenACC nested reduction clauses
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> OpenACC (cf. OpenACC 2.7, section 2.9.11. "reduction clause";
> this was first clarified by OpenACC 2.6) requires that, if a
> variable is used in reduction clauses on two nested loops, then
> there must be reduction clauses for that variable on all loops
> that are nested in between the two loops and all these reduction
> clauses must use the same operator.
> This commit introduces a check for that property which reports
> warnings if it is violated.
>
> In gcc/testsuite/c-c++-common/goacc/reduction-6.c, we remove the erroneous
> reductions on variable b; adding a reduction clause to make it compile 
> cleanly
> would make it a duplicate of the test for variable c.

The latter paragraph then is not needed anymore.

> 2019-10-29  Gergö Barany  
>   Tobias Burnus  
>   Frederik Harwath  
>   Thomas Schwinge  
>
>gcc/
>* omp-low.c (struct omp_context): New fields
>local_reduction_clauses, outer_reduction_clauses.
>(new_omp_context): Initialize these.
>(scan_sharing_clauses): Record reduction clauses on OpenACC constructs.
>(scan_omp_for): Check reduction clauses for incorrect nesting.
>gcc/testsuite/
>* c-c++-common/goacc/nested-reductions-warn.c: New test.
>* c-c++-common/goacc/nested-reductions.c: New test.
>* c-c++-common/goacc/reduction-6.c: Adjust.
>* gfortran.dg/goacc/nested-reductions-warn.f90: New test.
>* gfortran.dg/goacc/nested-reductions.f90: New test.
>libgomp/
>* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c:
>Add missing reduction clauses.
>* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c:
>Likewise.
>* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c:
>Likewise.
>* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c:
>Likewise.
>* testsuite/libgomp.oacc-fortran/par-loop-comb-reduction-1.f90:
>New test.

The ChangeLog updates still have to be adjusted per my incremental patch.

With that addressed, OK for trunk.


A few more comments to address separately, later on.


I noticed in the 'libgomp.log' that we currently print:

[...]/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: In function 
'main':
[...]/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c:18:13: warning: 
nested loop in reduction needs reduction clause for 'res'
[...]/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c:18:13: warning: 
nested loop in reduction needs reduction clause for 'res'

Duplicate diagnostic, due to the two nested inner loops.  (I'm just
noting that, not compla

[0/6] Optionally pick the cheapest loop_vec_info

2019-11-05 Thread Richard Sandiford
This series adds a mode in which we try to vectorise loops once for
each supported vector mode combination and then pick the one with the
lowest cost.  There are only really two patches for that: one to add the
feature and another to enable it by default for SVE.  However, for it to
work as hoped, I also needed to tweak some of the cost calculations.

The series applies on top of two earlier ones:

  https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00119.html
  https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01822.html

Each patch tested individually on aarch64-linux-gnu and the series
as a whole on x86_64-linux-gnu.

Richard


[1/6] Fix vectorizable_conversion costs

2019-11-05 Thread Richard Sandiford
This patch makes two tweaks to vectorizable_conversion.  The first
is to use "modifier" to distinguish between promotion, demotion,
and neither promotion nor demotion, rather than using a code for
some cases and "modifier" for others.  The second is to take ncopies
into account for the promotion and demotion costs; previously we gave
multiple copies the same cost as a single copy.

Later patches test this, but it seemed worth splitting out.


2019-11-05  Richard Sandiford  

gcc/
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
number of ncopies as an additional argument.
(vectorizable_conversion): Update call accordingly.  Use "modifier"
to check whether a conversion is between vectors with the same
numbers of units.

Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   2019-11-05 11:08:12.521631453 +
+++ gcc/tree-vect-stmts.c   2019-11-05 14:17:43.330141911 +
@@ -917,26 +917,27 @@ vect_model_simple_cost (stmt_vec_info st
 }
 
 
-/* Model cost for type demotion and promotion operations.  PWR is normally
-   zero for single-step promotions and demotions.  It will be one if 
-   two-step promotion/demotion is required, and so on.  Each additional
+/* Model cost for type demotion and promotion operations.  PWR is
+   normally zero for single-step promotions and demotions.  It will be
+   one if two-step promotion/demotion is required, and so on.  NCOPIES
+   is the number of vector results (and thus number of instructions)
+   for the narrowest end of the operation chain.  Each additional
step doubles the number of instructions required.  */
 
 static void
 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
-   enum vect_def_type *dt, int pwr,
+   enum vect_def_type *dt,
+   unsigned int ncopies, int pwr,
stmt_vector_for_cost *cost_vec)
 {
-  int i, tmp;
+  int i;
   int inside_cost = 0, prologue_cost = 0;
 
   for (i = 0; i < pwr + 1; i++)
 {
-  tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
-   (i + 1) : i;
-  inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
-  vec_promote_demote, stmt_info, 0,
-  vect_body);
+  inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+  stmt_info, 0, vect_body);
+  ncopies *= 2;
 }
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
@@ -4981,7 +4982,7 @@ vectorizable_conversion (stmt_vec_info s
   if (!vec_stmt)   /* transformation not required.  */
 {
   DUMP_VECT_SCOPE ("vectorizable_conversion");
-  if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+  if (modifier == NONE)
 {
  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
@@ -4990,14 +4991,17 @@ vectorizable_conversion (stmt_vec_info s
   else if (modifier == NARROW)
{
  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
- cost_vec);
+ /* The final packing step produces one vector result per copy.  */
+ vect_model_promotion_demotion_cost (stmt_info, dt, ncopies,
+ multi_step_cvt, cost_vec);
}
   else
{
  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
- vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
- cost_vec);
+ /* The initial unpacking step produces two vector results
+per copy.  */
+ vect_model_promotion_demotion_cost (stmt_info, dt, ncopies * 2,
+ multi_step_cvt, cost_vec);
}
   interm_types.release ();
   return true;


[2/6] Don't assign a cost to vectorizable_assignment

2019-11-05 Thread Richard Sandiford
vectorizable_assignment handles true SSA-to-SSA copies (which hopefully
we don't see in practice) and no-op conversions that are required
to maintain correct gimple, such as changes between signed and
unsigned types.  These cases shouldn't generate any code and so
shouldn't count against either the scalar or vector costs.

Later patches test this, but it seemed worth splitting out.


2019-11-04  Richard Sandiford  

gcc/
* tree-vect-stmts.c (vectorizable_assignment): Don't add a cost.

Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   2019-11-05 14:17:43.330141911 +
+++ gcc/tree-vect-stmts.c   2019-11-05 14:18:39.169752725 +
@@ -5305,7 +5305,7 @@ vectorizable_conversion (stmt_vec_info s
 static bool
 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 stmt_vec_info *vec_stmt, slp_tree slp_node,
-stmt_vector_for_cost *cost_vec)
+stmt_vector_for_cost *)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -5313,7 +5313,6 @@ vectorizable_assignment (stmt_vec_info s
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   tree new_temp;
   enum vect_def_type dt[1] = {vect_unknown_def_type};
-  int ndts = 1;
   int ncopies;
   int i, j;
   vec vec_oprnds = vNULL;
@@ -5409,7 +5408,8 @@ vectorizable_assignment (stmt_vec_info s
 {
   STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
   DUMP_VECT_SCOPE ("vectorizable_assignment");
-  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, 
cost_vec);
+  /* Don't add a cost here.  SSA copies and no-op conversions
+shouldn't generate any code in either scalar or vector form.  */
   return true;
 }
 


Re: make range_int_cst_p work with any numeric range (VR_ANTI_RANGE, etc)

2019-11-05 Thread Richard Biener
On Tue, Nov 5, 2019 at 2:15 PM Aldy Hernandez  wrote:
>
> The function range_int_cst_p only works with VR_RANGE's at the moment.
> This is silly because VR_ANTI_RANGE and even VR_VARYING can contain
> numeric bounds.  I have fixed this oversight and have made the function
> return the bounds in MIN/MAX.  This simplifies a lot of code, because
> there is no longer a need to special case VR_VARYING and VR_ANTI_RANGE,
> as well as pick at the individual range components outside of the API.
>
> The patch has the pleasant side-effect of bringing more things into the
> API fold.  Basically, any access to either value_range::min(), max(), or
> kind(), is suspect and a big hint that the code should be rewritten to
> use the API (contains_p, varying_p, zero_p, etc).
>
> One of the primary culprits of API noncompliance is the sprintf and
> strlen warning code.  Mind you, not due to negligence on the author, but
> because we had no value-range API when Martin added the passes.  I
> realize it's nobody's responsibility to fix older value-range code, and
> I'll probably end up doing it myself (next cycle??), but I could
> definitely use a hand from the experts, as it's intricate and delicate code.
>
> Speak of which, in converting dump_strlen_info() to use the new
> range_int_cst_p, I noticed a lot of the code disappeared if we used the
> API.  Martin, if you'd prefer not to dump varying, undefined, etc, let
> me know and we can gate that call to vr.dump().  I took the liberty
> because it was simple, clean, and hidden away in an internal debugging
> helper.
>
> OK for trunk?

No.  It's a semantic change, no?  Don't we for VR_ANTI_RANGE
always get [-INF, +INF] back then?  Likewise for VARYING?
What do we get for UNDEFINED?  I think callers are not prepared
for this and expect it to return true for "useful" ranges only.

If you want this, use a new name, like get_range_bounds ().
Also not sure why min/max need to be INTEGER_CST, why
not _always_ return something (that is, the function should never
need to return false).

The patch doesn't look like an improvement, it just adds to confusion.

Richard.


[3/6] Avoid accounting for non-existent vector loop versioning

2019-11-05 Thread Richard Sandiford
vect_analyze_loop_costing uses two profitability thresholds: a runtime
one and a static compile-time one.  The runtime one is simply the point
at which the vector loop is cheaper than the scalar loop, while the
static one also takes into account the cost of choosing between the
scalar and vector loops at runtime.  We compare this static cost against
the expected execution frequency to decide whether it's worth generating
any vector code at all.

However, we never reclaimed the cost of applying the runtime threshold
if it turned out that the vector code can always be used.  And we only
know whether that's true once we've calculated what the runtime
threshold would be.


2019-11-04  Richard Sandiford  

gcc/
* tree-vectorizer.h (vect_apply_runtime_profitability_check_p):
New function.
* tree-vect-loop-manip.c (vect_loop_versioning): Use it.
* tree-vect-loop.c (vect_analyze_loop_2): Likewise.
(vect_transform_loop): Likewise.
(vect_analyze_loop_costing): Don't take the cost of versioning
into account for the static profitability threshold if it turns
out that no versioning is needed.

Index: gcc/tree-vectorizer.h
===
--- gcc/tree-vectorizer.h   2019-11-05 11:14:42.786884473 +
+++ gcc/tree-vectorizer.h   2019-11-05 14:19:33.829371745 +
@@ -1557,6 +1557,17 @@ vect_get_scalar_dr_size (dr_vec_info *dr
   return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr;
 }
 
+/* Return true if LOOP_VINFO requires a runtime check for whether the
+   vector loop is profitable.  */
+
+inline bool
+vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
+{
+  unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+  return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && th >= vect_vf_for_cost (loop_vinfo));
+}
+
 /* Source location + hotness information. */
 extern dump_user_location_t vect_location;
 
Index: gcc/tree-vect-loop-manip.c
===
--- gcc/tree-vect-loop-manip.c  2019-11-05 10:38:31.838181047 +
+++ gcc/tree-vect-loop-manip.c  2019-11-05 14:19:33.825371773 +
@@ -3173,8 +3173,7 @@ vect_loop_versioning (loop_vec_info loop
 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
   unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
 
-  if (th >= vect_vf_for_cost (loop_vinfo)
-  && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+  if (vect_apply_runtime_profitability_check_p (loop_vinfo)
   && !ordered_p (th, versioning_threshold))
 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
 build_int_cst (TREE_TYPE (scalar_loop_iters),
Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-05 11:14:42.782884501 +
+++ gcc/tree-vect-loop.c2019-11-05 14:19:33.829371745 +
@@ -1689,6 +1689,24 @@ vect_analyze_loop_costing (loop_vec_info
   return 0;
 }
 
+  /* The static profitability threshold min_profitable_estimate includes
+ the cost of having to check at runtime whether the scalar loop
+ should be used instead.  If it turns out that we don't need or want
+ such a check, the threshold we should use for the static estimate
+ is simply the point at which the vector loop becomes more profitable
+ than the scalar loop.  */
+  if (min_profitable_estimate > min_profitable_iters
+  && !LOOP_REQUIRES_VERSIONING (loop_vinfo)
+  && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
+  && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+  && !vect_apply_runtime_profitability_check_p (loop_vinfo))
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime"
+" choice between the scalar and vector loops\n");
+  min_profitable_estimate = min_profitable_iters;
+}
+
   HOST_WIDE_INT estimated_niter;
 
   /* If we are vectorizing an epilogue then we know the maximum number of
@@ -2225,8 +2243,7 @@ vect_analyze_loop_2 (loop_vec_info loop_
 
   /*  Use the same condition as vect_transform_loop to decide when to use
  the cost to determine a versioning threshold.  */
-  if (th >= vect_vf_for_cost (loop_vinfo)
- && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+  if (vect_apply_runtime_profitability_check_p (loop_vinfo)
  && ordered_p (th, niters_th))
niters_th = ordered_max (poly_uint64 (th), niters_th);
 
@@ -8268,14 +8285,13 @@ vect_transform_loop (loop_vec_info loop_
  run at least the (estimated) vectorization factor number of times
  checking is pointless, too.  */
   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
-  if (th >= vect_vf_for_cost (loop_vinfo)
-  && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+  if (vect_apply_runtime_profitability_check_p (loop_vinfo))

[4/6] Optionally pick the cheapest loop_vec_info

2019-11-05 Thread Richard Sandiford
This patch adds a mode in which the vectoriser tries each available
base vector mode and picks the one with the lowest cost.  For now
the behaviour is behind a default-off --param, but a later patch
enables it by default for SVE.

The patch keeps the current behaviour of preferring a VF of
loop->simdlen over any larger or smaller VF, regardless of costs
or target preferences.


2019-11-05  Richard Sandiford  

gcc/
* params.def (vect-compare-loop-costs): New param.
* doc/invoke.texi: Document it.
* tree-vectorizer.h (_loop_vec_info::vec_outside_cost)
(_loop_vec_info::vec_inside_cost): New member variables.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize them.
(vect_better_loop_vinfo_p, vect_joust_loop_vinfos): New functions.
(vect_analyze_loop): When the new parameter allows, try vectorizing
the loop with each available vector mode and picking the one with
the lowest cost.
(vect_estimate_min_profitable_iters): Record the computed costs
in the loop_vec_info.

Index: gcc/params.def
===
--- gcc/params.def  2019-10-31 17:15:25.470517368 +
+++ gcc/params.def  2019-11-05 14:19:58.781197820 +
@@ -661,6 +661,13 @@ DEFPARAM(PARAM_VECT_MAX_PEELING_FOR_ALIG
  "Maximum number of loop peels to enhance alignment of data references 
in a loop.",
  -1, -1, 64)
 
+DEFPARAM(PARAM_VECT_COMPARE_LOOP_COSTS,
+"vect-compare-loop-costs",
+"Whether to try vectorizing a loop using each supported"
+" combination of vector types and picking the version with the"
+" lowest cost.",
+0, 0, 1)
+
 DEFPARAM(PARAM_MAX_CSELIB_MEMORY_LOCATIONS,
 "max-cselib-memory-locations",
 "The maximum memory locations recorded by cselib.",
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi 2019-11-04 21:13:57.611756365 +
+++ gcc/doc/invoke.texi 2019-11-05 14:19:58.777197850 +
@@ -11563,6 +11563,12 @@ doing loop versioning for alias in the v
 The maximum number of loop peels to enhance access alignment
 for vectorizer. Value -1 means no limit.
 
+@item vect-compare-loop-costs
+Whether to try vectorizing a loop using each supported combination of
+vector types and picking the version with the lowest cost.  This parameter
+has no effect when @option{-fno-vect-cost-model} or
+@option{-fvect-cost-model=unlimited} are used.
+
 @item max-iterations-to-track
 The maximum number of iterations of a loop the brute-force algorithm
 for analysis of the number of iterations of the loop tries to evaluate.
Index: gcc/tree-vectorizer.h
===
--- gcc/tree-vectorizer.h   2019-11-05 14:19:33.829371745 +
+++ gcc/tree-vectorizer.h   2019-11-05 14:19:58.781197820 +
@@ -601,6 +601,13 @@ typedef class _loop_vec_info : public ve
   /* Cost of a single scalar iteration.  */
   int single_scalar_iteration_cost;
 
+  /* The cost of the vector prologue and epilogue, including peeled
+ iterations and set-up code.  */
+  int vec_outside_cost;
+
+  /* The cost of the vector loop body.  */
+  int vec_inside_cost;
+
   /* Is the loop vectorizable? */
   bool vectorizable;
 
Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-05 14:19:33.829371745 +
+++ gcc/tree-vect-loop.c2019-11-05 14:19:58.781197820 +
@@ -830,6 +830,8 @@ _loop_vec_info::_loop_vec_info (class lo
 scan_map (NULL),
 slp_unrolling_factor (1),
 single_scalar_iteration_cost (0),
+vec_outside_cost (0),
+vec_inside_cost (0),
 vectorizable (false),
 can_fully_mask_p (true),
 fully_masked_p (false),
@@ -2373,6 +2375,80 @@ vect_analyze_loop_2 (loop_vec_info loop_
   goto start_over;
 }
 
+/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
+   to be better than vectorizing it using OLD_LOOP_VINFO.  Assume that
+   OLD_LOOP_VINFO is better unless something specifically indicates
+   otherwise.
+
+   Note that this deliberately isn't a partial order.  */
+
+static bool
+vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
+ loop_vec_info old_loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo);
+  gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop);
+
+  poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo);
+  poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo);
+
+  /* Always prefer a VF of loop->simdlen over any other VF.  */
+  if (loop->simdlen)
+{
+  bool new_simdlen_p = known_eq (new_vf, loop->simdlen);
+  bool old_simdlen_p = known_eq (old_vf, loop->simdlen);
+  if (new_simdlen_p != old_simdlen_p)
+   return new_simdlen_p;
+}
+
+  /* Limit the VFs to what is likely to be the maximum number of

[5/6] Account for the cost of generating loop masks

2019-11-05 Thread Richard Sandiford
We didn't take the cost of generating loop masks into account, and so
tended to underestimate the cost of loops that need multiple masks.


2019-11-05  Richard Sandiford  

gcc/
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Include
the cost of generating loop masks.

gcc/testsuite/
* gcc.target/aarch64/sve/mask_struct_store_3.c: Add
-fno-vect-cost-model.
* gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise.
* gcc.target/aarch64/sve/peel_ind_3.c: Likewise.
* gcc.target/aarch64/sve/peel_ind_3_run.c: Likewise.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-05 14:19:58.781197820 +
+++ gcc/tree-vect-loop.c2019-11-05 14:20:40.188909187 +
@@ -3435,6 +3435,32 @@ vect_estimate_min_profitable_iters (loop
  si->kind, si->stmt_info, si->misalign,
  vect_epilogue);
}
+
+  /* Calculate how many masks we need to generate.  */
+  unsigned int num_masks = 0;
+  rgroup_masks *rgm;
+  unsigned int num_vectors_m1;
+  FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
+   if (rgm->mask_type)
+ num_masks += num_vectors_m1 + 1;
+  gcc_assert (num_masks > 0);
+
+  /* In the worst case, we need to generate each mask in the prologue
+and in the loop body.  One of the loop body mask instructions
+replaces the comparison in the scalar loop, and since we don't
+count the scalar comparison against the scalar body, we shouldn't
+count that vector instruction against the vector body either.
+
+Sometimes we can use unpacks instead of generating prologue
+masks and sometimes the prologue mask will fold to a constant,
+so the actual prologue cost might be smaller.  However, it's
+simpler and safer to use the worst-case cost; if this ends up
+being the tie-breaker between vectorizing or not, then it's
+probably better not to vectorize.  */
+  (void) add_stmt_cost (target_cost_data, num_masks, vector_stmt,
+   NULL, 0, vect_prologue);
+  (void) add_stmt_cost (target_cost_data, num_masks - 1, vector_stmt,
+   NULL, 0, vect_body);
 }
   else if (npeel < 0)
 {
Index: gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c
===
--- gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c  2019-03-08 
18:14:29.768994780 +
+++ gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c  2019-11-05 
14:20:40.184909216 +
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */
 
 #include 
 
Index: gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c
===
--- gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c  
2019-03-08 18:14:29.772994767 +
+++ gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c  
2019-11-05 14:20:40.184909216 +
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */
 
 #include "mask_struct_store_3.c"
 
Index: gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c
===
--- gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c   2019-03-08 
18:14:29.776994751 +
+++ gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c   2019-11-05 
14:20:40.184909216 +
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* Pick an arbitrary target for which unaligned accesses are more
expensive.  */
-/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx" } */
+/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx 
-fno-vect-cost-model" } */
 
 #define N 32
 #define MAX_START 8
Index: gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c
===
--- gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c   2019-03-08 
18:14:29.784994721 +
+++ gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c   2019-11-05 
14:20:40.184909216 +
@@ -1,6 +1,6 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O3 -mtune=thunderx" } */
-/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256" { target 
aarch64_sve256_hw } } */
+/* { dg-options "-O3 -mtune=thunderx -fno-vect-cost-model" } */
+/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256 
-fno-vect-cost-model" { target aarch64_sve256_hw } } */
 
 #include "peel_ind_3.c"
 


[6/6][AArch64] Enable vect-compare-loop-costs by default for SVE

2019-11-05 Thread Richard Sandiford
This patch enables vect-compare-loop-costs by default for SVE, both so
that we can compare SVE against Advanced SIMD and so that (with future
patches) we can compare multiple SVE vectorisation approaches against
each other.

I'll apply if the prerequisites are approved.


2019-11-05  Richard Sandiford  

gcc/
* config/aarch64/aarch64.c (aarch64_override_options_internal):
Set the default value of PARAM_VECT_COMPARE_LOOP_COSTS to 1
when SVE is enabled.

gcc/testsuite/
* gcc.target/aarch64/sve/reduc_3.c: Split multi-vector cases out
into...
* gcc.target/aarch64/sve/reduc_3_costly.c: ...this new test,
passing -fno-vect-cost-model for them.
* gcc.target/aarch64/sve/slp_6.c: Add -fno-vect-cost-model.
* gcc.target/aarch64/sve/slp_7.c,
* gcc.target/aarch64/sve/slp_7_run.c: Split multi-vector cases out
into...
* gcc.target/aarch64/sve/slp_7_costly.c,
* gcc.target/aarch64/sve/slp_7_costly_run.c: ...these new tests,
passing -fno-vect-cost-model for them.

Index: gcc/config/aarch64/aarch64.c
===
--- gcc/config/aarch64/aarch64.c2019-11-05 11:04:15.559298615 +
+++ gcc/config/aarch64/aarch64.c2019-11-05 14:21:15.416663625 +
@@ -13308,6 +13308,14 @@ aarch64_override_options_internal (struc
   initialize_aarch64_code_model (opts);
   initialize_aarch64_tls_size (opts);
 
+  /* Enable vect-compare-loop-costs by default for SVE, both so that we
+ can compare SVE against Advanced SIMD and so that we can compare
+ multiple SVE vectorisation approaches against each other.  */
+  if (TARGET_SVE)
+maybe_set_param_value (PARAM_VECT_COMPARE_LOOP_COSTS, 1,
+  opts->x_param_values,
+  global_options_set.x_param_values);
+
   int queue_depth = 0;
   switch (aarch64_tune_params.autoprefetcher_model)
 {
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c
===
--- gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c  2019-03-08 
18:14:29.784994721 +
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c  2019-11-05 
14:21:15.416663625 +
@@ -17,7 +17,6 @@ void reduc_ptr_##DSTTYPE##_##SRCTYPE (DS
 
 REDUC_PTR (int8_t, int8_t)
 REDUC_PTR (int16_t, int16_t)
-
 REDUC_PTR (int32_t, int32_t)
 REDUC_PTR (int64_t, int64_t)
 
@@ -25,17 +24,6 @@ REDUC_PTR (_Float16, _Float16)
 REDUC_PTR (float, float)
 REDUC_PTR (double, double)
 
-/* Widening reductions.  */
-REDUC_PTR (int32_t, int8_t)
-REDUC_PTR (int32_t, int16_t)
-
-REDUC_PTR (int64_t, int8_t)
-REDUC_PTR (int64_t, int16_t)
-REDUC_PTR (int64_t, int32_t)
-
-REDUC_PTR (float, _Float16)
-REDUC_PTR (double, float)
-
 /* Float<>Int conversions */
 REDUC_PTR (_Float16, int16_t)
 REDUC_PTR (float, int32_t)
@@ -45,8 +33,14 @@ REDUC_PTR (int16_t, _Float16)
 REDUC_PTR (int32_t, float)
 REDUC_PTR (int64_t, double)
 
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 
3 } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 
4 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 
1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 
2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 
2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 
2 { xfail *-*-* } } } */
+/* We don't yet vectorize the int<-float cases.  */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 
1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 
1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 
1 } } */
 /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 
2 } } */
-/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 
3 } } */
-/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 
3 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 
2 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 
2 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_3_costly.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_3_costly.c   2019-11-05 
14:21:15.416663625 +
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */
+
+#include 
+
+#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE))
+
+#define REDUC_PTR(DSTTYPE, SRCTYPE)\
+void reduc_ptr_##DSTTYPE##_##SRCTYPE (DSTTYPE *restrict sum,   \
+ SRCTYPE *restrict

  1   2   >