[PATCH] Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157)

2019-07-17 Thread Jakub Jelinek
Hi!

On the following testcase we end up with a comparison (EQ_EXPR in this case)
with unsupported vector operands, but supported result (vector boolean
type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
a VEC_COND_EXPR which is also not supported by the optab and has the vector
boolean type with scalar mode as the first operand.

The last hunk makes sure that we don't just ignore lowering of the comparison
when it has an integer bitmask result but unsupported vector operands.
The expand_vector_comparison changes make sure we lower the comparison
properly into the integer bitmask, and finally the expand_vector_condition
changes make sure we properly lower the VEC_COND_EXPR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-07-17  Jakub Jelinek  

PR tree-optimization/91157
* tree-vect-generic.c (expand_vector_comparison): Handle lhs being
a vector boolean with scalar mode.
(expand_vector_condition): Handle first operand being a vector boolean
with scalar mode.
(expand_vector_operations_1): For comparisons, don't bail out early
if the return type is vector boolean with scalar mode, but comparison
operand type is not.

* gcc.target/i386/avx512f-pr91157.c: New test.
* gcc.target/i386/avx512bw-pr91157.c: New test.

--- gcc/tree-vect-generic.c.jj  2019-07-04 00:18:37.063010439 +0200
+++ gcc/tree-vect-generic.c 2019-07-16 12:40:41.343059690 +0200
@@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
   tree t;
   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
   && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
-t = expand_vector_piecewise (gsi, do_compare, type,
-TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
+{
+  if (VECTOR_BOOLEAN_TYPE_P (type)
+ && VECTOR_BOOLEAN_TYPE_P (type)
+ && SCALAR_INT_MODE_P (TYPE_MODE (type))
+ && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
+  TYPE_VECTOR_SUBPARTS (type)
+  * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+   (TREE_TYPE (type)
+   {
+ tree inner_type = TREE_TYPE (TREE_TYPE (op0));
+ tree part_width = TYPE_SIZE (inner_type);
+ tree index = bitsize_int (0);
+ int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
+ int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
+ tree ret_type = build_nonstandard_integer_type (prec, 1);
+ tree ret_inner_type = boolean_type_node;
+ int i;
+ location_t loc = gimple_location (gsi_stmt (*gsi));
+ t = build_zero_cst (ret_type);
+
+ if (TYPE_PRECISION (ret_inner_type) != 1)
+   ret_inner_type = build_nonstandard_integer_type (1, 1);
+ warning_at (loc, OPT_Wvector_operation_performance,
+ "vector operation will be expanded piecewise");
+ for (i = 0; i < nunits;
+  i++, index = int_const_binop (PLUS_EXPR, index, part_width))
+   {
+ tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
+index);
+ tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
+index);
+ tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
+ t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
+  bitsize_int (i));
+   }
+ t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
+   }
+  else
+   t = expand_vector_piecewise (gsi, do_compare, type,
+TREE_TYPE (TREE_TYPE (op0)), op0, op1,
+code);
+}
   else
 t = NULL_TREE;
 
@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
   tree a1 = a;
   tree a2 = NULL_TREE;
   bool a_is_comparison = false;
+  bool a_is_scalar_bitmask = false;
   tree b = gimple_assign_rhs2 (stmt);
   tree c = gimple_assign_rhs3 (stmt);
   vec *v;
@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
   warning_at (loc, OPT_Wvector_operation_performance,
  "vector condition will be expanded piecewise");
 
+  if (!a_is_comparison
+  && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
+  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
+  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
+  TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
+  * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+   (TREE_TYPE (TREE_TYPE (a))
+{
+  a_is_scalar_bitmask = true;
+  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
+  tree atype = build_nonstandard_integer_type (prec, 1);
+  a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
+}
+
   int nunits = nunits_fo

[PATCH] Fix PR91181

2019-07-17 Thread Richard Biener


Bootstrapped / tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-07-17  Richard Biener  

PR tree-optimization/91181
* tree-vect-slp.c (vect_build_slp_tree_1): Do not compare
IFN_LOADs as calls.

* gcc.dg/pr91181.c: New testcase.

Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c (revision 273490)
+++ gcc/tree-vect-slp.c (working copy)
@@ -857,7 +857,7 @@ vect_build_slp_tree_1 (unsigned char *sw
  continue;
}
 
- if (rhs_code == CALL_EXPR)
+ if (!load_p && rhs_code == CALL_EXPR)
{
  if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
   as_a <gcall *> (stmt)))
Index: gcc/testsuite/gcc.dg/pr91181.c
===
--- gcc/testsuite/gcc.dg/pr91181.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/pr91181.c  (working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+/* { dg-additional-options "-mavx" { target x86_64-*-* i?86-*-* } } */
+
+enum { a, b, c };
+float *d, *e;
+int f, g, h, i;
+int j()
+{
+  float a;
+  for (; h; h++)
+{
+  i = h * 4;
+  a = d[i + b];
+  if (a) {
+ e[i + b] = g < d[i + b] * f * a ? g : d[i + b] * f * a;
+ e[i + c] = g < d[i + c] * f * a ? g : d[i + c] * f * a;
+  }
+  e[i + b] = e[i + c];
+}
+}


Re: [PATCH] Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157)

2019-07-17 Thread Richard Biener
On Wed, 17 Jul 2019, Jakub Jelinek wrote:

> Hi!
> 
> On the following testcase we end up with a comparison (EQ_EXPR in this case)
> with unsupported vector operands, but supported result (vector boolean
> type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
> a VEC_COND_EXPR which is also not supported by the optab and has the vector
> boolean type with scalar mode as the first operand.
> 
> The last hunk makes sure that we don't just ignore lowering of the comparison
> when it has an integer bitmask result but unsupported vector operands.
> The expand_vector_comparison changes makes sure we lower the comparison
> properly into the integer bitmask and finally the expand_vector_condition
> changes makes sure we lower properly the VEC_COND_EXPR.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-07-17  Jakub Jelinek  
> 
>   PR tree-optimization/91157
>   * tree-vect-generic.c (expand_vector_comparison): Handle lhs being
>   a vector boolean with scalar mode.
>   (expand_vector_condition): Handle first operand being a vector boolean
>   with scalar mode.
>   (expand_vector_operations_1): For comparisons, don't bail out early
>   if the return type is vector boolean with scalar mode, but comparison
>   operand type is not.
> 
>   * gcc.target/i386/avx512f-pr91157.c: New test.
>   * gcc.target/i386/avx512bw-pr91157.c: New test.
> 
> --- gcc/tree-vect-generic.c.jj2019-07-04 00:18:37.063010439 +0200
> +++ gcc/tree-vect-generic.c   2019-07-16 12:40:41.343059690 +0200
> @@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
>tree t;
>if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>&& !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
> -t = expand_vector_piecewise (gsi, do_compare, type,
> -  TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
> +{
> +  if (VECTOR_BOOLEAN_TYPE_P (type)
> +   && VECTOR_BOOLEAN_TYPE_P (type)
> +   && SCALAR_INT_MODE_P (TYPE_MODE (type))
> +   && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
> +TYPE_VECTOR_SUBPARTS (type)
> +* GET_MODE_BITSIZE (SCALAR_TYPE_MODE
> + (TREE_TYPE (type)
> + {
> +   tree inner_type = TREE_TYPE (TREE_TYPE (op0));
> +   tree part_width = TYPE_SIZE (inner_type);
> +   tree index = bitsize_int (0);
> +   int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
> +   int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
> +   tree ret_type = build_nonstandard_integer_type (prec, 1);
> +   tree ret_inner_type = boolean_type_node;
> +   int i;
> +   location_t loc = gimple_location (gsi_stmt (*gsi));
> +   t = build_zero_cst (ret_type);
> +
> +   if (TYPE_PRECISION (ret_inner_type) != 1)
> + ret_inner_type = build_nonstandard_integer_type (1, 1);
> +   warning_at (loc, OPT_Wvector_operation_performance,
> +   "vector operation will be expanded piecewise");
> +   for (i = 0; i < nunits;
> +i++, index = int_const_binop (PLUS_EXPR, index, part_width))
> + {
> +   tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
> +  index);
> +   tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
> +  index);
> +   tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
> +   t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
> +bitsize_int (i));
> + }
> +   t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
> + }
> +  else
> + t = expand_vector_piecewise (gsi, do_compare, type,
> +  TREE_TYPE (TREE_TYPE (op0)), op0, op1,
> +  code);
> +}
>else
>  t = NULL_TREE;
>  
> @@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
>tree a1 = a;
>tree a2 = NULL_TREE;
>bool a_is_comparison = false;
> +  bool a_is_scalar_bitmask = false;
>tree b = gimple_assign_rhs2 (stmt);
>tree c = gimple_assign_rhs3 (stmt);
>vec *v;
> @@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
>warning_at (loc, OPT_Wvector_operation_performance,
> "vector condition will be expanded piecewise");
>  
> +  if (!a_is_comparison
> +  && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
> +  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
> +  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
> +TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
> +* GET_MODE_BITSIZE (SCALAR_TYPE_MODE
> + (TREE_TYPE (TREE_TYPE (a))
> +{
> +  a_is_scalar_bitmask = true;
> +  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TR

Re: PR90724 - ICE with __sync_bool_compare_and_swap with -march=armv8.2-a

2019-07-17 Thread Prathamesh Kulkarni
On Wed, 10 Jul 2019 at 16:54, Prathamesh Kulkarni
 wrote:
>
> Hi,
> For following test-case,
> static long long AL[24];
>
> int
> check_ok (void)
> {
>   return (__sync_bool_compare_and_swap (AL+1, 0x20003ll, 0x1234567890ll));
> }
>
> Compiling with -O2 -march=armv8.2-a results in:
> pr90724.c: In function ‘check_ok’:
> pr90724.c:7:1: error: unrecognizable insn:
> 7 | }
>   | ^
> (insn 11 10 12 2 (set (reg:CC 66 cc)
> (compare:CC (reg:DI 95)
> (const_int 8589934595 [0x20003]))) "pr90724.c":6:11 -1
>  (nil))
>
> IIUC, the issue is that 0x20003 falls outside the range of
> allowable immediate in cmp ? If it's replaced by a small constant then it 
> works.
>
> The ICE results with -march=armv8.2-a because, we enter if
> (TARGET_LSE) { ... } condition
> in aarch64_expand_compare_and_swap, while with -march=armv8.a it goes into 
> else,
> which forces oldval into register if the predicate fails to match.
>
> The attached patch checks if y (oldval) satisfies aarch64_plus_operand
> predicate and if not, forces it to be in register, which resolves ICE.
> Does it look OK ?
>
> Bootstrap+testing in progress on aarch64-linux-gnu.
ping https://gcc.gnu.org/ml/gcc-patches/2019-07/msg00793.html

Thanks,
Prathamesh
>
> PS: The issue has nothing to do with SVE, which I incorrectly
> mentioned in bug report.
>
> Thanks,
> Prathamesh


[committed] Small cleanup now that Cilk+ simd is not supported anymore

2019-07-17 Thread Jakub Jelinek
Hi!

We needed the & GF_OMP_FOR_SIMD stuff when we had
GF_OMP_FOR_SIMD = 1 << 3,
GF_OMP_FOR_KIND_SIMD= GF_OMP_FOR_SIMD | 0,
GF_OMP_FOR_KIND_CILKSIMD= GF_OMP_FOR_SIMD | 1,
and kind & GF_OMP_FOR_SIMD stood for
kind == GF_OMP_FOR_KIND_SIMD || kind == GF_OMP_FOR_KIND_CILKSIMD
Now that we don't have CILKSIMD (since GCC 8), that is completely
unnecessary.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-07-17  Jakub Jelinek  

* gimple.h (enum gf_mask): Remove GF_OMP_FOR_SIMD, change
GF_OMP_FOR_KIND_SIMD to a value serially after other kinds,
divide GF_OMP_FOR_KIND_MASK, GF_OMP_FOR_COMBINED,
GF_OMP_FOR_COMBINED_INTO, GF_OMP_FOR_GRID_PHONY,
GF_OMP_FOR_GRID_INTRA_GROUP and GF_OMP_FOR_GRID_GROUP_ITER by two.
* omp-grid.c (grid_process_grid_body,
grid_eliminate_combined_simd_part): Use GF_OMP_FOR_KIND_SIMD instead
of GF_OMP_FOR_SIMD, don't test & GF_OMP_FOR_SIMD but
== GF_OMP_FOR_KIND_SIMD.
* omp-low.c (build_outer_var_ref, scan_sharing_clauses,
check_omp_nesting_restrictions, scan_omp_1_stmt,
lower_rec_input_clauses, lower_lastprivate_conditional_clauses,
lower_lastprivate_clauses, lower_reduction_clauses, lower_omp_scan,
omp_find_scan): Likewise.
* omp-expand.c (expand_omp_for): Likewise.
* omp-general.c (omp_extract_for_data): Likewise.

--- gcc/gimple.h.jj 2019-06-10 14:18:17.343527538 +0200
+++ gcc/gimple.h2019-07-16 18:15:58.762506593 +0200
@@ -153,24 +153,22 @@ enum gf_mask {
 GF_OMP_PARALLEL_GRID_PHONY = 1 << 1,
 GF_OMP_TASK_TASKLOOP   = 1 << 0,
 GF_OMP_TASK_TASKWAIT   = 1 << 1,
-GF_OMP_FOR_KIND_MASK   = (1 << 4) - 1,
+GF_OMP_FOR_KIND_MASK   = (1 << 3) - 1,
 GF_OMP_FOR_KIND_FOR= 0,
 GF_OMP_FOR_KIND_DISTRIBUTE = 1,
 GF_OMP_FOR_KIND_TASKLOOP   = 2,
 GF_OMP_FOR_KIND_OACC_LOOP  = 4,
-GF_OMP_FOR_KIND_GRID_LOOP = 5,
-/* Flag for SIMD variants of OMP_FOR kinds.  */
-GF_OMP_FOR_SIMD= 1 << 3,
-GF_OMP_FOR_KIND_SIMD   = GF_OMP_FOR_SIMD | 0,
-GF_OMP_FOR_COMBINED= 1 << 4,
-GF_OMP_FOR_COMBINED_INTO   = 1 << 5,
+GF_OMP_FOR_KIND_GRID_LOOP  = 5,
+GF_OMP_FOR_KIND_SIMD   = 6,
+GF_OMP_FOR_COMBINED= 1 << 3,
+GF_OMP_FOR_COMBINED_INTO   = 1 << 4,
 /* The following flag must not be used on GF_OMP_FOR_KIND_GRID_LOOP loop
statements.  */
-GF_OMP_FOR_GRID_PHONY  = 1 << 6,
+GF_OMP_FOR_GRID_PHONY  = 1 << 5,
 /* The following two flags should only be set on GF_OMP_FOR_KIND_GRID_LOOP
loop statements.  */
-GF_OMP_FOR_GRID_INTRA_GROUP= 1 << 6,
-GF_OMP_FOR_GRID_GROUP_ITER  = 1 << 7,
+GF_OMP_FOR_GRID_INTRA_GROUP= 1 << 5,
+GF_OMP_FOR_GRID_GROUP_ITER  = 1 << 6,
 GF_OMP_TARGET_KIND_MASK= (1 << 4) - 1,
 GF_OMP_TARGET_KIND_REGION  = 0,
 GF_OMP_TARGET_KIND_DATA= 1,
--- gcc/omp-grid.c.jj   2019-07-10 15:52:27.858038889 +0200
+++ gcc/omp-grid.c  2019-07-16 18:19:33.239240023 +0200
@@ -1002,7 +1002,7 @@ grid_process_grid_body (gimple_stmt_iter
   *handled_ops_p = false;
   gimple *stmt = gsi_stmt (*gsi);
   if (gimple_code (stmt) == GIMPLE_OMP_FOR
-  && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
+  && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
   {
 gomp_for *loop = as_a  (stmt);
 tree clauses = gimple_omp_for_clauses (loop);
@@ -1030,14 +1030,14 @@ grid_eliminate_combined_simd_part (gomp_
 
   memset (&wi, 0, sizeof (wi));
   wi.val_only = true;
-  enum gf_mask msk = GF_OMP_FOR_SIMD;
+  enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
   wi.info = (void *) &msk;
   walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, 
&wi);
   gimple *stmt = (gimple *) wi.info;
   /* We expect that the SIMD id the only statement in the parallel loop.  */
   gcc_assert (stmt
  && gimple_code (stmt) == GIMPLE_OMP_FOR
- && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
+ && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
  && gimple_omp_for_combined_into_p (stmt)
  && !gimple_omp_for_combined_p (stmt));
   gomp_for *simd = as_a  (stmt);
--- gcc/omp-low.c.jj2019-07-16 18:14:17.965041785 +0200
+++ gcc/omp-low.c   2019-07-16 18:41:21.660310215 +0200
@@ -580,7 +580,7 @@ build_outer_var_ref (tree var, omp_conte
   x = build_receiver_ref (var, by_ref, ctx);
 }
   else if ((gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
-   && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
+   && gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_SIMD)
   || (code == OMP_CLAUSE_PRIVATE
   && (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
   || gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS
@@ -1441,7 +1441,7 @@ scan_sharing_clauses (tree clauses, omp_
  inst

[PATCH] Fix PR91180

2019-07-17 Thread Richard Biener


The following fixes PR91180.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-07-17  Richard Biener  

PR tree-optimization/91180
* tree-ssa-sccvn.c (vn_reference_lookup_3): Fix offset
computation for memset partial defs.

* gcc.dg/torture/pr91180.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 273542)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -2486,7 +2535,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
{
  pd_data pd;
  pd.rhs = build_constructor (NULL_TREE, NULL);
- pd.offset = offset2i - offseti;
+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
  pd.size = leni;
  return data->push_partial_def (pd, vuse, maxsizei);
}
Index: gcc/testsuite/gcc.dg/torture/pr91180.c
===
--- gcc/testsuite/gcc.dg/torture/pr91180.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr91180.c  (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+int
+main ()
+{
+#if __SIZEOF_INT__ == 4
+  unsigned x = 0xffffffff;
+  __builtin_memset (1 + (char *) &x, 0, 2);
+  if (x != 0xff0000ff)
+__builtin_abort ();
+#endif
+  return 0;
+}


Re: [PATCH] Make a warning for -Werror=wrong-language (PR driver/91172).

2019-07-17 Thread Martin Liška
On 7/16/19 6:40 PM, Martin Sebor wrote:
> On 7/16/19 5:16 AM, Martin Liška wrote:
>> Hi.
>>
>> I noticed in the PR that -Werror=argument argument is not verified
>> that the option is supported by a language we compile for.
>> That's changed in the patch. However, it's not ideal as I need to mark
>> the -Werror as the problematic option and one can't print a proper
>> list of valid languages for which the rejected option can be used.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
>> Thanks,
>> Martin
>>
>> gcc/ChangeLog:
>>
>> 2019-07-16  Martin Liska  
>>
>> PR driver/91172
>> * opts-common.c (decode_cmdline_option): Decode
>> argument of -Werror and check it for a wrong language.
>> * opts-global.c (complain_wrong_lang): Remove such case.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-07-16  Martin Liska  
>>
>> PR driver/91172
>> * gcc.dg/pr91172.c: New test.
>> ---
>>   gcc/opts-common.c  | 20 +++-
>>   gcc/opts-global.c  |  6 +-
>>   gcc/testsuite/gcc.dg/pr91172.c |  3 +++
>>   3 files changed, 27 insertions(+), 2 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/pr91172.c
> 
> Nice catch!

Yep, I came across it quite accidentally.

> 
> @@ -745,6 +746,23 @@ decode_cmdline_option (const char **argv, unsigned int 
> lang_mask,
>    /* Check if this is a switch for a different front end.  */
>    if (!option_ok_for_language (option, lang_mask))
>  errors |= CL_ERR_WRONG_LANG;
> +  else if (strcmp (option->opt_text, "-Werror=") == 0
> +   && strchr (opt_value, ',') == NULL)
> +    {
> +  /* Verify that -Werror argument is a valid warning
> + for a languages.  */
> 
> Typo: "for a language" (singular).

Fixed.

> 
> +  else
> +    /* Happens for -Werror=warning_name.  */
> +    warning (0, "command-line error argument %qs is not valid for %s",
> + text, bad_lang);
> 
> It might be better phrased as something like
> 
>   "%<-Werror=%> argument %qs is not valid for %s"
> 
> The argument is not one of a "command-line error."  It's one
> to the -Werror option (which can be specified in other places
> besides the command line).

I like language corrections from native speakers.

I'm sending updated version of the patch.
Thanks,
Martin

> 
> Martin

>From 03baf640c12ea6dfda2215ae07d288b292179217 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 16 Jul 2019 11:11:00 +0200
Subject: [PATCH] Make a warning for -Werror=wrong-language (PR driver/91172).

gcc/ChangeLog:

2019-07-16  Martin Liska  

	PR driver/91172
	* opts-common.c (decode_cmdline_option): Decode
	argument of -Werror and check it for a wrong language.
	* opts-global.c (complain_wrong_lang): Remove such case.

gcc/testsuite/ChangeLog:

2019-07-16  Martin Liska  

	PR driver/91172
	* gcc.dg/pr91172.c: New test.
---
 gcc/opts-common.c  | 20 +++-
 gcc/opts-global.c  |  6 +-
 gcc/testsuite/gcc.dg/pr91172.c |  3 +++
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr91172.c

diff --git a/gcc/opts-common.c b/gcc/opts-common.c
index 660dfe63858..e3f9c549b10 100644
--- a/gcc/opts-common.c
+++ b/gcc/opts-common.c
@@ -537,7 +537,8 @@ decode_cmdline_option (const char **argv, unsigned int lang_mask,
 
   extra_args = 0;
 
-  opt_index = find_opt (argv[0] + 1, lang_mask);
+  const char *opt_value = argv[0] + 1;
+  opt_index = find_opt (opt_value, lang_mask);
   i = 0;
   while (opt_index == OPT_SPECIAL_unknown
 	 && i < ARRAY_SIZE (option_map))
@@ -745,6 +746,23 @@ decode_cmdline_option (const char **argv, unsigned int lang_mask,
   /* Check if this is a switch for a different front end.  */
   if (!option_ok_for_language (option, lang_mask))
 errors |= CL_ERR_WRONG_LANG;
+  else if (strcmp (option->opt_text, "-Werror=") == 0
+	   && strchr (opt_value, ',') == NULL)
+{
+  /* Verify that -Werror argument is a valid warning
+	 for a language.  */
+  char *werror_arg = xstrdup (opt_value + 6);
+  werror_arg[0] = 'W';
+
+  size_t warning_index = find_opt (werror_arg, lang_mask);
+  if (warning_index != OPT_SPECIAL_unknown)
+	{
+	  const struct cl_option *warning_option
+	= &cl_options[warning_index];
+	  if (!option_ok_for_language (warning_option, lang_mask))
+	errors |= CL_ERR_WRONG_LANG;
+	}
+}
 
   /* Convert the argument to lowercase if appropriate.  */
   if (arg && option->cl_tolower)
diff --git a/gcc/opts-global.c b/gcc/opts-global.c
index bf4db775928..7c5bd16c7ea 100644
--- a/gcc/opts-global.c
+++ b/gcc/opts-global.c
@@ -103,10 +103,14 @@ complain_wrong_lang (const struct cl_decoded_option *decoded,
 	   text, bad_lang);
   else if (lang_mask == CL_DRIVER)
 gcc_unreachable ();
-  else
+  else if (ok_langs[0] != '\0')
 /* Eventually this should become a hard error IMO.  */
 warning (0, "command-line option %qs is valid for %s but not for %s",
 	 text, ok_langs, bad_

Re: [PATCH v4] Generalize get_most_common_single_value to return k_th value & count

2019-07-17 Thread Martin Liška
On 7/17/19 7:44 AM, luoxhu wrote:
> Hi Martin,
> Thanks for your review, v4 Changes as below:
>  1. Use decrease bubble sort.
> BTW, I have a question about hist->hvalue.counters[2], when will it become
>  -1, please? Thanks.  Currently, if it is -1, the function will return false.

Hi.

Thanks for that. I made some minor changes to your patch; please see the 
attachment.
-1 is the value that we use for an invalidated histogram. That happens when you
need to fit more values during instrumentation than you have counters in the 
histogram.
It helps to make builds of software reproducible.

Martin
diff --git a/gcc/ipa-profile.c b/gcc/ipa-profile.c
index 1fb939b73d0..970dba39c80 100644
--- a/gcc/ipa-profile.c
+++ b/gcc/ipa-profile.c
@@ -192,8 +192,8 @@ ipa_profile_generate_summary (void)
 		  if (h)
 		{
 		  gcov_type val, count, all;
-		  if (get_most_common_single_value (NULL, "indirect call",
-			h, &val, &count, &all))
+		  if (get_nth_most_common_value (NULL, "indirect call", h,
+		 &val, &count, &all))
 			{
 			  struct cgraph_edge * e = node->get_edge (stmt);
 			  if (e && !e->indirect_unknown_callee)
diff --git a/gcc/profile.c b/gcc/profile.c
index 441cb8eb183..1151b491848 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -743,6 +743,44 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
   free_aux_for_blocks ();
 }
 
+/* Sort the histogram value and count for TOPN and INDIR_CALL types.  */
+
+static void
+sort_hist_values (histogram_value hist)
+{
+  /* counters[2] equal to -1 means that all counters are invalidated.  */
+  if (hist->hvalue.counters[2] == -1)
+return;
+
+  gcc_assert (hist->type == HIST_TYPE_TOPN_VALUES
+	  || hist->type == HIST_TYPE_INDIR_CALL);
+
+  gcc_assert (hist->n_counters == GCOV_TOPN_VALUES_COUNTERS);
+
+  /* Hist value is organized as:
+ [total_executions, value1, counter1, ..., value4, counter4]
+ Use decrease bubble sort to rearrange it.  The sort starts from <value1, counter1> and compares counter first.  If counter is same, compares the
+ value, exchange it if small to keep stable.  */
+  for (unsigned i = 0; i < GCOV_TOPN_VALUES - 1; i++)
+{
+  bool swapped = false;
+  for (unsigned j = 0; j < GCOV_TOPN_VALUES - 1 - i; j++)
+	{
+	  gcov_type *p = &hist->hvalue.counters[2 * j + 1];
+	  if (p[1] < p[3] || (p[1] == p[3] && p[0] < p[2]))
+	{
+	  std::swap (p[0], p[2]);
+	  std::swap (p[1], p[3]);
+	  swapped = true;
+	}
+	}
+
+  if (!swapped)
+	break;
+}
+}
+
 /* Load value histograms values whose description is stored in VALUES array
from .gcda file.  
 
@@ -808,6 +846,10 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
 else
   hist->hvalue.counters[j] = 0;
 
+  if (hist->type == HIST_TYPE_TOPN_VALUES
+	  || hist->type == HIST_TYPE_INDIR_CALL)
+	sort_hist_values (hist);
+
   /* Time profiler counter is not related to any statement,
  so that we have to read the counter and set the value to
  the corresponding call graph node.  */
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 32e6ddd8165..759458868a8 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -713,45 +713,38 @@ gimple_divmod_fixed_value (gassign *stmt, tree value, profile_probability prob,
   return tmp2;
 }
 
-/* Return most common value of TOPN_VALUE histogram.  If
-   there's a unique value, return true and set VALUE and COUNT
+/* Return the n-th value count of TOPN_VALUE histogram.  If
+   there's a value, return true and set VALUE and COUNT
arguments.  */
 
 bool
-get_most_common_single_value (gimple *stmt, const char *counter_type,
-			  histogram_value hist,
-			  gcov_type *value, gcov_type *count,
-			  gcov_type *all)
+get_nth_most_common_value (gimple *stmt, const char *counter_type,
+			   histogram_value hist, gcov_type *value,
+			   gcov_type *count, gcov_type *all, unsigned n)
 {
   if (hist->hvalue.counters[2] == -1)
 return false;
 
+  gcc_assert (n < GCOV_TOPN_VALUES);
+
   *count = 0;
   *value = 0;
 
   gcov_type read_all = hist->hvalue.counters[0];
 
-  for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
-{
-  gcov_type v = hist->hvalue.counters[2 * i + 1];
-  gcov_type c = hist->hvalue.counters[2 * i + 2];
-
-  /* Indirect calls can't be vereified.  */
-  if (stmt && check_counter (stmt, counter_type, &c, &read_all,
- gimple_bb (stmt)->count))
-	return false;
+  gcov_type v = hist->hvalue.counters[2 * n + 1];
+  gcov_type c = hist->hvalue.counters[2 * n + 2];
 
-  *all = read_all;
+  /* Indirect calls can't be verified.  */
+  if (stmt
+  && check_counter (stmt, counter_type, &c, &read_all,
+			gimple_bb (stmt)->count))
+return false;
 
-  if (c > *count)
-	{
-	  *value = v;
-	  *count = c;
-	}
-  else if (c == *count && v > *value)
-	*value = v;
-}
+  *all = read_all;
 
+  *value = v;
+  *count = c;
   return true;
 }
 
@@ -784,8 +777,8 @@ gimp

Re: [range-ops] patch 02/04: enforce canonicalization in value_range

2019-07-17 Thread Aldy Hernandez
I've rebased this patch to be independent of the others, to perhaps 
parallelize the review process.


As mentioned before, this patch enforces canonicalization of ranges upon 
creation.  This makes it easier to compare results of range 
implementations and avoids multiple representations for the same range 
(and the special casing that is usually needed to handle these).  The 
patch also enforces no equivalences for VARYING and UNDEFINED, and 
guarantees that there is only one way to represent varying, with 
VR_VARYING, not with [MIN,MAX], etc.


I have also adjusted ranges_from_anti_range, vrp_val_max, and 
vrp_val_min to work with pointers.  I have seen multiple uses of pointer 
ranges containing constants in VRP, for magic pointer values and such in 
libgcc, and other code bases.  Even though VRP mostly generalizes 
pointers to null / non-null, range-ops gets finer results because it 
doesn't "forget" that a pointer could have been 0 or 0xdeadbeef, or 
whatever.  These changes are not strictly necessary (we could dumb down 
range-ops), but I see no reason to do so.


No ChangeLog entries yet, as we usually go through various iterations 
before we agree on anything.


Tested on x86-64 Linux with all languages.

Aldy
commit ce0b4c5a66cf17a3f4f91793bcf68db854d8d2b8
Author: Aldy Hernandez 
Date:   Mon Jul 15 18:09:27 2019 +0200

Enforce canonicalization in value_range.

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 594ee9adc17..de2f39d8487 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -69,23 +69,20 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "wide-int-range.h"
 
+static bool
+ranges_from_anti_range (const value_range_base *ar,
+			value_range_base *vr0, value_range_base *vr1,
+			bool handle_pointers = false);
+
 /* Set of SSA names found live during the RPO traversal of the function
for still active basic-blocks.  */
 static sbitmap *live;
 
-void
-value_range_base::set (enum value_range_kind kind, tree min, tree max)
-{
-  m_kind = kind;
-  m_min = min;
-  m_max = max;
-  if (flag_checking)
-check ();
-}
-
 void
 value_range::set_equiv (bitmap equiv)
 {
+  if (undefined_p () || varying_p ())
+equiv = NULL;
   /* Since updating the equivalence set involves deep copying the
  bitmaps, only do it if absolutely necessary.
 
@@ -261,7 +258,8 @@ value_range_base::constant_p () const
 void
 value_range_base::set_undefined ()
 {
-  set (VR_UNDEFINED, NULL, NULL);
+  m_kind = VR_UNDEFINED;
+  m_min = m_max = NULL;
 }
 
 void
@@ -273,7 +271,8 @@ value_range::set_undefined ()
 void
 value_range_base::set_varying ()
 {
-  set (VR_VARYING, NULL, NULL);
+  m_kind = VR_VARYING;
+  m_min = m_max = NULL;
 }
 
 void
@@ -324,6 +323,24 @@ value_range::equiv_add (const_tree var,
 bool
 value_range_base::singleton_p (tree *result) const
 {
+  if (m_kind == VR_ANTI_RANGE)
+{
+  if (nonzero_p ())
+	{
+	  if (TYPE_PRECISION (type ()) == 1)
+	{
+	  if (result)
+		*result = m_max;
+	  return true;
+	}
+	  return false;
+	}
+
+  value_range_base vr0, vr1;
+  return (ranges_from_anti_range (this, &vr0, &vr1, true)
+	  && vr1.undefined_p ()
+	  && vr0.singleton_p (result));
+}
   if (m_kind == VR_RANGE
   && vrp_operand_equal_p (min (), max ())
   && is_gimple_min_invariant (min ()))
@@ -499,23 +516,28 @@ static assert_locus **asserts_for;
 /* Return the maximum value for TYPE.  */
 
 tree
-vrp_val_max (const_tree type)
+vrp_val_max (const_tree type, bool handle_pointers)
 {
-  if (!INTEGRAL_TYPE_P (type))
-return NULL_TREE;
-
-  return TYPE_MAX_VALUE (type);
+  if (INTEGRAL_TYPE_P (type))
+return TYPE_MAX_VALUE (type);
+  if (POINTER_TYPE_P (type) && handle_pointers)
+{
+  wide_int max = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
+  return wide_int_to_tree (const_cast (type), max);
+}
+  return NULL_TREE;
 }
 
 /* Return the minimum value for TYPE.  */
 
 tree
-vrp_val_min (const_tree type)
+vrp_val_min (const_tree type, bool handle_pointers)
 {
-  if (!INTEGRAL_TYPE_P (type))
-return NULL_TREE;
-
-  return TYPE_MIN_VALUE (type);
+  if (INTEGRAL_TYPE_P (type))
+return TYPE_MIN_VALUE (type);
+  if (POINTER_TYPE_P (type) && handle_pointers)
+return build_zero_cst (const_cast (type));
+  return NULL_TREE;
 }
 
 /* Return whether VAL is equal to the maximum value of its type.
@@ -626,8 +648,7 @@ intersect_range_with_nonzero_bits (enum value_range_kind vr_type,
extract ranges from var + CST op limit.  */
 
 void
-value_range_base::set_and_canonicalize (enum value_range_kind kind,
-	tree min, tree max)
+value_range_base::set (enum value_range_kind kind, tree min, tree max)
 {
   /* Use the canonical setters for VR_UNDEFINED and VR_VARYING.  */
   if (kind == VR_UNDEFINED)
@@ -645,7 +666,9 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
   if (TREE_CODE (min) != INTEGER_CST
   || TREE_CODE (max) != INTEGER_CST)
 {
-  set (ki

Re: PR90724 - ICE with __sync_bool_compare_and_swap with -march=armv8.2-a

2019-07-17 Thread Kyrill Tkachov

Hi Prathamesh

On 7/10/19 12:24 PM, Prathamesh Kulkarni wrote:

Hi,
For following test-case,
static long long AL[24];

int
check_ok (void)
{
  return (__sync_bool_compare_and_swap (AL+1, 0x20003ll, 
0x1234567890ll));

}

Compiling with -O2 -march=armv8.2-a results in:
pr90724.c: In function ‘check_ok’:
pr90724.c:7:1: error: unrecognizable insn:
    7 | }
  | ^
(insn 11 10 12 2 (set (reg:CC 66 cc)
    (compare:CC (reg:DI 95)
    (const_int 8589934595 [0x20003]))) "pr90724.c":6:11 -1
 (nil))

IIUC, the issue is that 0x20003 falls outside the range of
allowable immediate in cmp ? If it's replaced by a small constant then 
it works.


The ICE occurs with -march=armv8.2-a because we enter the if
(TARGET_LSE) { ... } branch
in aarch64_expand_compare_and_swap, while with -march=armv8-a it goes 
into the else branch,

which forces oldval into register if the predicate fails to match.

The attached patch checks if y (oldval) satisfies aarch64_plus_operand
predicate and if not, forces it to be in register, which resolves ICE.
Does it look OK ?

Bootstrap+testing in progress on aarch64-linux-gnu.

PS: The issue has nothing to do with SVE, which I incorrectly
mentioned in bug report.


This looks ok to me (but you'll need maintainer approval).

Does this fail on the branches as well?

Thanks,

Kyrill



Thanks,
Prathamesh


Update ia64 baseline symbols

2019-07-17 Thread Andreas Schwab
Installed as obvious.

Andreas.

* config/abi/post/ia64-linux-gnu/baseline_symbols.txt: Update.

Index: config/abi/post/ia64-linux-gnu/baseline_symbols.txt
===
--- config/abi/post/ia64-linux-gnu/baseline_symbols.txt (revision 273546)
+++ config/abi/post/ia64-linux-gnu/baseline_symbols.txt (working copy)
@@ -112,6 +112,7 @@
 FUNC:_ZN11__gnu_debug19_Safe_sequence_base18_M_detach_singularEv@@GLIBCXX_3.4
 
FUNC:_ZN11__gnu_debug19_Safe_sequence_base22_M_revalidate_singularEv@@GLIBCXX_3.4
 FUNC:_ZN11__gnu_debug19_Safe_sequence_base7_M_swapERS0_@@GLIBCXX_3.4
+FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base16_M_attach_singleEPNS_19_Safe_sequence_baseEb@@GLIBCXX_3.4.26
 
FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_attachEPNS_19_Safe_sequence_baseEb@@GLIBCXX_3.4.17
 FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_detachEv@@GLIBCXX_3.4.17
 
FUNC:_ZN11__gnu_debug30_Safe_unordered_container_base13_M_detach_allEv@@GLIBCXX_3.4.17
@@ -261,6 +262,7 @@
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE8capacityEv@@GLIBCXX_3.4
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE8max_sizeEv@@GLIBCXX_3.4
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE9_M_ibeginEv@@GLIBCXX_3.4
+FUNC:_ZNKSbIwSt11char_traitsIwESaIwEEcvSt17basic_string_viewIwS0_EEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEEixEm@@GLIBCXX_3.4
 FUNC:_ZNKSi6gcountEv@@GLIBCXX_3.4
 FUNC:_ZNKSi6sentrycvbEv@@GLIBCXX_3.4
@@ -328,9 +330,66 @@
 FUNC:_ZNKSs8capacityEv@@GLIBCXX_3.4
 FUNC:_ZNKSs8max_sizeEv@@GLIBCXX_3.4
 FUNC:_ZNKSs9_M_ibeginEv@@GLIBCXX_3.4
+FUNC:_ZNKSscvSt17basic_string_viewIcSt11char_traitsIcEEEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSsixEm@@GLIBCXX_3.4
 FUNC:_ZNKSt10bad_typeid4whatEv@@GLIBCXX_3.4.9
 FUNC:_ZNKSt10error_code23default_error_conditionEv@@GLIBCXX_3.4.11
+FUNC:_ZNKSt10filesystem16filesystem_error4whatEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem16filesystem_error5path1Ev@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem16filesystem_error5path2Ev@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem18directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iterator17recursion_pendingEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iterator5depthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iterator7optionsEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path11parent_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path12has_filenameEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path13has_root_nameEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path13has_root_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path13relative_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path14root_directoryEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path15has_parent_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path16lexically_normalEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path17_M_find_extensionEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path17has_relative_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path18has_root_directoryEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path18lexically_relativeERKS0_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path19lexically_proximateERKS0_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path5_List13_Impl_deleterclEPNS1_5_ImplE@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path5_List3endEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path5_List5beginEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path7compareERKS0_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path7compareESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path9root_nameEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem4path9root_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error4whatEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error5path1Ev@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error5path2Ev@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1118directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator17recursion_pendingEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator5depthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator7optionsEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path11parent_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path12has_filenameEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path13has_root_nameEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path13has_root_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path13relative_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path14root_directoryEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path15has_parent_pathEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path16lexically_normalEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx114path17_M_find_extensionEv@@G

Re: [RFC/PATCH v2][PR89245] Check REG_CALL_DECL note during the tail-merging

2019-07-17 Thread Dragan Mladjenovic


On 09.07.2019. 23:21, Jeff Law wrote:
> On 7/9/19 2:06 PM, Dragan Mladjenovic wrote:
>> This patch prevents merging of CALL instructions that have different
>> REG_CALL_DECL notes attached to them.
>>
>> On most architectures this is not an important distinction. Usually 
>> instruction patterns
>> for calls to different functions reference different SYMBOL_REF-s, so they 
>> won't match.
>> On MIPS PIC calls get split into an got_load/*call_internal pair where the 
>> latter represents
>> indirect register call w/o SYMBOL_REF attached (until machine_reorg pass). 
>> The bugzilla issue
>> had such two internal_call-s merged despite the fact that they had different 
>> register usage
>> information assigned by ipa-ra.
>>
>> As per the comment from Richard Sandiford, this version compares reg usage for 
>> both call
>> instructions instead of shallowly comparing the notes. Tests updated 
>> accordingly.
>>
>> gcc/ChangeLog:
>>
>> 2019-07-09  Dragan Mladjenovic  
>>
>>  * cfgcleanup.c (old_insns_match_p): Check if used hard regs set is equal
>>  for both call instructions.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-07-09  Dragan Mladjenovic  
>>
>>  * gcc.target/mips/cfgcleanup-jalr1.c: New test.
>>  * gcc.target/mips/cfgcleanup-jalr2.c: New test.
>>  * gcc.target/mips/cfgcleanup-jalr3.c: New test.
> Thanks.  I've installed this on the trunk.
>
> jeff
Thanks. Can this be back-ported to the active branches as well? This issue 
seems to have been there
since gcc6, if not gcc5.

Thanks in advance,

Dragan


[PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Kewen.Lin
Hi all,

This patch follows the idea to improve rs6000 backend instead of
generic expander.  I think this is a better solution?  I was thinking
generic expander change may benefit other targets suffering similar
issues but the previous RFC seems too restricted on const rotation 
count, although it's possible to extend.  Any comments on their pros/
cons are really helpful to me (a noob).

Regression testing has just been launched; is it OK for trunk if it's bootstrapped
and regression-tested on powerpc64le-unknown-linux-gnu?


Thanks,
Kewen

 

gcc/ChangeLog

2019-07-17  Kewen Lin  

* config/rs6000/predicates.md (vint_reg_or_const_vector): New predicate.
* config/rs6000/vector.md (vrotr3): New define_expand.

gcc/testsuite/ChangeLog

2019-07-17  Kewen Lin  

* gcc.target/powerpc/vec_rotate-1.c: New test.
* gcc.target/powerpc/vec_rotate-2.c: New test.

on 2019/7/16 下午4:45, Kewen.Lin wrote:
> Hi all,
> 
> Based on the previous comments (thank you!), I tried to update the 
> handling in expander and vectorizer.  Middle-end optimizes lrotate
> with const rotation count to rrotate all the time, it makes vectorizer
> fail to vectorize if rrotate isn't supported on the target.  We can at
> least teach it on const rotation count, the cost should be the same? 
> At the same time, the expander already tries to use the opposite 
> rotation optable for scalar, we can teach it to deal with vector as well.
> 
> Is it on the right track and reasonable?
> 
> 
> Thanks,
> Kewen
> 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 8ca98299950..c4c74630d26 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -163,6 +163,17 @@
   return VINT_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is a vector register that operates on integer vectors
+;; or if op is a const vector with integer vector modes.
+(define_predicate "vint_reg_or_const_vector"
+  (match_code "reg,subreg,const_vector")
+{
+  if (GET_CODE (op) == CONST_VECTOR && GET_MODE_CLASS (mode) == 
MODE_VECTOR_INT)
+return 1;
+
+  return vint_operand (op, mode);
+})
+
 ;; Return 1 if op is a vector register to do logical operations on (and, or,
 ;; xor, etc.)
 (define_predicate "vlogical_operand"
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 70bcfe02e22..5c6a344e452 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -1260,6 +1260,32 @@
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
   "")
 
+;; Expanders for rotatert to make use of vrotl
+(define_expand "vrotr3"
+  [(set (match_operand:VEC_I 0 "vint_operand")
+   (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
+ (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
+{
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  unsigned int bits = GET_MODE_PRECISION (inner_mode);
+  rtx imm_vec = gen_const_vec_duplicate (mode, GEN_INT (bits));
+  rtx rot_count = gen_reg_rtx (mode);
+  if (GET_CODE (operands[2]) == CONST_VECTOR)
+{
+  imm_vec = simplify_const_binary_operation (MINUS, mode, imm_vec,
+operands[2]);
+  rot_count = force_reg (mode, imm_vec);
+}
+  else
+{
+  rtx imm_reg = force_reg (mode, imm_vec);
+  emit_insn (gen_sub3 (rot_count, imm_reg, operands[2]));
+}
+  emit_insn (gen_vrotl3 (operands[0], operands[1], rot_count));
+  DONE;
+})
+
 ;; Expanders for arithmetic shift left on each vector element
 (define_expand "vashl3"
   [(set (match_operand:VEC_I 0 "vint_operand")
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c 
b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c
new file mode 100644
index 000..80aca1a94a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c
@@ -0,0 +1,46 @@
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+
+/* Check vectorizer can exploit vector rotation instructions on Power, mainly
+   for the case rotation count is const number.  */
+
+#define N 256
+unsigned long long sud[N], rud[N];
+unsigned int suw[N], ruw[N];
+unsigned short suh[N], ruh[N];
+unsigned char sub[N], rub[N];
+
+void
+testULL ()
+{
+  for (int i = 0; i < 256; ++i)
+rud[i] = (sud[i] >> 8) | (sud[i] << (sizeof (sud[0]) * 8 - 8));
+}
+
+void
+testUW ()
+{
+  for (int i = 0; i < 256; ++i)
+ruw[i] = (suw[i] >> 8) | (suw[i] << (sizeof (suw[0]) * 8 - 8));
+}
+
+void
+testUH ()
+{
+  for (int i = 0; i < 256; ++i)
+ruh[i] = (unsigned short) (suh[i] >> 9)
+| (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - 9));
+}
+
+void
+testUB ()
+{
+  for (int i = 0; i < 256; ++i)
+rub[i] = (unsigned char) (sub[i] >> 5)
+| (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - 5));
+}
+
+/* { dg-final { scan-assembler {\mvrld\M} } } */
+/* { dg-final { scan-assembler {\mvrlw\M} } } */
+/* { dg-final { scan-assembler {\mvrlh\M} } } */
+/* { dg-final 

Re: [PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Jakub Jelinek
On Wed, Jul 17, 2019 at 04:32:15PM +0800, Kewen.Lin wrote:
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -1260,6 +1260,32 @@
>"VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
>"")
>  
> +;; Expanders for rotatert to make use of vrotl
> +(define_expand "vrotr3"
> +  [(set (match_operand:VEC_I 0 "vint_operand")
> + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
> +   (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]
> +  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
> +{
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  unsigned int bits = GET_MODE_PRECISION (inner_mode);
> +  rtx imm_vec = gen_const_vec_duplicate (mode, GEN_INT (bits));
> +  rtx rot_count = gen_reg_rtx (mode);
> +  if (GET_CODE (operands[2]) == CONST_VECTOR)
> +{
> +  imm_vec = simplify_const_binary_operation (MINUS, mode, imm_vec,
> +  operands[2]);
> +  rot_count = force_reg (mode, imm_vec);
> +}
> +  else
> +{
> +  rtx imm_reg = force_reg (mode, imm_vec);
> +  emit_insn (gen_sub3 (rot_count, imm_reg, operands[2]));
> +}

Is this actually correct if one or more elements in operands[2] are 0?
If vrotl3 acts with truncated shift count, that is not an issue
(but then perhaps you wouldn't have to compute imm_reg - operands[2] but
just - operands[2]), but if it does something else, then prec - 0 will be
prec and thus outside of the allowed rotate count.  Or does rs6000 allow
rotate counts to be 0 to prec inclusive?

Jakub


Re: [PATCH v4] Generalize get_most_common_single_value to return k_th value & count

2019-07-17 Thread luoxhu

Hi Martin,

On 2019/7/17 15:55, Martin Liška wrote:

On 7/17/19 7:44 AM, luoxhu wrote:

Hi Martin,
Thanks for your review, v4 Changes as below:
  1. Use decrease bubble sort.
BTW, I have a question about hist->hvalue.counters[2], when will it become
  -1, please? Thanks.  Currently, if it is -1, the function will return false.


Hi.

Thanks for that. I made some minor changes to your patch; please see the 
attachment.
-1 is a value that we use for invalidated histogram. That happens when you need
to fit in more values during instrumentation than you have counters in the 
histogram.
It helps to make reproducible builds of a software.

Thanks for your patience with many tiny fixes.  I will install the updated
patch to trunk.

Xionghu



Martin





Re: [PATCH v4] Generalize get_most_common_single_value to return k_th value & count

2019-07-17 Thread Martin Liška
On 7/17/19 10:44 AM, luoxhu wrote:
> Hi Martin,
> 
> On 2019/7/17 15:55, Martin Liška wrote:
>> On 7/17/19 7:44 AM, luoxhu wrote:
>>> Hi Martin,
>>> Thanks for your review, v4 Changes as below:
>>>   1. Use decrease bubble sort.
>>> BTW, I have a question about hist->hvalue.counters[2], when will it become
>>>   -1, please? Thanks.  Currently, if it is -1, the function will return 
>>> false.
>>
>> Hi.
>>
>> Thanks for that. I made some minor changes to your patch; please see the 
>> attachment.
>> -1 is a value that we use for invalidated histogram. That happens when you 
>> need
>> to fit in more values during instrumentation than you have counters in the 
>> histogram.
>> It helps to make reproducible builds of a software.
> Thanks for your patience with many tiny fixes.  I will install the updated
> patch to trunk.

Please wait for an approval of a maintainer, I'm not one of them ;)

Thanks,
Martin

> 
> Xionghu
> 
>>
>> Martin
>>
> 



Re: [PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Kewen.Lin
on 2019/7/17 下午4:42, Jakub Jelinek wrote:
> On Wed, Jul 17, 2019 at 04:32:15PM +0800, Kewen.Lin wrote:
>> --- a/gcc/config/rs6000/vector.md
>> +++ b/gcc/config/rs6000/vector.md
>> @@ -1260,6 +1260,32 @@
>>"VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
>>"")
>>  
>> +;; Expanders for rotatert to make use of vrotl
>> +(define_expand "vrotr3"
>> +  [(set (match_operand:VEC_I 0 "vint_operand")
>> +(rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> +  (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]
>> +  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
>> +{
>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>> +  unsigned int bits = GET_MODE_PRECISION (inner_mode);
>> +  rtx imm_vec = gen_const_vec_duplicate (mode, GEN_INT (bits));
>> +  rtx rot_count = gen_reg_rtx (mode);
>> +  if (GET_CODE (operands[2]) == CONST_VECTOR)
>> +{
>> +  imm_vec = simplify_const_binary_operation (MINUS, mode, imm_vec,
>> + operands[2]);
>> +  rot_count = force_reg (mode, imm_vec);
>> +}
>> +  else
>> +{
>> +  rtx imm_reg = force_reg (mode, imm_vec);
>> +  emit_insn (gen_sub3 (rot_count, imm_reg, operands[2]));
>> +}
> 
> Is this actually correct if one or more elements in operands[2] are 0?
> If vrotl3 acts with truncated shift count, that is not an issue
> (but then perhaps you wouldn't have to compute imm_reg - operands[2] but
> just - operands[2]), but if it does something else, then prec - 0 will be
> prec and thus outside of the allowed rotate count.  Or does rs6000 allow
> rotate counts to be 0 to prec inclusive?
> 
>   Jakub
> 

Hi Jakub,

Good question, the vector rotation for byte looks like (others are similar):

vrlb VRT,VRA,VRB
  do i=0 to 127 by 8
   sh = (VRB)[i+5:i+7]
   VRT[i:i+7] = (VRA)[i:i+7] <<< sh
  end

It only takes care of the counts from 0 to prec-1 (inclusive) [log2(prec) bits]
So it's fine even if elements of operands[2] are zero or negative.

Take byte as example, prec is 8.
  - rot count is 0, then minus res gets 8. (out of 3 bits range), same as 0.
  - rot count is 9, then minus res gets -1. (3 bits parsed as 7), the original 
rot count 9 was parsed as 1 (in 3 bits range).
  - rot count is -1, then minus res gets 9, (3 bits parsed as 1), the original
rot count was parsed as 7 (in 3 bits range).

It's a good idea to just use negate!  Thanks!!


Kewen



Re: [PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Jakub Jelinek
On Wed, Jul 17, 2019 at 05:22:38PM +0800, Kewen.Lin wrote:
> Good question, the vector rotation for byte looks like (others are similar):
> 
> vrlb VRT,VRA,VRB
>   do i=0 to 127 by 8
>sh = (VRB)[i+5:i+7]
>VRT[i:i+7] = (VRA)[i:i+7] <<< sh
>   end
> 
> It only takes care of the counts from 0 to prec-1 (inclusive) [log2(prec) 
> bits]
> So it's fine even if elements of operands[2] are zero or negative.
> 
> Take byte as example, prec is 8.
>   - rot count is 0, then minus res gets 8. (out of 3 bits range), same as 0.
>   - rot count is 9, then minus res gets -1. (3 bits parsed as 7), the 
> original 
> rot count 9 was parsed as 1 (in 3 bits range).
>   - rot count is -1, then minus res gets 9, (3 bits parsed as 1), the original
> rot count was parsed as 7 (in 3 bits range).
> 
> It's a good idea to just use negate!  Thanks!!

Ok, so the hw for the vectors truncates; the question is how happy the
generic RTL code will be with that.  rs6000 defines SHIFT_COUNT_TRUNCATED to 0,
so the generic code can't assume there is a truncation going on.  Either it
will punt some optimizations when it sees say negative or too large
shift/rotate count (that is the better case), or it might just assume there
is UB.
As the documentation says, for zero SHIFT_COUNT_TRUNCATED there is an option
of having a pattern with the truncation being explicit, so in your case
*vrotl3_and or similar that would have an explicit AND on the shift
operand with say {7, 7...} vector for the byte shifts etc. but emit in the
end identical instruction to vrotl3 and use the MINUS + that pattern
for vrotr3.  If the rotate argument is CONST_VECTOR, you can of course
just canonicalize, i.e. perform -operands[2] & mask, fold that into constant
and keep using vrotl3 in that case.

Jakub


[PATCH] Fix quadraticnesses in release_defs_bitset and split_constant_offset

2019-07-17 Thread Richard Biener


The testcase in PR91178 runs into these because of vectorizer
code-gen stupidities (I'm going to fix that as well).

For split_constant_offset we should simply limit walking the
SSA def chain, now there's a convenient --param we can use for that.

For release_defs_bitset, its current implementation falls into the
trap of making it too easy to run into quadraticness for the
natural SSA name allocation of a chain of increments like

  _2 = _1 + 1;
  _3 = _2 + 1;
...

which happens here.  The fix is to (heuristically) iterate from
SSA names with higher version to ones with lower version.
Unfortunately there's no backwards bitmap iterator so the following
patch rewrites the iteration to use a vector which also allows us
to switch the bitmap to tree form for the actual iteration then
only doing bit tests/clears.  It's still horrible and as the
comment mentions a topological sort is the correct thing to do
(but we don't have the tooling for that - well, I deleted the
closest match recently).  Until we run into the next testcase ;)

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2019-07-17  Richard Biener  

PR tree-optimization/91178
* tree-ssa.c (release_defs_bitset): Iterate from higher to
lower SSA names to avoid quadratic behavior in the common case.
* tree-data-ref.c (split_constant_offset): Add limit argument
and pass it down.  Initialize it from PARAM_SSA_NAME_DEF_CHAIN_LIMIT.
(split_constant_offset_1): Add limit argument and use it to
limit SSA def walking.  Optimize the common plus/minus case.

Index: gcc/tree-ssa.c
===
--- gcc/tree-ssa.c  (revision 273542)
+++ gcc/tree-ssa.c  (working copy)
@@ -559,20 +559,25 @@ release_defs_bitset (bitmap toremove)
 
   /* Performing a topological sort is probably overkill, this will
  most likely run in slightly superlinear time, rather than the
- pathological quadratic worst case.  */
+ pathological quadratic worst case.
+ But iterate from max SSA name version to min one because
+ that mimics allocation order during code generation behavior best.
+ Use an array for this which we compact on-the-fly with a NULL
+ marker moving towards the end of the vector.  */
+  auto_vec names;
+  names.reserve (bitmap_count_bits (toremove) + 1);
+  names.quick_push (NULL_TREE);
+  EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi)
+names.quick_push (ssa_name (j));
+
+  bitmap_tree_view (toremove);
   while (!bitmap_empty_p (toremove))
 {
-  unsigned to_remove_bit = -1U;
-  EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi)
+  j = names.length () - 1;
+  for (unsigned i = names.length () - 1; names[i];)
{
- if (to_remove_bit != -1U)
-   {
- bitmap_clear_bit (toremove, to_remove_bit);
- to_remove_bit = -1U;
-   }
-
  bool remove_now = true;
- tree var = ssa_name (j);
+ tree var = names[i];
  gimple *stmt;
  imm_use_iterator uit;
 
@@ -617,14 +622,15 @@ release_defs_bitset (bitmap toremove)
  gsi_remove (&gsi, true);
  release_defs (def);
}
-
- to_remove_bit = j;
+ bitmap_clear_bit (toremove, SSA_NAME_VERSION (var));
}
+ else
+   --i;
+ if (--j != i)
+   names[i] = names[j];
}
-  if (to_remove_bit != -1U)
-   bitmap_clear_bit (toremove, to_remove_bit);
 }
-
+  bitmap_list_view (toremove);
 }
 
 /* Disable warnings about missing quoting in GCC diagnostics for
Index: gcc/tree-data-ref.c
===
--- gcc/tree-data-ref.c (revision 273542)
+++ gcc/tree-data-ref.c (working copy)
@@ -583,7 +583,8 @@ debug_ddrs (vec ddrs)
 
 static void
 split_constant_offset (tree exp, tree *var, tree *off,
-  hash_map > &cache);
+  hash_map > &cache,
+  unsigned *limit);
 
 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
(the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
@@ -594,7 +595,8 @@ split_constant_offset (tree exp, tree *v
 static bool
 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 tree *var, tree *off,
-hash_map > &cache)
+hash_map > &cache,
+unsigned *limit)
 {
   tree var0, var1;
   tree off0, off1;
@@ -615,8 +617,15 @@ split_constant_offset_1 (tree type, tree
   /* FALLTHROUGH */
 case PLUS_EXPR:
 case MINUS_EXPR:
-  split_constant_offset (op0, &var0, &off0, cache);
-  split_constant_offset (op1, &var1, &off1, cache);
+  if (TREE_CODE (op1) == INTEGER_CST)
+   {
+ split_constant_offset (op0, &var0, &off0, cache, limit);
+ *var = var0;
+   

[PATCH] Fix PR91178

2019-07-17 Thread Richard Biener


This is the vectorizer part of the fix - currently when we
need to permute a load in contiguous accesses we load the
"gap" between two instances of a group as well.  That can
cause quite excessive code generation (fixed up by DCE / forwprop
later but confusing intermediate passes compile-time wise)
in case the gap is large.

The following addresses this in the SLP case, simply skipping
code generation of such loads.  This avoids the huge IV
increment chain which causes all of the followup issues.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-07-17  Richard Biener  

PR tree-optimization/91178
* tree-vect-stmts.c (get_group_load_store_type): For SLP
loads with a gap larger than the vector size always use
VMAT_STRIDED_SLP.
(vectorizable_load): For VMAT_STRIDED_SLP with a permutation
avoid loading vectors that are only contained in the gap
and thus are not needed.

* gcc.dg/torture/pr91178.c: New testcase.

Index: gcc/testsuite/gcc.dg/torture/pr91178.c
===
--- gcc/testsuite/gcc.dg/torture/pr91178.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr91178.c  (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+int a;
+extern int f[10][91125];
+int b[50];
+void c()
+{
+  for (int d = 6; d <= a; d++)
+for (int e = 16; e <= 24; e++)
+  b[e] -= f[d][d];
+}
Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   (revision 273520)
+++ gcc/tree-vect-stmts.c   (working copy)
@@ -2267,6 +2267,14 @@ get_group_load_store_type (stmt_vec_info
/ vect_get_scalar_dr_size (first_dr_info)))
overrun_p = false;
 
+ /* If the gap at the end of the group exceeds a whole vector
+in size use the strided SLP code which can skip code-generation
+for the gap.  */
+ if (vls_type == VLS_LOAD && known_gt (gap, nunits))
+   *memory_access_type = VMAT_STRIDED_SLP;
+ else
+   *memory_access_type = VMAT_CONTIGUOUS;
+
  /* If the gap splits the vector in half and the target
 can do half-vector operations avoid the epilogue peeling
 by simply loading half of the vector only.  Usually
@@ -2274,7 +2282,8 @@ get_group_load_store_type (stmt_vec_info
  dr_alignment_support alignment_support_scheme;
  scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  machine_mode vmode;
- if (overrun_p
+ if (*memory_access_type == VMAT_CONTIGUOUS
+ && overrun_p
  && !masked_p
  && (((alignment_support_scheme
  = vect_supportable_dr_alignment (first_dr_info, false)))
@@ -2297,7 +2306,6 @@ get_group_load_store_type (stmt_vec_info
 "Peeling for outer loop is not supported\n");
  return false;
}
- *memory_access_type = VMAT_CONTIGUOUS;
}
 }
   else
@@ -8732,6 +8740,7 @@ vectorizable_load (stmt_vec_info stmt_in
   /* Checked by get_load_store_type.  */
   unsigned int const_nunits = nunits.to_constant ();
   unsigned HOST_WIDE_INT cst_offset = 0;
+  unsigned int group_gap = 0;
 
   gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
   gcc_assert (!nested_in_vect_loop);
@@ -8749,6 +8758,7 @@ vectorizable_load (stmt_vec_info stmt_in
   if (slp && grouped_load)
{
  group_size = DR_GROUP_SIZE (first_stmt_info);
+ group_gap = DR_GROUP_GAP (first_stmt_info);
  ref_type = get_group_alias_ptr_type (first_stmt_info);
}
   else
@@ -8892,6 +8902,14 @@ vectorizable_load (stmt_vec_info stmt_in
  if (nloads > 1)
vec_alloc (v, nloads);
  stmt_vec_info new_stmt_info = NULL;
+ if (slp && slp_perm
+ && (group_el % group_size) > group_size - group_gap
+ && (group_el % group_size) + nloads * lnel < group_size)
+   {
+ dr_chain.quick_push (NULL_TREE);
+ group_el += nloads * lnel;
+ continue;
+   }
  for (i = 0; i < nloads; i++)
{
  tree this_off = build_int_cst (TREE_TYPE (alias_off),


[PATCH] Simplify LTO section format.

2019-07-17 Thread Martin Liška
Hi.

The patch simplifies the LTO ELF section header, where we
want to make the fields major_version, minor_version and
slim_object public. The rest is implementation-defined by GCC.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-07-15  Martin Liska  

* lto-section-in.c (lto_get_section_data):
Use new function get_compression.
* lto-streamer-out.c (produce_lto_section): Use
set_compression to encode compression algorithm.
* lto-streamer.h (struct lto_section): Do not
use bitfields in the format.
---
 gcc/lto-section-in.c   |  3 ++-
 gcc/lto-streamer-out.c |  3 ++-
 gcc/lto-streamer.h | 19 ---
 3 files changed, 20 insertions(+), 5 deletions(-)


diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
index 4c2870176ae..0bdcf62b1de 100644
--- a/gcc/lto-section-in.c
+++ b/gcc/lto-section-in.c
@@ -161,7 +161,8 @@ lto_get_section_data (struct lto_file_decl_data *file_data,
 
   stream = lto_start_uncompression (lto_append_data, &buffer);
   lto_uncompress_block (stream, data, *len);
-  lto_end_uncompression (stream, file_data->lto_section_header.compression);
+  lto_end_uncompression (stream,
+			 file_data->lto_section_header.get_compression ());
 
   *len = buffer.length - header_length;
   data = buffer.data + header_length;
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index 35dcae4d589..e0881cf57af 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -2403,7 +2403,8 @@ produce_lto_section ()
 
   bool slim_object = flag_generate_lto && !flag_fat_lto_objects;
   lto_section s
-= { LTO_major_version, LTO_minor_version, slim_object, compression, 0 };
+= { LTO_major_version, LTO_minor_version, slim_object, 0 };
+  s.set_compression (compression);
   lto_write_data (&s, sizeof s);
   lto_end_section ();
   destroy_output_block (ob);
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 3c35d8a3f9a..bf755a64141 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -394,9 +394,22 @@ struct lto_section
 {
   int16_t major_version;
   int16_t minor_version;
-  unsigned char slim_object: 1;
-  lto_compression compression: 4;
-  int32_t reserved0: 27;
+  unsigned char slim_object;
+
+  /* Flags is a private field that is not defined publicly.  */
+  uint16_t flags;
+
+  /* Set compression to FLAGS.  */
+  inline void set_compression (lto_compression c)
+  {
+flags = c;
+  }
+
+  /* Get compression from FLAGS.  */
+  inline lto_compression get_compression ()
+  {
+return (lto_compression) flags;
+  }
 };
 
 STATIC_ASSERT (sizeof (lto_section) == 8);



Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Richard Biener
On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin  wrote:
>
> Hi all,
>
> Based on the previous comments (thank you!), I tried to update the
> handling in expander and vectorizer.  Middle-end optimizes lrotate
> with const rotation count to rrotate all the time, it makes vectorizer
> fail to vectorize if rrotate isn't supported on the target.  We can at
> least teach it on const rotation count, the cost should be the same?
> At the same time, the expander already tries to use the opposite
> rotation optable for scalar, we can teach it to deal with vector as well.
>
> Is it on the right track and reasonable?

So you're basically fixing this up in the expander.  I think on
the GIMPLE level you then miss to update tree-vect-generic.c?

I'm not sure if it makes sense to have both LROTATE_EXPR and
RROTATE_EXPR on the GIMPLE level then (that CPUs only
support one direction is natural though).  So maybe simply get
rid of one?  Its semantics are also nowhere documented
(do we allow negative rotation amounts?  how are
non-mode-precision entities rotated? etc.).

Richard.

>
> Thanks,
> Kewen
>
> --
>
> One informal patch to help describing this new thought:
>
>
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index a0e361b8bfe..ebebb0ad145 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -1273,6 +1273,7 @@ expand_binop (machine_mode mode, optab binoptab, rtx 
> op0, rtx op1,
>if (mclass == MODE_VECTOR_INT)
>  {
>optab otheroptab = unknown_optab;
> +  optab otheroptab1 = unknown_optab;
>
>if (binoptab == ashl_optab)
> otheroptab = vashl_optab;
> @@ -1281,23 +1282,50 @@ expand_binop (machine_mode mode, optab binoptab, rtx 
> op0, rtx op1,
>else if (binoptab == lshr_optab)
> otheroptab = vlshr_optab;
>else if (binoptab == rotl_optab)
> -   otheroptab = vrotl_optab;
> +   {
> + otheroptab = vrotl_optab;
> + otheroptab1 = vrotr_optab;
> +   }
>else if (binoptab == rotr_optab)
> -   otheroptab = vrotr_optab;
> +   {
> + otheroptab = vrotr_optab;
> + otheroptab1 = vrotl_optab;
> +   }
> +
> +  bool other_ok = (otheroptab && (icode = optab_handler (otheroptab, 
> mode)) != CODE_FOR_nothing);
> +  bool other1_ok = false;
> +  if (!other_ok && otheroptab1)
> +   other1_ok
> + = ((icode = optab_handler (otheroptab1, mode)) != CODE_FOR_nothing)
> +   && SCALAR_INT_MODE_P (GET_MODE_INNER (mode));
>
> -  if (otheroptab
> - && (icode = optab_handler (otheroptab, mode)) != CODE_FOR_nothing)
> +  if (other_ok || other1_ok)
> {
>   /* The scalar may have been extended to be too wide.  Truncate
>  it back to the proper size to fit in the broadcast vector.  */
>   scalar_mode inner_mode = GET_MODE_INNER (mode);
> - if (!CONST_INT_P (op1)
> - && (GET_MODE_BITSIZE (as_a  (GET_MODE (op1)))
> + rtx newop1 = op1;
> + if (other1_ok)
> +   {
> + unsigned int bits = GET_MODE_PRECISION (inner_mode);
> +
> + if (CONST_INT_P (op1))
> +   newop1 = gen_int_shift_amount (int_mode, bits - INTVAL (op1));
> + else if (targetm.shift_truncation_mask (int_mode) == bits - 1)
> +   newop1 = negate_rtx (GET_MODE (op1), op1);
> + else
> +   newop1 = expand_binop (GET_MODE (op1), sub_optab,
> +  gen_int_mode (bits, GET_MODE (op1)), 
> op1,
> +  NULL_RTX, unsignedp, OPTAB_DIRECT);
> +   }
> + if (!CONST_INT_P (newop1)
> + && (GET_MODE_BITSIZE (as_a (GET_MODE (newop1)))
>   > GET_MODE_BITSIZE (inner_mode)))
> -   op1 = force_reg (inner_mode,
> -simplify_gen_unary (TRUNCATE, inner_mode, op1,
> -GET_MODE (op1)));
> - rtx vop1 = expand_vector_broadcast (mode, op1);
> +   newop1 = force_reg (inner_mode,
> +   simplify_gen_unary (TRUNCATE, inner_mode,
> +   newop1, GET_MODE 
> (newop1)));
> +
> + rtx vop1 = expand_vector_broadcast (mode, newop1);
>   if (vop1)
> {
>   temp = expand_binop_directly (icode, mode, otheroptab, op0, 
> vop1,
>
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index ff952d6f464..c05ce1acba4 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -2039,6 +2039,15 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, 
> tree *type_out)
>if (optab1
>&& optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
>  return NULL;
> +  /* middle-end canonicalizing LROTATE to RROTATE with const rotation count,
> + let's try the LROTATE as well.  */
> +  if (rhs_code == RROTATE_EXPR && TREE_CODE(oprnd1) == INTEGER_CST)
>

Re: Make alias sets of ODR types more precise

2019-07-17 Thread Richard Biener
On Tue, 16 Jul 2019, Jan Hubicka wrote:

> Hi,
> this is the hunk we omitted from the original patch enabling TBAA for
> ODR types.  Currently record_component_aliases records all pointers as
> void *.  This is because canonical type merging handles them this way
> and thus it may merge for example
> 
> struct a { int *ptr;};
> 
> and 
> 
> struct b { short *ptr;};
> 
> into one canonical type.  The alias set of that canonical type then must
> conflict with both int * and short *, which we do by globbing it to void *,
> which conflicts with everything.
> 
> For ODR types, where we compute canonical types based on their names, we
> however assign a different TYPE_CANONICAL to each of them.  Thanks to this
> we can make the alias set contain int * or short * respectively.
> 
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.

Richard.

> Honza
>   * alias.c (record_component_aliases): Do not simplify pointed-to
>   types of ODR types 
>   * testsuite/g++.dg/lto/alias-4_0.C
> Index: alias.c
> ===
> --- alias.c   (revision 273478)
> +++ alias.c   (working copy)
> @@ -1202,47 +1202,52 @@ record_component_aliases (tree type)
>  case RECORD_TYPE:
>  case UNION_TYPE:
>  case QUAL_UNION_TYPE:
> -  for (field = TYPE_FIELDS (type); field != 0; field = DECL_CHAIN 
> (field))
> - if (TREE_CODE (field) == FIELD_DECL && !DECL_NONADDRESSABLE_P (field))
> -   {
> - /* LTO type merging does not make any difference between 
> -component pointer types.  We may have
> -
> -struct foo {int *a;};
> -
> -as TYPE_CANONICAL of 
> -
> -struct bar {float *a;};
> -
> -Because accesses to int * and float * do not alias, we would get
> -false negative when accessing the same memory location by
> -float ** and bar *. We thus record the canonical type as:
> -
> -struct {void *a;};
> -
> -void * is special cased and works as a universal pointer type.
> -Accesses to it conflicts with accesses to any other pointer
> -type.  */
> - tree t = TREE_TYPE (field);
> - if (in_lto_p)
> -   {
> - /* VECTOR_TYPE and ARRAY_TYPE share the alias set with their
> -element type and that type has to be normalized to void *,
> -too, in the case it is a pointer. */
> - while (!canonical_type_used_p (t) && !POINTER_TYPE_P (t))
> -   {
> - gcc_checking_assert (TYPE_STRUCTURAL_EQUALITY_P (t));
> - t = TREE_TYPE (t);
> -   }
> - if (POINTER_TYPE_P (t))
> -   t = ptr_type_node;
> - else if (flag_checking)
> -   gcc_checking_assert (get_alias_set (t)
> -== get_alias_set (TREE_TYPE (field)));
> -   }
> -
> - record_alias_subset (superset, get_alias_set (t));
> -   }
> +  {
> + /* LTO non-ODR type merging does not make any difference between 
> +component pointer types.  We may have
> +
> +struct foo {int *a;};
> +
> +as TYPE_CANONICAL of 
> +
> +struct bar {float *a;};
> +
> +Because accesses to int * and float * do not alias, we would get
> +false negative when accessing the same memory location by
> +float ** and bar *. We thus record the canonical type as:
> +
> +struct {void *a;};
> +
> +void * is special cased and works as a universal pointer type.
> +Accesses to it conflicts with accesses to any other pointer
> +type.  */
> + bool void_pointers = in_lto_p
> +  && (!odr_type_p (type)
> +  || !odr_based_tbaa_p (type));
> + for (field = TYPE_FIELDS (type); field != 0; field = DECL_CHAIN (field))
> +   if (TREE_CODE (field) == FIELD_DECL && !DECL_NONADDRESSABLE_P (field))
> + {
> +   tree t = TREE_TYPE (field);
> +   if (void_pointers)
> + {
> +   /* VECTOR_TYPE and ARRAY_TYPE share the alias set with their
> +  element type and that type has to be normalized to void *,
> +  too, in the case it is a pointer. */
> +   while (!canonical_type_used_p (t) && !POINTER_TYPE_P (t))
> + {
> +   gcc_checking_assert (TYPE_STRUCTURAL_EQUALITY_P (t));
> +   t = TREE_TYPE (t);
> + }
> +   if (POINTER_TYPE_P (t))
> + t = ptr_type_node;
> +   else if (flag_checking)
> + gcc_checking_assert (get_alias_set (t)
> +  == get_alias_set (TREE_TYPE (field)));
> + }
> +
> +   record_alias_subset (superset, get_alias_set (t));
> + }
> +  }
>break;
>  
>  case COMPLEX_TYPE:
> 
> Index: testsuit

Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Jakub Jelinek
On Wed, Jul 17, 2019 at 12:37:59PM +0200, Richard Biener wrote:
> On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin  wrote:
> > Based on the previous comments (thank you!), I tried to update the
> > handling in expander and vectorizer.  Middle-end optimizes lrotate
> > with const rotation count to rrotate all the time, it makes vectorizer
> > fail to vectorize if rrotate isn't supported on the target.  We can at
> > least teach it on const rotation count, the cost should be the same?
> > At the same time, the expander already tries to use the opposite
> > rotation optable for scalar, we can teach it to deal with vector as well.
> >
> > Is it on the right track and reasonable?
> 
> So you're basically fixing this up in the expander.  I think on
> the GIMPLE level you then miss to update tree-vect-generic.c?
> 
> I'm not sure if it makes sense to have both LROTATE_EXPR and
> RROTATE_EXPR on the GIMPLE level then (that CPUs only
> support one direction is natural though).  So maybe simply get
> rid of one?  Its semantics are also nowhere documented

A lot of targets support both, and I think not all targets do the
truncation, so at least with non-constant rotate count emitting one over the
other is important and trying to match it up only during combine might be
too late and not work well in many cases.
Then there are some targets that only support left rotates and not right
rotates (rs6000, s390, tilegx, ...), and other targets that only support
right rotates (mips, iq2000, ...).
So only having one GIMPLE code doesn't seem to be good enough.

I think handling it during expansion in generic code is fine, especially
when we clearly have several targets that do support only one of the
rotates.  As you wrote, it needs corresponding code in tree-vect-generic.c,
and shouldn't hardcode the rs6000 direction of mapping rotr to rotl, but
support also the other direction - rotl to rotr.  For the sake of
!SHIFT_COUNT_TRUNCATED targets for constant shift counts it needs to do
negation + masking and for variable shift counts probably punt and let the
backend code handle it if it can do the truncation in there?

Jakub


Re: [PATCH PR91137]Find base object for ivopts via walk_tree

2019-07-17 Thread Richard Biener
On Wed, Jul 17, 2019 at 8:52 AM bin.cheng  wrote:
>
> Hi,
> This patch fixes PR91137 by finding base objects with walk_tree utility.  
> Note we specially return
> integer_zero_node when a tree expression contains multiple base objects.  
> This works since the
> special node is compared unequal to any real base object, thus skipped in 
> candidate selection.
> This is intended to avoid propagating multiple base objects (maybe introduced 
> by programmer).
>
> Bootstrap and test on x86_64.  Is it OK?

OK.

Thanks,
Richard.

> Thanks,
> bin
> 2019-07-15  Bin Cheng  
>
> PR tree-optimization/91137
> * tree-ssa-loop-ivopts.c (struct ivopts_data): New field.
> (tree_ssa_iv_optimize_init, alloc_iv, tree_ssa_iv_optimize_finalize):
> Init, use and fini the above new field.
> (determine_base_object_1): New function.
> (determine_base_object): Reimplement using walk_tree.
>
> gcc/testsuite
> 2019-07-15  Bin Cheng  
>
> PR tree-optimization/91137
> * gcc.c-torture/execute/pr91137.c: New test.


[Arm][CMSE]Add warn_unused_result attribute to cmse functions

2019-07-17 Thread Joel Hutton
At present it is possible to call the CMSE functions for checking 
addresses (such as cmse_check_address_range) and  forget to check/use 
the return value. This patch makes the interfaces more robust against 
programmer error by marking these functions with the warn_unused_result 
attribute. With this set, any use of these functions that does not use 
the result will produce a warning.

This produces a warning on default warn levels when the result of the 
cmse functions is not used.

For the following function:
void foo()
{
     int *data;
     cmse_check_address_range((int*)data, 0, 0);
}
The following warning is emitted:
warning: ignoring return value of 'cmse_check_address_range' declared 
with attribute 'warn_unused_result' [-Wunused-result]
     6 |  cmse_check_address_range((int*)data, 0, 0);
    |  ^~

gcc/ChangeLog:

2019-07-10  Joel Hutton  

     * config/arm/arm_cmse.h (cmse_nonsecure_caller): Add 
warn_unused_result attribute.
     (cmse_check_address_range): Add warn_unused_result attribute.

libgcc/ChangeLog:

2019-07-10  Joel Hutton  

     * config/arm/cmse.c (cmse_check_address_range): Add 
warn_unused_result attribute.

2019-07-10  Joel Hutton  

     * gcc.target/arm/cmse/cmse-17.c: New test.

From 628070faaf157934e6b4c8d7d2d288244467bea6 Mon Sep 17 00:00:00 2001
From: Joel Hutton 
Date: Wed, 10 Jul 2019 09:59:58 +0100
Subject: [PATCH] CMSE warn unused result

---
 gcc/config/arm/arm_cmse.h   |  2 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse-17.c | 10 ++
 libgcc/config/arm/cmse.c|  1 +
 3 files changed, 13 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-17.c

diff --git a/gcc/config/arm/arm_cmse.h b/gcc/config/arm/arm_cmse.h
index b543cbfe455ae57487be199f7c918eb87db30bf2..a72c46f1a954bd3ba4aefcdbb7a31085d0f583c0 100644
--- a/gcc/config/arm/arm_cmse.h
+++ b/gcc/config/arm/arm_cmse.h
@@ -164,6 +164,7 @@ __CMSE_TT_ASM (at)
 
 /* FIXME: diagnose use outside cmse_nonsecure_entry functions.  */
 __extension__ static __inline int __attribute__ ((__always_inline__))
+__attribute__ ((warn_unused_result))
 cmse_nonsecure_caller (void)
 {
   return __builtin_arm_cmse_nonsecure_caller ();
@@ -184,6 +185,7 @@ cmse_nonsecure_caller (void)
 #define CMSE_MPU_READ		8
 
 __extension__ void *
+__attribute__ ((warn_unused_result))
 cmse_check_address_range (void *, size_t, int);
 
 #define cmse_check_pointed_object(p, f) \
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-17.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-17.c
new file mode 100644
index ..a2cce09afae590461b86397e73e9b98649bed95a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-17.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse"}  */
+
+#include 
+
+void foo()
+{
+	int *data;
+	cmse_check_address_range((int*)data, 0, 0); /* { dg-warning "ignoring return value" } */
+}
diff --git a/libgcc/config/arm/cmse.c b/libgcc/config/arm/cmse.c
index 34a46fde2d2fcd9dc181bf5a74dd698de2ebc9bd..0c5a3eaefab49ae07e67b82481fdd0d8dd100227 100644
--- a/libgcc/config/arm/cmse.c
+++ b/libgcc/config/arm/cmse.c
@@ -30,6 +30,7 @@
address range.  See ACLE changes for ARMv8-M.  */
 
 void *
+__attribute__ ((warn_unused_result))
 cmse_check_address_range (void *p, size_t size, int flags)
 {
   cmse_address_info_t permb, perme;
-- 
2.17.1



Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Richard Biener
On Wed, Jul 17, 2019 at 12:54 PM Jakub Jelinek  wrote:
>
> On Wed, Jul 17, 2019 at 12:37:59PM +0200, Richard Biener wrote:
> > On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin  wrote:
> > > Based on the previous comments (thank you!), I tried to update the
> > > handling in expander and vectorizer.  Middle-end optimizes lrotate
> > > with const rotation count to rrotate all the time, it makes vectorizer
> > > fail to vectorize if rrotate isn't supported on the target.  We can at
> > > least teach it on const rotation count, the cost should be the same?
> > > At the same time, the expander already tries to use the opposite
> > > rotation optable for scalar, we can teach it to deal with vector as well.
> > >
> > > Is it on the right track and reasonable?
> >
> > So you're basically fixing this up in the expander.  I think on
> > the GIMPLE level you then miss to update tree-vect-generic.c?
> >
> > I'm not sure if it makes sense to have both LROTATE_EXPR and
> > RROTATE_EXPR on the GIMPLE level then (that CPUs only
> > support one direction is natural though).  So maybe simply get
> > rid of one?  Its semantics are also nowhere documented
>
> A lot of targets support both, and I think not all targets do the
> truncation, so at least with non-constant rotate count emitting one over the
> other is important and trying to match it up only during combine might be
> too late and not work well in many cases.
> Then there are some targets that only support left rotates and not right
> rotates (rs6000, s390, tilegx, ...), and other targets that only support
> right rotates (mips, iq2000, ...).
> So only having one GIMPLE code doesn't seem to be good enough.

It seems for constants it is by means of canonicalization.  The lack
of canonicalization for non-constants then makes us fail to CSE
lrotate and rrotate.  Given rotates are only
detected on GIMPLE always creating one or the other should be
reasonably easy and fixup during expansion can happen either via TER
or via pre-expand generation of optab corresponding IFNs?

Might get tricky if we face width - (a + 5) so the pattern matching
of an opposing direction rotate gets harder.

> I think handling it during expansion in generic code is fine, especially
> when we clearly have several targets that do support only one of the
> rotates.

Yes.

> As you wrote, it needs corresponding code in tree-vect-generic.c,
> and shouldn't hardcode the rs6000 direction of mapping rotr to rotl, but
> support also the other direction - rotl to rotr.  For the sake of
> !SHIFT_COUNT_TRUNCATED targets for constant shift counts it needs to do
> negation + masking and for variable shift counts probably punt and let the
> backend code handle it if it can do the truncation in there?

Ick.  I wouldn't even touch SHIFT_COUNT_TRUNCATED with a 10-foot pole
here.  And for rotates we can simply always truncate constant amounts to
the rotated operands width, no?  For non-constant ones I fear targets
would need to support both to get reliable expansion.

Richard.

> Jakub


Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Richard Biener
On Wed, Jul 17, 2019 at 1:32 PM Richard Biener
 wrote:
>
> On Wed, Jul 17, 2019 at 12:54 PM Jakub Jelinek  wrote:
> >
> > On Wed, Jul 17, 2019 at 12:37:59PM +0200, Richard Biener wrote:
> > > On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin  wrote:
> > > > Based on the previous comments (thank you!), I tried to update the
> > > > handling in expander and vectorizer.  Middle-end optimizes lrotate
> > > > with const rotation count to rrotate all the time, it makes vectorizer
> > > > fail to vectorize if rrotate isn't supported on the target.  We can at
> > > > least teach it on const rotation count, the cost should be the same?
> > > > At the same time, the expander already tries to use the opposite
> > > > rotation optable for scalar, we can teach it to deal with vector as 
> > > > well.
> > > >
> > > > Is it on the right track and reasonable?
> > >
> > > So you're basically fixing this up in the expander.  I think on
> > > the GIMPLE level you then miss to update tree-vect-generic.c?
> > >
> > > I'm not sure if it makes sense to have both LROTATE_EXPR and
> > > RROTATE_EXPR on the GIMPLE level then (that CPUs only
> > > support one direction is natural though).  So maybe simply get
> > > rid of one?  Its semantics are also nowhere documented
> >
> > A lot of targets support both, and I think not all targets do the
> > truncation, so at least with non-constant rotate count emitting one over the
> > other is important and trying to match it up only during combine might be
> > too late and not work well in many cases.
> > Then there are some targets that only support left rotates and not right
> > rotates (rs6000, s390, tilegx, ...), and other targets that only support
> > right rotates (mips, iq2000, ...).
> > So only having one GIMPLE code doesn't seem to be good enough.
>
> It seems for constants it is by means of canonicalization.  The lack
> of canonicalization for non-constants then makes us fail to CSE
> lrotate and rrotate.  Given rotates are only
> detected on GIMPLE always creating one or the other should be
> reasonably easy and fixup during expansion can happen either via TER
> or via pre-expand generation of optab corresponding IFNs?
>
> Might get tricky if we face width - (a + 5) so the pattern matching
> of an opposing direction rotate gets harder.
>
> > I think handling it during expansion in generic code is fine, especially
> > when we clearly have several targets that do support only one of the
> > rotates.
>
> Yes.
>
> > As you wrote, it needs corresponding code in tree-vect-generic.c,
> > and shouldn't hardcode the rs6000 direction of mapping rotr to rotl, but
> > support also the other direction - rotl to rotr.  For the sake of
> > !SHIFT_COUNT_TRUNCATED targets for constant shift counts it needs to do
> > negation + masking and for variable shift counts probably punt and let the
> > backend code handle it if it can do the truncation in there?
>
> Ick.  I wouldn't even touch SHIFT_COUNT_TRUNCATED with a 10-foot pole
> here.  And for rotates we can simply always truncate constant amounts to
> the rotated operands width, no?  For non-constant ones I fear targets
> would need to support both to get reliable expansion.

Btw, the docs of SHIFT_COUNT_TRUNCATED do not mention rotates
unless you treat a rotate as a shift.

Richard.

> Richard.
>
> > Jakub


PR91166 - Unfolded ZIPs of constants

2019-07-17 Thread Prathamesh Kulkarni
Hi,
The attached patch tries to fix PR91166.
Does it look OK ?
Bootstrap+test in progress on aarch64-linux-gnu and x86_64-unknown-linux-gnu.

Thanks,
Prathamesh
2019-07-17  Prathamesh Kulkarni  

PR middle-end/91166
* match.pd (vec_perm_expr(v, v, mask) -> v): New pattern.
(define_predicates): Add entry for uniform_vector_p.

testsuite/
* gcc.target/aarch64/sve/pr91166.c: New test.

diff --git a/gcc/match.pd b/gcc/match.pd
index 4a7aa0185d8..2ad98c28fd8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -36,7 +36,8 @@ along with GCC; see the file COPYING3.  If not see
integer_valued_real_p
integer_pow2p
uniform_integer_cst_p
-   HONOR_NANS)
+   HONOR_NANS
+   uniform_vector_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -5568,3 +5569,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type; })
(if (changed)
 (vec_perm { op0; } { op1; } { op2; }))
+
+/* VEC_PERM_EXPR (v, v, mask) -> v where v contains same element.  */
+(simplify
+ (vec_perm (vec_duplicate@0 @1) @0 @2)
+ { @0; })
+
+(simplify
+ (vec_perm uniform_vector_p@0 @0 @1)
+ { @0; }) 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr91166.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr91166.c
new file mode 100644
index 000..42654be3b31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr91166.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve -fdump-tree-optimized" } */
+
+void
+f1 (double x[][4]) 
+{
+  for (int i = 0; i < 4; ++i)
+for (int j = 0; j < 4; ++j)
+  x[i][j] = 0;
+}
+
+void
+f2 (double x[][4], double y)
+{
+  for (int i = 0; i < 4; ++i)
+for (int j = 0; j < 4; ++j)
+  x[i][j] = y;
+}
+
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized"} } */


Re: PR90724 - ICE with __sync_bool_compare_and_swap with -march=armv8.2-a

2019-07-17 Thread Prathamesh Kulkarni
On Wed, 17 Jul 2019 at 13:45, Kyrill Tkachov
 wrote:
>
> Hi Prathamesh
>
> On 7/10/19 12:24 PM, Prathamesh Kulkarni wrote:
> > Hi,
> > For following test-case,
> > static long long AL[24];
> >
> > int
> > check_ok (void)
> > {
> >   return (__sync_bool_compare_and_swap (AL+1, 0x20003ll,
> > 0x1234567890ll));
> > }
> >
> > Compiling with -O2 -march=armv8.2-a results in:
> > pr90724.c: In function ‘check_ok’:
> > pr90724.c:7:1: error: unrecognizable insn:
> > 7 | }
> >   | ^
> > (insn 11 10 12 2 (set (reg:CC 66 cc)
> > (compare:CC (reg:DI 95)
> > (const_int 8589934595 [0x20003]))) "pr90724.c":6:11 -1
> >  (nil))
> >
> > IIUC, the issue is that 0x20003 falls outside the range of
> > allowable immediate in cmp ? If it's replaced by a small constant then
> > it works.
> >
> > The ICE results with -march=armv8.2-a because, we enter if
> > (TARGET_LSE) { ... } condition
> > in aarch64_expand_compare_and_swap, while with -march=armv8.a it goes
> > into else,
> > which forces oldval into register if the predicate fails to match.
> >
> > The attached patch checks if y (oldval) satisfies aarch64_plus_operand
> > predicate and if not, forces it to be in register, which resolves ICE.
> > Does it look OK ?
> >
> > Bootstrap+testing in progress on aarch64-linux-gnu.
> >
> > PS: The issue has nothing to do with SVE, which I incorrectly
> > mentioned in bug report.
> >
> This looks ok to me (but you'll need maintainer approval).
>
> Does this fail on the branches as well?
Hi Kyrill,
Thanks for the review. The test also fails on gcc-9-branch (but not on gcc-8).

Thanks,
Prathamesh
>
> Thanks,
>
> Kyrill
>
>
> > Thanks,
> > Prathamesh


Re: [PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Segher Boessenkool
Hi Kewen,

On Wed, Jul 17, 2019 at 04:32:15PM +0800, Kewen.Lin wrote:
> Regression testing just launched, is it OK for trunk if it's bootstrapped
> and regresstested on powerpc64le-unknown-linux-gnu?

> +;; Expanders for rotatert to make use of vrotl
> +(define_expand "vrotr3"
> +  [(set (match_operand:VEC_I 0 "vint_operand")
> + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
> +   (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]

Having any rotatert in a define_expand or define_insn will regress.

So, nope, sorry.


Segher


Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Segher Boessenkool
On Wed, Jul 17, 2019 at 01:32:50PM +0200, Richard Biener wrote:
> On Wed, Jul 17, 2019 at 12:54 PM Jakub Jelinek  wrote:
> Ick.  I wouldn't even touch SHIFT_COUNT_TRUNCATED with a 10-foot pole
> here.  And for rotates we can simply always truncate constant amounts to
> the rotated operands width, no?  For non-constant ones I fear targets
> would need to support both to get reliable expansion.

SHIFT_COUNT_TRUNCATED has no meaning for rotate instructions, as far as
I can see.  Mathematically it doesn't, and are there any CPUs that fail
for it for no reason?  Any archs where some rotate amounts give
undefined results?  So if this is true, SHIFT_COUNT_TRUNCATED can be
treated as always *on* for rotates.


Segher


[PATCH] Re-indent recently added vn_walk_cb_data::push_partial_def

2019-07-17 Thread Richard Biener


I need to add stuff and indent level is soo high.  But there's rescue.
No functional change.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2019-07-17  Richard Biener  

* tree-ssa-sccvn.c (vn_walk_cb_data::push_partial_def): Refactor
branches to make code less indented.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 273550)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -1746,160 +1746,136 @@ vn_walk_cb_data::push_partial_def (const
   first_range.size = pd.size;
   first_vuse = vuse;
   last_vuse_ptr = NULL;
+  /* Continue looking for partial defs.  */
+  return NULL;
+}
+
+  if (!known_ranges)
+{
+  /* ???  Optimize the case where the 2nd partial def completes things.  */
+  gcc_obstack_init (&ranges_obstack);
+  known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+   pd_tree_alloc,
+   pd_tree_dealloc, this);
+  splay_tree_insert (known_ranges,
+(splay_tree_key)&first_range.offset,
+(splay_tree_value)&first_range);
+}
+
+  pd_range newr = { pd.offset, pd.size };
+  splay_tree_node n;
+  pd_range *r;
+  /* Lookup the predecessor of offset + 1 and see if we need to merge.  */
+  HOST_WIDE_INT loffset = newr.offset + 1;
+  if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
+  && ((r = (pd_range *)n->value), true)
+  && ranges_known_overlap_p (r->offset, r->size + 1,
+newr.offset, newr.size))
+{
+  /* Ignore partial defs already covered.  */
+  if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
+   return NULL;
+  r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
 }
   else
 {
-  if (!known_ranges)
-   {
- /* ???  Optimize the case where the second partial def
-completes things.  */
- gcc_obstack_init (&ranges_obstack);
- known_ranges
- = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
-  pd_tree_alloc,
-  pd_tree_dealloc, this);
- splay_tree_insert (known_ranges,
-(splay_tree_key)&first_range.offset,
-(splay_tree_value)&first_range);
-   }
-  if (known_ranges)
+  /* newr.offset wasn't covered yet, insert the range.  */
+  r = XOBNEW (&ranges_obstack, pd_range);
+  *r = newr;
+  splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
+(splay_tree_value)r);
+}
+  /* Merge r which now contains newr and is a member of the splay tree with
+ adjacent overlapping ranges.  */
+  pd_range *rafter;
+  while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset))
+&& ((rafter = (pd_range *)n->value), true)
+&& ranges_known_overlap_p (r->offset, r->size + 1,
+   rafter->offset, rafter->size))
+{
+  r->size = MAX (r->offset + r->size,
+rafter->offset + rafter->size) - r->offset;
+  splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
+}
+  partial_defs.safe_push (pd);
+
+  /* Now we have merged newr into the range tree.  When we have covered
+ [offseti, sizei] then the tree will contain exactly one node which has
+ the desired properties and it will be 'r'.  */
+  if (!known_subrange_p (0, maxsizei / BITS_PER_UNIT, r->offset, r->size))
+/* Continue looking for partial defs.  */
+return NULL;
+
+  /* Now simply native encode all partial defs in reverse order.  */
+  unsigned ndefs = partial_defs.length ();
+  /* We support up to 512-bit values (for V8DFmode).  */
+  unsigned char buffer[64];
+  int len;
+
+  while (!partial_defs.is_empty ())
+{
+  pd_data pd = partial_defs.pop ();
+  if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+   /* Empty CONSTRUCTOR.  */
+   memset (buffer + MAX (0, pd.offset),
+   0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size));
+  else
{
- pd_range newr = { pd.offset, pd.size };
- splay_tree_node n;
- pd_range *r;
- /* Lookup the predecessor of offset + 1 and see if
-we need to merge with it.  */
- HOST_WIDE_INT loffset = newr.offset + 1;
- if ((n = splay_tree_predecessor (known_ranges,
-  (splay_tree_key)&loffset))
- && ((r = (pd_range *)n->value), true)
- && ranges_known_overlap_p (r->offset, r->size + 1,
-newr.offset, newr.size))
+ unsigned pad = 0;
+ if (BYTES_BIG_ENDIAN
+ && is_a  (TYPE_MODE (TR

[PATCH, i386]: Handle potential partial reg stall in *andqi_2_maybe_si

2019-07-17 Thread Uros Bizjak
2019-07-17  Uroš Bizjak  

* config/i386/i386.md (*andqi_2_maybe_si): Handle potential
partial reg stall on alternative 2.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 273534)
+++ config/i386/i386.md (working copy)
@@ -8689,7 +8689,7 @@
  (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
  (match_operand:QI 2 "general_operand" "qmn,qn,n"))
 (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,r")
(and:QI (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (AND, QImode, operands)
&& ix86_match_ccmode (insn,
@@ -8705,7 +8705,12 @@
   return "and{b}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "alu")
-   (set_attr "mode" "QI,QI,SI")])
+   (set_attr "mode" "QI,QI,SI")
+   ;; Potential partial reg stall on alternative 2.
+   (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+  (symbol_ref "true")))])
 
 (define_insn "*and_2"
   [(set (reg FLAGS_REG)


Re: -Wmissing-attributes: avoid duplicates and false positives

2019-07-17 Thread Martin Sebor

On 7/17/19 12:02 AM, Alexandre Oliva wrote:

Hello, Martin,

The initial patch for PR 81824 fixed one of the possibilities of
-Wmissing-attributes reporting duplicates, namely, if TMPL had an
attribute in ATTRLIST that was missing from DECL's decl and type
attribute lists, both being non-empty.

Another possibility of duplicate reporting remained: when an attribute
in ATTRLIST is present in both decl and type attribute lists of TMPL,
and absent from DECL's attribute lists, it is reported once for each
of TMPL's lists.

The implementation still allowed for false positives: an attribute in
ATTRLIST that is present in TMPL will be regarded as missing as soon
as it is not found in DECL's decl attribute list, even if it is later
found in DECL's type attribute list.


Hey Alex!

Isn't this test sufficient to avoid the problems?

  if (!k && kmax > 1)
continue;


This patch fixes both problems, so that an attribute from ATTRLIST
that is present in any of TMPL's lists will be reported at most once,
and only if it is missing from both DECL's lists.


Now, I realize there are some attributes that are only acceptable for
types, and some that are only acceptable for function declarations.
ISTM that some even have different meanings depending on whether
they're associated with types or declarations.  There's room for an
argument for checking only corresponding lists, for at least some of
the attributes.  AFAICT it doesn't apply to -Wmissing-attributes, that
are either acceptable in either list, or only in the FUNCTION_DECL
list, so I'm leaving it at that in the hope that it doesn't apply to
any other users of decls_mismatched_attributes either.


Regstrapping on x86_64-linux-gnu.  Ok to install if it passes?


The change looks cleaner than the cumbersome code that's there
now so I have no problem with it but I'm not sure it does more
than the test above.  The test case included in the patch also
gets just the expected warnings with the trunk so I'm wondering
how the problems you describe can come up.  Can you put together
a test case that does do the wrong thing?

Thanks
Martin




for  gcc/ChangeLog

PR middle-end/81824
* attribs.c (decls_mismatched_attributes): Avoid duplicates
and false positives.

for  gcc/testsuite/ChangeLog

PR middle-end/81824
* g++.dg/Wmissing-attributes-1.C: New.
---
  gcc/attribs.c|   14 +--
  gcc/testsuite/g++.dg/Wmissing-attributes-1.C |   55 ++
  2 files changed, 65 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/Wmissing-attributes-1.C

diff --git a/gcc/attribs.c b/gcc/attribs.c
index 8e5401655972..f4777c6a8233 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -1931,15 +1931,19 @@ decls_mismatched_attributes (tree tmpl, tree decl, tree 
attrlist,
  if (!has_attribute (tmpls[j], tmpl_attrs[j], blacklist[i]))
continue;
  
+	  bool found = false;

  unsigned kmax = 1 + !!decl_attrs[1];
  for (unsigned k = 0; k != kmax; ++k)
{
  if (has_attribute (decls[k], decl_attrs[k], blacklist[i]))
-   break;
-
- if (!k && kmax > 1)
-   continue;
+   {
+ found = true;
+ break;
+   }
+   }
  
+	  if (!found)

+   {
  if (nattrs)
pp_string (attrstr, ", ");
  pp_begin_quote (attrstr, pp_show_color (global_dc->printer));
@@ -1947,6 +1951,8 @@ decls_mismatched_attributes (tree tmpl, tree decl, tree 
attrlist,
  pp_end_quote (attrstr, pp_show_color (global_dc->printer));
  ++nattrs;
}
+
+ break;
}
  }
  
diff --git a/gcc/testsuite/g++.dg/Wmissing-attributes-1.C b/gcc/testsuite/g++.dg/Wmissing-attributes-1.C

new file mode 100644
index ..56d28b339e17
--- /dev/null
+++ b/gcc/testsuite/g++.dg/Wmissing-attributes-1.C
@@ -0,0 +1,55 @@
+// { dg-do compile }
+// { dg-options "-Wmissing-attributes" }
+
+#define ATTR(list)   __attribute__ (list)
+
+/* Type attributes are normally absent in template functions, and the
+   mere presence of any such attribute used to cause the
+   -Wmissing-attributes checks, that checked for attributes typically
+   associated with functions rather than types, to report any missing
+   attributes twice: once for the specialization attribute list, once
+   for its type attribute list.
+
+   If we force any of the checked-for attributes to be associated with
+   types rather than functions, even later implementations that fixed
+   the duplicate reporting problem above would still report them as
+   missing (in the function attribute list) even when present (in the
+   type attribute list).  */
+typedef void* ATTR ((alloc_size (1))) f_type (int);
+
+template 
+f_type
+ATTR ((malloc))
+missing_malloc;// { dg-message "missing primary template attribute 
.mall

Re: Rewrite some jump.c routines to use flags

2019-07-17 Thread Eric Botcazou
> No, no trivial paths unfortunately.  I'd hoped that inlining and
> jump threading would give us very similar code, but no such luck.
> condition_to_flags is a table lookup, but then flags_to_condition
> is a branch tree.

Too bad.  Perhaps this would be an interesting optimization exercise.

> If that's a concern, I can drop the changes to the existing
> functions and just use the new flags for the follow-on patch.

IMO the net pessimization is a little hard to swallow, although it probably 
doesn't matter much in practice.  I'd suggest adding the new logic in every 
case, but keeping the fast path when it's a nop:

 /* Return the comparison code obtained from CODE by exchanging the
    FLAGS_GT and FLAGS_LT bits of its flag representation.  Codes for
    which that exchange is a no-op are returned directly without going
    through the flag conversion.  */
 enum rtx_code
 swap_condition (enum rtx_code code)
 {
  /* Deal with the trivial cases first.  */
  switch (code)
{
case EQ:
case NE:
case UNORDERED:
case ORDERED:
case UNEQ:
case LTGT:
  /* Fast path: these codes are returned unchanged.  */
  return code;
default:
  break;
}

  /* Convert CODE to its flag set, then rebuild the set with every bit
     other than GT/LT preserved and the GT and LT bits exchanged.  */
  unsigned int flags = condition_to_flags (code);
  flags = ((flags & ~(FLAGS_GT | FLAGS_LT))
  | (flags & FLAGS_GT ? FLAGS_LT : 0)
  | (flags & FLAGS_LT ? FLAGS_GT : 0));
  /* NOTE(review): the meaning of the second argument (true) is not
     visible here -- confirm against flags_to_condition's definition.  */
  return flags_to_condition (flags, true);
 }

OK with this additional change.

-- 
Eric Botcazou


Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Segher Boessenkool
On Wed, Jul 17, 2019 at 12:54:32PM +0200, Jakub Jelinek wrote:
> On Wed, Jul 17, 2019 at 12:37:59PM +0200, Richard Biener wrote:
> > I'm not sure if it makes sense to have both LROTATE_EXPR and
> > RROTATE_EXPR on the GIMPLE level then (that CPUs only
> > support one direction is natural though).  So maybe simply get
> > rid of one?  Its semantics are also nowhere documented
> 
> A lot of targets support both,

Of all the linux targets, we have:

No rotate:
  alpha microblaze riscv sparc

Both directions:
  aarch64 c6x ia64 m68k nios2 parisc sh x86 xtensa

Left only:
  csky h8300 powerpc s390

Right only:
  arc arm mips nds32 openrisc

> Then there are some targets that only support left rotates and not right
> rotates (rs6000, s390, tilegx, ...), and other targets that only support
> right rotates (mips, iq2000, ...).
> So only having one GIMPLE code doesn't seem to be good enough.
> 
> I think handling it during expansion in generic code is fine, especially
> when we clearly have several targets that do support only one of the
> rotates.  As you wrote, it needs corresponding code in tree-vect-generic.c,
> and shouldn't hardcode the rs6000 direction of mapping rotr to rotl, but
> support also the other direction - rotl to rotr.  For the sake of
> !SHIFT_COUNT_TRUNCATED targets for constant shift counts it needs to do
> negation + masking and for variable shift counts probably punt and let the
> backend code handle it if it can do the truncation in there?

I think we can say that *all* targets behave like SHIFT_COUNT_TRUNCATED
for rotates?  Not all immediates are valid of course, but that is a
separate issue.


Segher


C++ PATCH to add test for c++/91104

2019-07-17 Thread Marek Polacek
This was a wrong code issue where we printed
2 3 1
1 2 3
instead of
1 2 3
1 2 3
but it was fixed by r271705.  I don't know of a good way to check
the auto... expansion here so I used dg-output.

Tested on x86_64-linux, ok for trunk?

2019-07-17  Marek Polacek  

PR c++/91104
* g++.dg/cpp1y/lambda-generic-variadic20.C: New test.

--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic20.C
@@ -0,0 +1,20 @@
+// PR c++/91104
+// { dg-do run { target c++14 } }
+
+#include <stdio.h>
+
+void test(void (*f)(int, int, int)) {
+f(1, 2, 3);
+}
+
+int main() {
+test([](auto... args) {
+printf("%d %d %d\n", args...);
+});
+test([](int a, int b, int c) {
+printf("%d %d %d\n", a, b, c);
+});
+}
+
+// { dg-output "1 2 3(\n|\r\n|\r)" }
+// { dg-output "\[^\n\r]*1 2 3" }


Re: [PATCH 2/5, OpenACC] Support Fortran optional arguments in the firstprivate clause

2019-07-17 Thread Kwok Cheung Yeung

On 12/07/2019 12:41 pm, Jakub Jelinek wrote:

This should be done through a langhook.
Are really all PARM_DECLs with DECL_BY_REFERENCE and pointer type optional
arguments?  I mean, POINTER_TYPE is used for a lot of cases.


Hmmm... I thought it was the case that if you pass an argument in by reference 
(the default) in Fortran, the PARM_DECL will always be a reference to the 
argument type if non-optional, or a pointer if optional. However, fixed-shape 
arrays are passed in via a pointer whether optional or not...


I also experimented with passing in a pointer by value, but it seems like that 
is not allowed. e.g.


  subroutine foo(x)
integer, pointer, value :: x
  end subroutine foo

results in:

   11 | integer, pointer, value :: x
  |   1
Error: VALUE attribute conflicts with POINTER attribute at (1)

Are there any more examples in Fortran where a PARM_DECL is a pointer type 
without being optional?


In the Fortran FE, optional arguments are indicated by setting attr.optional on 
the gfc_symbol for the parameter, but the OMP lowering works on a tree - is it 
somehow possible to get from the tree back to the gfc_symbol? If so, that would 
be a more reliable method of detecting optional arguments.


Thanks

Kwok


C++ PATCH to add test for c++/90455

2019-07-17 Thread Marek Polacek
Fixed by r272287.

Tested x86_64-linux, applying to trunk.

2019-07-17  Marek Polacek  

PR c++/90455
* g++.dg/cpp0x/nsdmi-list6.C: New test.

--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/nsdmi-list6.C
@@ -0,0 +1,28 @@
+// PR c++/90455
+// { dg-do compile { target c++11 } }
+
+struct B;
+template  struct b {
+  void operator()(a *) { sizeof(a); }
+};
+struct c {
+  struct D {
+using d = B *;
+  };
+
+  using e = D::d;
+  e f();
+};
+template  class g {
+  c h;
+  using i = b;
+public:
+  ~g() {
+auto j = h.f();
+k()(j);
+  }
+  i k();
+};
+struct l {
+  g m{};
+};


Re: [C++] DEFERRED_PARSE

2019-07-17 Thread Marek Polacek
On Mon, Jul 08, 2019 at 08:25:25AM -0400, Nathan Sidwell wrote:
> Jason, Marek,
> can DEFERRED_PARSE trees survive past the in-class-context late parsing
> stage?  My assumption was not, but in reducing a module testcase I
> encountered a situation when one survived to end of compilation (with no
> errors).  It was an exception specifier on a declared-but-not-defined
> template member function.
> 
> Is my assumption incorrect?  (I can of course further reduce the testcase,
> if needed.)

I think that should be fine.  I guess we can treat it similarly to
DEFERRED_NOEXCEPT.  At least I haven't seen that it broke anything.

Marek


[PATCH, i386]: Remove redundant constraints from ALU insn patterns

2019-07-17 Thread Uros Bizjak
No functional changes.

2019-07-17  Uroš Bizjak  

* config/i386/i386.md (*add3_doubleword):
Remove redundant constraints.
(*add_1): Ditto.
(*addhi_1): Ditto.
(*addqi_1): Ditto.
(*addqi_1_slp): Ditto.
(*add_2): Ditto.
(*addv4): Ditto.
(*sub3_doubleword): Ditto.
(*sub_1): Ditto.
(*subqi_1_slp): Ditto.
(*sub_2): Ditto.
(*subv4): Ditto.
(*sub_3): Ditto.
(@add3_carry): Ditto.
(@sub3_carry): Ditto.
(*add3_cc_overflow_1): Ditto.
(*add3_zext_cc_overflow_2): Ditto.
(*anddi_1): Ditto.
(*and_1): Ditto.
(*andqi_1): Ditto.
(*andqi_1_slp): Ditto.
(*anddi_2): Ditto.
(*andqi_2_maybe_si): Ditto.
(*and_2): Ditto.
(*andqi_2_slp): Ditto.
(*_1): Ditto.
(*qi_1): Ditto.
(*qi_1_slp): Ditto.
(*_2): Ditto.
(*qi_2_slp): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eb32d7c1d2a5..4a359e8035fd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5337,11 +5337,10 @@
   "ix86_expand_binary_operator (PLUS, mode, operands); DONE;")
 
 (define_insn_and_split "*add3_doubleword"
-  [(set (match_operand: 0 "nonimmediate_operand" "=r,o")
+  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r")
(plus:
  (match_operand: 1 "nonimmediate_operand" "%0,0")
- (match_operand: 2 "x86_64_hilo_general_operand"
-   "ro,r")))
+ (match_operand: 2 "x86_64_hilo_general_operand" "r,o")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
   "#"
@@ -5369,10 +5368,10 @@
 })
 
 (define_insn "*add_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI48
  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
- (match_operand:SWI48 2 "x86_64_general_operand" "rme,re,0,le")))
+ (match_operand:SWI48 2 "x86_64_general_operand" "re,m,0,le")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
 {
@@ -5475,7 +5474,7 @@
 (define_insn "*addhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
-(match_operand:HI 2 "general_operand" "rn,rm,0,ln")))
+(match_operand:HI 2 "general_operand" "rn,m,0,ln")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, HImode, operands)"
 {
@@ -5524,7 +5523,7 @@
 (define_insn "*addqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
-(match_operand:QI 2 "general_operand" "qn,qm,0,rn,0,ln")))
+(match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, QImode, operands)"
 {
@@ -5587,7 +5586,7 @@
 (define_insn "*addqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
(plus:QI (match_dup 0)
-(match_operand:QI 1 "general_operand" "qn,qm")))
+(match_operand:QI 1 "general_operand" "qn,m")))
(clobber (reg:CC FLAGS_REG))]
   "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -5680,9 +5679,9 @@
(compare
  (plus:SWI
(match_operand:SWI 1 "nonimmediate_operand" "%0,0,")
-   (match_operand:SWI 2 "" ",,0"))
+   (match_operand:SWI 2 "" ",m,0"))
  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=,m,")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,")
(plus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
&& ix86_binary_operator_ok (PLUS, mode, operands)"
@@ -6073,11 +6072,10 @@
   (sign_extend:
  (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
   (sign_extend:
- (match_operand:SWI 2 ""
-  "mWe,We")))
+ (match_operand:SWI 2 "" "We,m")))
(sign_extend:
   (plus:SWI (match_dup 1) (match_dup 2)
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=,m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,")
(plus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
   "add{}\t{%2, %0|%0, %2}"
@@ -6091,9 +6089,9 @@
  (match_operand:SWI 1 "nonimmediate_operand" "0"))
   (match_operand: 3 "const_int_operand" "i"))
(sign_extend:
-  (plus:SWI (match_dup 1)
-(match_operand:SWI 2 "x86_64_immediate_operand"

Re: Rewrite some jump.c routines to use flags

2019-07-17 Thread Richard Sandiford
Eric Botcazou  writes:
>> No, no trivial paths unfortunately.  I'd hoped that inlining and
>> jump threading would give us very similar code, but no such luck.
>> condition_to_flags is a table lookup, but then flags_to_condition
>> is a branch tree.
>
> Too bad.  Perhaps this would be an interesting optimization exercise.

Yeah.  Unfortunately I have a lot of those to get through for GCC 10
already :-)

>> If that's a concern, I can drop the changes to the existing
>> functions and just use the new flags for the follow-on patch.
>
> IMO the net pessimization is a little hard to swallow, although it probably 
> doesn't matter much in practice.  I'd suggest adding the new logic in every 
> case, but keeping the fast path when it's a nop:
>
>  enum rtx_code
>  swap_condition (enum rtx_code code)
>  {
>   /* Deal with the trivial cases first.  */
>   switch (code)
> {
> case EQ:
> case NE:
> case UNORDERED:
> case ORDERED:
> case UNEQ:
> case LTGT:
>   return code;
> default:
>   break;
> }
>
>   unsigned int flags = condition_to_flags (code);
>   flags = ((flags & ~(FLAGS_GT | FLAGS_LT))
>   | (flags & FLAGS_GT ? FLAGS_LT : 0)
>   | (flags & FLAGS_LT ? FLAGS_GT : 0));
>   return flags_to_condition (flags, true);
>  }
>
> OK with this additional change.

I'm not sure using flags_to_condition really buys anything then,
since you have to think about each individual case to see whether
it belongs in the switch or not.  I also don't have any proof
that the no-op cases are the common ones (since adding this
fast path of course slows down the others).

I think I'll just use the new routines for the new optimisation
and leave the existing ones as-is.

Thanks,
Richard


Re: -Wmissing-attributes: avoid duplicates and false positives

2019-07-17 Thread Alexandre Oliva
On Jul 17, 2019, Martin Sebor  wrote:

> Isn't this test sufficient to avoid the problems?

> if (!k && kmax > 1)
>   continue;

It is, unless someone (i) doesn't realize attributes that are present in
the type can't be present in the decl, (ii) misreads the '!k' as just
'k', and (iii) uses the wrong toolchain to confirm the consequences of
the misreading ;-/  doh!

> Can you put together a test case that does do the wrong thing?

I'm afraid not, all of the additional errors I observed were correctly
covered by the test I misunderstood once I grasped the actual logic.
Sorry about the, erhm, false positive ;-)


> The change looks cleaner than the cumbersome code that's there
> now so I have no problem with it but I'm not sure it does more
> than the test above

Would you like me to put it in regardless?
Does it make sense to put the testcase in anyway?

-- 
Alexandre Oliva, freedom fighter  he/him   https://FSFLA.org/blogs/lxo
Be the change, be Free! FSF Latin America board member
GNU Toolchain EngineerFree Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás - Che GNUevara


Re: -Wmissing-attributes: avoid duplicates and false positives

2019-07-17 Thread Martin Sebor

On 7/17/19 1:14 PM, Alexandre Oliva wrote:

On Jul 17, 2019, Martin Sebor  wrote:


Isn't this test sufficient to avoid the problems?



  if (!k && kmax > 1)
continue;


It is, unless someone (i) doesn't realize attributes that are present in
the type can't be present in the decl, (ii) misreads the '!k' as just
'k', and (iii) uses the wrong toolchain to confirm the consequences of
the misreading ;-/  doh!


Can you put together a test case that does do the wrong thing?


I'm afraid not, all of the additional errors I observed were correctly
covered by the test I misunderstood once I grasped the actual logic.
Sorry about the, erhm, false positive ;-)


No worries.  The purpose of the test above is far from intuitive
(even to me now).


The change looks cleaner than the cumbersome code that's there
now so I have no problem with it but I'm not sure it does more
than the test above


Would you like me to put it in regardless?


Sure, if it's worthwhile to you I think it's an improvement even
if it doesn't fix a bug.  (In full disclosure I'm not empowered
to formally approve bigger patches but I think cleanups like this
can safely be committed as obvious.)


Does it make sense to put the testcase in anyway?


If it isn't already covered by one of the existing tests I'd say
definitely.  I also tried the following while playing with it so
if this variation isn't being exercised either it might be worth
adding to the new test as well:

  template 
  f_type
  missing_nothing2;

  template <>
  void *
  ATTR ((alloc_size (1)))
  missing_nothing2(int);

It's your call.

Thanks!
Martin


Re: C++ PATCH to detect narrowing in case values (PR c++/90805)

2019-07-17 Thread Marek Polacek
On Wed, Jul 03, 2019 at 01:57:06PM -0400, Jason Merrill wrote:
> On 7/3/19 10:13 AM, Marek Polacek wrote:
> > On Sat, Jun 22, 2019 at 11:28:36PM -0400, Jason Merrill wrote:
> > > On 6/13/19 5:03 PM, Marek Polacek wrote:
> > > > Case values are converted constant expressions, so narrowing conversion 
> > > > is not
> > > > permitted.  This patch adds detecting narrowing to case_conversion; 
> > > > it's a
> > > > handy spot because we have both the value and the (adjusted) type of the
> > > > condition.
> > > 
> > > Is there a reason not to use build_converted_constant_expr?
> > 
> > The function comment says "Note that if TYPE and VALUE are already integral
> > we don't really do the conversion because the language-independent
> > warning/optimization code will work better that way" so I avoided adding any
> > conversions.
> 
> > What I could do is to, instead of calling check_narrowing, call
> > build_converted_constant_expr (type, value, tf_warning_or_error);
> > and not use its result, but I'm not sure what the benefits would be.
> 
> I was thinking about using it instead of the current
> perform_implicit_conversion_flags, so we get the somewhat different
> constraints on the conversion.  And then it becomes simpler to use it
> unconditionally but throw the result away in the easy case.

Ah, I see.  So something like this?

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-07-17  Marek Polacek  

PR c++/90805 - detect narrowing in case values.
* decl.c (case_conversion): Detect narrowing in case values.

* c-c++-common/pr89888.c: Update expected dg-error.
* g++.dg/cpp0x/Wnarrowing17.C: New test.
* g++.dg/cpp0x/enum28.C: Update expected dg-error.

diff --git gcc/cp/decl.c gcc/cp/decl.c
index dbcf681c783..2d3ffdfbb54 100644
--- gcc/cp/decl.c
+++ gcc/cp/decl.c
@@ -3630,16 +3630,23 @@ case_conversion (tree type, tree value)
 
   value = mark_rvalue_use (value);
 
+  if (INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (type))
+type = type_promotes_to (type);
+
+  tree ovalue = value;
+  /* The constant-expression VALUE shall be a converted constant expression
+ of the adjusted type of the switch condition, which doesn't allow
+ narrowing conversions.  */
+  value = build_converted_constant_expr (type, value, tf_warning_or_error);
+
   if (cxx_dialect >= cxx11
   && (SCOPED_ENUM_P (type)
- || !INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (TREE_TYPE (value
-{
-  if (INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (type))
-   type = type_promotes_to (type);
-  value = (perform_implicit_conversion_flags
-  (type, value, tf_warning_or_error,
-   LOOKUP_IMPLICIT | LOOKUP_NO_NON_INTEGRAL));
-}
+ || !INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (TREE_TYPE (ovalue
+/* Use the converted value.  */;
+  else
+/* The already integral case.  */
+value = ovalue;
+
   return cxx_constant_value (value);
 }
 
diff --git gcc/testsuite/c-c++-common/pr89888.c 
gcc/testsuite/c-c++-common/pr89888.c
index d9e11d6f26a..f14881ca052 100644
--- gcc/testsuite/c-c++-common/pr89888.c
+++ gcc/testsuite/c-c++-common/pr89888.c
@@ -11,8 +11,8 @@ foo (unsigned char x)
 {
 case -1: y = -1; break;/* { dg-message "previously 
used here" } */
/* { dg-warning "case label 
value is less than minimum value for type" "" { target *-*-* } .-1 } */
-case 0x: y = 0x; break;/* { dg-error "duplicate case 
value" } */
-case ~0U: y = ~0U; break;  /* { dg-error "duplicate case 
value" } */
+case 0x: y = 0x; break;/* { dg-error "duplicate case 
value|narrowing" } */
+case ~0U: y = ~0U; break;  /* { dg-error "duplicate case 
value|narrowing" } */
 }
 }
 
diff --git gcc/testsuite/g++.dg/cpp0x/Wnarrowing17.C 
gcc/testsuite/g++.dg/cpp0x/Wnarrowing17.C
new file mode 100644
index 000..064de531cb3
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/Wnarrowing17.C
@@ -0,0 +1,19 @@
+// PR c++/90805 - detect narrowing in case values.
+// { dg-do compile { target c++11 } }
+
+void f(int i, char c, unsigned u)
+{
+  switch (i)
+{
+case 2149056512u:; // { dg-error "narrowing conversion of .2149056512. from .unsigned int. to .int." }
+case (long long int) 1e10:; // { dg-error "narrowing conversion of .10000000000. from .long long int. to .int." }
+// { dg-warning "overflow in conversion" "overflow" { target *-*-* } .-1 }
+}
+
+  switch (c)
+// No narrowing, the adjusted type is int.
+case 300:; // { dg-warning "exceeds maximum value for type" }
+
+  switch (u)
+case -42:; // { dg-error "narrowing conversion of .-42. from .int. to 
.unsigned int." }
+}
diff --git gcc/testsuite/g++.dg/cpp0x/enum28.C 
gcc/testsuite/g++.dg/cpp0x/enum28.C
index 3967699dd03..bfebde57cb3 100644
--- gcc/testsuite/g++.dg/cpp0x/enum28.C
+++ gcc/testsuite/g++.dg/cpp0x/enum28.C
@@ -7,11

Re: [PATCH 2/5, OpenACC] Support Fortran optional arguments in the firstprivate clause

2019-07-17 Thread Tobias Burnus
Am 17.07.19 um 19:53 schrieb Kwok Cheung Yeung:
> On 12/07/2019 12:41 pm, Jakub Jelinek wrote:
>> This should be done through a langhook.
> >> Are really all PARM_DECLs with DECL_BY_REFERENCE and pointer type
>> optional
>> arguments?  I mean, POINTER_TYPE is used for a lot of cases.
>
> Hmmm... I thought it was the case that if you pass an argument in by
> reference (the default) in Fortran, the PARM_DECL will always be a
> reference to the argument type if non-optional, or a pointer if
> optional. However, fixed-shape arrays are passed in via a pointer
> whether optional or not...

[I have to admit that I have not yet read the OpenACC (nor OpenMP 5)
spec to know the semantics and whether it matters if something is a true
pointer or just optional.]


The following is a rather special case (matching a C "void *" pointer),
which is useless without later casting the argument ("c_f_pointer"
call), but it is a pointer argument which is not by reference and not
optional.


use iso_c_binding
implicit none
call foo(c_null_ptr)
contains
  subroutine foo(x)
    type(c_ptr), value :: x ! Matches a C 'void *' pointer
  end subroutine foo
end

Maybe there are more methods, but that requires some pondering.


> In the Fortran FE, optional arguments are indicated by setting
> attr.optional on the gfc_symbol for the parameter, but the OMP
> lowering works on a tree - is it somehow possible to get from the tree
> back to the gfc_symbol? If so, that would be a more reliable method of
> detecting optional arguments. 

The gfc_symbol etc. is gone. The only possibility is to store some extra
data in the language-dependent part of the tree, i.e. using
DECL_LANG_SPECIFIC. Cf. lang_decl in trans.h and the various #defines
which use DECL_LANG_SPECIFIC.

Cheers,

Tobias



Re: Rewrite some jump.c routines to use flags

2019-07-17 Thread Eric Botcazou
> I'm not sure using flags_to_condition really buys anything then,
> since you have to think about each individual case to see whether
> it belongs in the switch or not.  I also don't have any proof
> that the no-op cases are the common ones (since adding this
> fast path of course slows down the others).

Really?  Branch prediction is rather efficient these days.

> I think I'll just use the new routines for the new optimisation
> and leave the existing ones as-is.

OK, your call.

-- 
Eric Botcazou


[committed] Fix indentation goof in tree-ssa-dse.c

2019-07-17 Thread Jeff Law

I've been poking a bit at some missed cases in tree-ssa-dse.c and
noticed the formatting for a couple of case statements was wrong.  There
was also a missing return/break which would lead to a fallthru warning in
some of the work I did.

There are no functional changes here.  Bootstrapped and regression tested
on x86-64.  Installing on the trunk.

jeff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c43cf95f4e0..6d8b082a718 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-07-17  Jeff Law  
+
+   * tree-ssa-dse.c (initialize_ao_ref_for_dse): Fix formatting.
+   (dse_walker::dse_optimize_stmt): Likewise.  Add missing return to
+   avoid unexpected switch statement fallthru.
+
 2019-07-17  Uroš Bizjak  
 
* config/i386/i386.md (*add3_doubleword):
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index df05a55ce78..9bdcf9ae6af 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -107,42 +107,42 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
 {
   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
{
- case BUILT_IN_MEMCPY:
- case BUILT_IN_MEMMOVE:
- case BUILT_IN_MEMSET:
- case BUILT_IN_MEMCPY_CHK:
- case BUILT_IN_MEMMOVE_CHK:
- case BUILT_IN_MEMSET_CHK:
-   {
- tree size = NULL_TREE;
- if (gimple_call_num_args (stmt) == 3)
-   size = gimple_call_arg (stmt, 2);
- tree ptr = gimple_call_arg (stmt, 0);
- ao_ref_init_from_ptr_and_size (write, ptr, size);
- return true;
-   }
+   case BUILT_IN_MEMCPY:
+   case BUILT_IN_MEMMOVE:
+   case BUILT_IN_MEMSET:
+   case BUILT_IN_MEMCPY_CHK:
+   case BUILT_IN_MEMMOVE_CHK:
+   case BUILT_IN_MEMSET_CHK:
+ {
+   tree size = NULL_TREE;
+   if (gimple_call_num_args (stmt) == 3)
+ size = gimple_call_arg (stmt, 2);
+   tree ptr = gimple_call_arg (stmt, 0);
+   ao_ref_init_from_ptr_and_size (write, ptr, size);
+   return true;
+ }
 
- /* A calloc call can never be dead, but it can make
-subsequent stores redundant if they store 0 into
-the same memory locations.  */
- case BUILT_IN_CALLOC:
-   {
- tree nelem = gimple_call_arg (stmt, 0);
- tree selem = gimple_call_arg (stmt, 1);
- tree lhs;
- if (TREE_CODE (nelem) == INTEGER_CST
- && TREE_CODE (selem) == INTEGER_CST
- && (lhs = gimple_call_lhs (stmt)) != NULL_TREE)
-   {
- tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
-  nelem, selem);
- ao_ref_init_from_ptr_and_size (write, lhs, size);
- return true;
-   }
-   }
+   /* A calloc call can never be dead, but it can make
+  subsequent stores redundant if they store 0 into
+  the same memory locations.  */
+   case BUILT_IN_CALLOC:
+ {
+   tree nelem = gimple_call_arg (stmt, 0);
+   tree selem = gimple_call_arg (stmt, 1);
+   tree lhs;
+   if (TREE_CODE (nelem) == INTEGER_CST
+   && TREE_CODE (selem) == INTEGER_CST
+   && (lhs = gimple_call_lhs (stmt)) != NULL_TREE)
+ {
+   tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
+nelem, selem);
+   ao_ref_init_from_ptr_and_size (write, lhs, size);
+   return true;
+ }
+ }
 
- default:
-   break;
+   default:
+ break;
}
 }
   else if (is_gimple_assign (stmt))
@@ -964,57 +964,58 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator 
*gsi)
   tree fndecl = gimple_call_fndecl (stmt);
   switch (DECL_FUNCTION_CODE (fndecl))
{
- case BUILT_IN_MEMCPY:
- case BUILT_IN_MEMMOVE:
- case BUILT_IN_MEMSET:
- case BUILT_IN_MEMCPY_CHK:
- case BUILT_IN_MEMMOVE_CHK:
- case BUILT_IN_MEMSET_CHK:
-   {
- /* Occasionally calls with an explicit length of zero
-show up in the IL.  It's pointless to do analysis
-on them, they're trivially dead.  */
- tree size = gimple_call_arg (stmt, 2);
- if (integer_zerop (size))
-   {
- delete_dead_or_redundant_call (gsi, "dead");
- return;
-   }
-
- /* If this is a memset call that initializes an object
-to zero, it may be redundant with an earlier memset
-or empty CONSTRUCTOR of a larger object.  */
- if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
-  || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
- && integer_zerop (gimple_call_arg (stmt, 1))

[PATCH 00/10, OpenACC] Rework handling of OpenACC kernels regions

2019-07-17 Thread Kwok Cheung Yeung
This series of patches reworks the way that OpenACC kernels regions are 
processed by GCC. Instead of relying on the parloops pass for 
auto-parallelisation of the kernel region, the contents of the region are 
transformed into a sequence of offloaded regions, which are then processed 
individually.


Tested on an x86_64 host, with offloading to a Nvidia Tesla K20c card.

Okay for trunk?

Thanks

Kwok


[PATCH 01/10, OpenACC] Use "-fopenacc-kernels=parloops" to document "parloops" test cases

2019-07-17 Thread Kwok Cheung Yeung
This patch introduces a new option "-fopenacc-kernels" to control how OpenACC 
kernels are processed. The current behaviour will be equivalent to 
'-fopenacc-kernels=parloops'.


2019-07-16  Thomas Schwinge  

gcc/
* flag-types.h (enum openacc_kernels): New type.

gcc/c-family/
* c.opt (fopenacc-kernels): New flag.

gcc/fortran/
* lang.opt (fopenacc-kernels): New flag.

gcc/testsuite/
* c-c++-common/goacc/kernels-1.c: Add
"-fopenacc-kernels=parloops".
* c-c++-common/goacc/kernels-alias-2.c: Likewise.
* c-c++-common/goacc/kernels-alias-3.c: Likewise.
* c-c++-common/goacc/kernels-alias-4.c: Likewise.
* c-c++-common/goacc/kernels-alias-5.c: Likewise.
* c-c++-common/goacc/kernels-alias-6.c: Likewise.
* c-c++-common/goacc/kernels-alias-7.c: Likewise.
* c-c++-common/goacc/kernels-alias-8.c: Likewise.
* c-c++-common/goacc/kernels-alias-ipa-pta-2.c: Likewise.
* c-c++-common/goacc/kernels-alias-ipa-pta-3.c: Likewise.
* c-c++-common/goacc/kernels-alias-ipa-pta-4.c: Likewise.
* c-c++-common/goacc/kernels-alias-ipa-pta.c: Likewise.
* c-c++-common/goacc/kernels-alias.c: Likewise.
* c-c++-common/goacc/kernels-counter-var-redundant-load.c:
Likewise.
* c-c++-common/goacc/kernels-counter-vars-function-scope.c:
Likewise.
* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
* c-c++-common/goacc/kernels-loop-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-3.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
* c-c++-common/goacc/kernels-loop-data.c: Likewise.
* c-c++-common/goacc/kernels-loop-g.c: Likewise.
* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
* c-c++-common/goacc/kernels-loop-n.c: Likewise.
* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
* c-c++-common/goacc/kernels-loop.c: Likewise.
* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
Likewise.
* c-c++-common/goacc/kernels-reduction.c: Likewise.
* gfortran.dg/goacc/kernels-alias-2.f95: Likewise.
* gfortran.dg/goacc/kernels-alias-3.f95: Likewise.
* gfortran.dg/goacc/kernels-alias-4.f95: Likewise.
* gfortran.dg/goacc/kernels-alias.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
* gfortran.dg/goacc/kernels-loop.f95: Likewise.
* gfortran.dg/goacc/kernels-loops-adjacent.f95: Likewise.
* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
Likewise.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-alias-ipa-pta-2.c:
Add "-fopenacc-kernels=parloops".
* testsuite/libgomp.oacc-c-c++-common/kernels-alias-ipa-pta-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-alias-ipa-pta.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-empty.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
Likewise.

[PATCH 02/10, OpenACC] Add OpenACC target kinds for decomposed kernels regions

2019-07-17 Thread Kwok Cheung Yeung
This patch is in preparation for changes that will cut up OpenACC kernels 
regions into individual parts. For the new sub-regions that will be generated, 
this adds the following new kinds of OpenACC regions for internal use:


- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED for parts of kernels 
regions to be executed in gang-redundant mode
- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE for parts of kernels 
regions to be executed in gang-single mode
- GF_OMP_TARGET_KIND_OACC_DATA_KERNELS for data regions generated around the 
body of a kernels region


2019-07-16  Thomas Schwinge  

gcc/
* gimple.h (enum gf_mask): Add new target kinds
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED,
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE, and
GF_OMP_TARGET_KIND_OACC_DATA_KERNELS.
(is_gimple_omp_oacc): Handle new target kinds.
(is_gimple_omp_offloaded): Likewise.
* gimple-pretty-print.c (dump_gimple_omp_target): Likewise.
* omp-expand.c (expand_omp_target): Likewise.
(build_omp_regions_1): Likewise.
(omp_make_gimple_edges): Likewise.
* omp-low.c (is_oacc_parallel_or_serial): Likewise.
(was_originally_oacc_kernels): New function.
(scan_omp_for): Update check for illegal nesting.
(check_omp_nesting_restrictions): Handle new target kinds.
(lower_oacc_reductions): Likewise.
(lower_omp_target): Likewise.
* omp-offload.c (execute_oacc_device_lower): Likewise.
---
 gcc/gimple-pretty-print.c |  9 +
 gcc/gimple.h  | 14 +
 gcc/omp-expand.c  | 34 
 gcc/omp-low.c | 50 ++-
 gcc/omp-offload.c | 20 +++
 5 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index ce339ee..cf4d0e0 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1691,6 +1691,15 @@ dump_gimple_omp_target (pretty_printer *buffer, 
gomp_target *gs,

 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
   kind = " oacc_host_data";
   break;
+case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+  kind = " oacc_parallel_kernels_parallelized";
+  break;
+case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+  kind = " oacc_parallel_kernels_gang_single";
+  break;
+case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+  kind = " oacc_data_kernels";
+  break;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 47070e7..d8423be 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -184,6 +184,15 @@ enum gf_mask {
 GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9,
 GF_OMP_TARGET_KIND_OACC_DECLARE = 10,
 GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11,
+/* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+   construct, parallelized.  */
+GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED = 12,
+/* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+   construct, "gang-single".  */
+GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE = 13,
+/* A GF_OMP_TARGET_KIND_OACC_DATA that originates from a 'kernels'
+   construct.  */
+GF_OMP_TARGET_KIND_OACC_DATA_KERNELS = 14,
 GF_OMP_TEAMS_GRID_PHONY= 1 << 0,
 GF_OMP_TEAMS_HOST  = 1 << 1,

@@ -6479,6 +6488,9 @@ is_gimple_omp_oacc (const gimple *stmt)
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
case GF_OMP_TARGET_KIND_OACC_DECLARE:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+   case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+   case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+   case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
  return true;
default:
  return false;
@@ -6503,6 +6515,8 @@ is_gimple_omp_offloaded (const gimple *stmt)
case GF_OMP_TARGET_KIND_REGION:
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
+   case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+   case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
  return true;
default:
  return false;
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index c007ec1..7e4d5a8 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -7914,6 +7914,8 @@ expand_omp_target (struct omp_region *region)
 case GF_OMP_TARGET_KIND_ENTER_DATA:
 case GF_OMP_TARGET_KIND_EXIT_DATA:
 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
 case GF_OMP_TARGET_KIND_OACC_KERNELS:
 case GF_OMP_TARGET_KIND_OACC_UPDATE:
 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
@@ -7923,6 +7925,7 @@ expand_omp_target (struct omp_regio

[PATCH 03/10, OpenACC] Separate OpenACC kernels regions in data and parallel parts

2019-07-17 Thread Kwok Cheung Yeung
In the future, kernels regions will be transformed into data regions containing 
a sequence of serial and parallel offloaded regions. This first patch sets up a 
new pass that is responsible for this transformation, and in a first step 
constructs the new data region containing a parallel region with the original 
kernels region's body.


2019-07-16  Gergö Barany  

gcc/
* Makefile.in: Add...
* omp-oacc-kernels.c: ... this new file for the kernels conversion
pass.
* flag-types.h (enum openacc_kernels): Add "split" style.  Adjust
all users.
* doc/invoke.texi (-fopenacc-kernels): Update.
* passes.def: Add pass_convert_oacc_kernels to pipeline.
* tree-pass.h (make_pass_convert_oacc_kernels): Add declaration.

gcc/c-family/
* c.opt (fopenacc-kernels): Document.  Add 'split' option.

gcc/fortran/
* lang.opt (fopenacc-kernels): Document.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: New test.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
* c-c++-common/goacc/if-clause-2.c: Update.
* gfortran.dg/goacc/kernels-tree.f95: Likewise.
---
 gcc/Makefile.in|   2 +
 gcc/c-family/c.opt |   6 +-
 gcc/doc/invoke.texi|  13 +-
 gcc/flag-types.h   |   1 +
 gcc/fortran/lang.opt   |   3 +-
 gcc/omp-oacc-kernels.c | 245 +
 gcc/passes.def |   1 +
 gcc/testsuite/c-c++-common/goacc/if-clause-2.c |   7 +
 .../c-c++-common/goacc/kernels-conversion.c|  36 +++
 .../gfortran.dg/goacc/kernels-conversion.f95   |  33 +++
 gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95   |   6 +
 gcc/tree-pass.h|   1 +
 12 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 gcc/omp-oacc-kernels.c
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 597dc01..82537f6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1432,6 +1432,7 @@ OBJS = \
omp-general.o \
omp-grid.o \
omp-low.o \
+   omp-oacc-kernels.o \
omp-simd-clone.o \
opt-problem.o \
optabs.o \
@@ -2560,6 +2561,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \

   $(srcdir)/omp-offload.c \
   $(srcdir)/omp-expand.c \
   $(srcdir)/omp-low.c \
+  $(srcdir)/omp-oacc-kernels.c \
   $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
   $(srcdir)/cgraphclones.c \
   $(srcdir)/tree-phinodes.c \
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 4bdacb6..a193875 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1689,12 +1689,16 @@ C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims)
 Specify default OpenACC compute dimensions.

 fopenacc-kernels=
-C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) 
Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented
+C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) 
Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)

+-fopenacc-kernels=[split|parloops] Configure OpenACC 'kernels' constructs 
handling.

 Enum
 Name(openacc_kernels) Type(enum openacc_kernels)

 EnumValue
+Enum(openacc_kernels) String(split) Value(OPENACC_KERNELS_SPLIT)
+
+EnumValue
 Enum(openacc_kernels) String(parloops) Value(OPENACC_KERNELS_PARLOOPS)

 fopenmp
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0c20cb6..ec98ab6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -198,7 +198,7 @@ in the following sections.
 -aux-info @var{filename}  -fallow-parameterless-variadic-functions @gol
 -fno-asm  -fno-builtin  -fno-builtin-@var{function}  -fgimple@gol
 -fhosted  -ffreestanding @gol
--fopenacc  -fopenacc-dim=@var{geom} @gol
+-fopenacc  -fopenacc-dim=@var{geom}  -fopenacc-kernels=@var{style} @gol
 -fopenmp  -fopenmp-simd @gol
 -fms-extensions  -fplan9-extensions  -fsso-struct=@var{endianness} @gol
 -fallow-single-precision  -fcond-mismatch  -flax-vector-conversions @gol
@@ -2193,6 +2193,17 @@ not explicitly specify.  The @var{geom} value is a 
triple of
 ':'-separated sizes, in order 'gang', 'worker' and, 'vector'.  A size
 can be omitted, to use a target-specific default value.

+@item -fopenacc-kernels=@var{style}
+@opindex fopenacc-kernels
+@cindex OpenACC accelerator programming
+Configure OpenACC 'kernels' constructs handling.
+With @option{-fopenacc-kernels=split}, OpenACC 'kernels' constructs
+are split into a sequence of compute constructs, each then handled
+individually.
+With @option{-fopenacc-kernels=parloops}, the whole OpenACC
+'kernels' construct is handled by the @samp{parloops} pa

[PATCH 04/10, OpenACC] Turn OpenACC kernels regions into a sequence of parallel regions

2019-07-17 Thread Kwok Cheung Yeung

This patch decomposes each OpenACC kernels region into a sequence of
parallel regions. Each OpenACC loop nest turns into its own region; any code 
between such loop nests is gathered up into a region as well. The loop regions 
can be distributed across gangs if the original kernels region had a num_gangs 
clause, while the other regions are executed in "gang-single" mode. The implied 
default "auto" clause on kernels loops is made explicit unless there is a 
conflicting clause.


2019-07-16  Gergö Barany  

gcc/
* omp-oacc-kernels.c (top_level_omp_for_in_stmt): New function.
(make_gang_single_region): Likewise.
(transform_kernels_loop_clauses, make_gang_parallel_loop_region):
Likewise.
(flatten_binds): Likewise.
(make_data_region_try_statement): Likewise.
(maybe_build_inner_data_region): Likewise.
(decompose_kernels_region_body): Likewise.
(transform_kernels_region): Delegate to decompose_kernels_region_body
and make_data_region_try_statement.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: Test for a gang-single
region.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
---
 gcc/omp-oacc-kernels.c | 558 -
 .../c-c++-common/goacc/kernels-conversion.c|  11 +-
 .../gfortran.dg/goacc/kernels-conversion.f95   |  11 +-
 3 files changed, 557 insertions(+), 23 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index d180377..6e08366 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "backend.h"
 #include "target.h"
 #include "tree.h"
+#include "cp/cp-tree.h"
 #include "gimple.h"
 #include "tree-pass.h"
 #include "cgraph.h"
@@ -45,16 +46,548 @@ along with GCC; see the file COPYING3.  If not see
For now, the translation is as follows:
- The entire kernels region is turned into a data region with clauses
  taken from the kernels region.  New "create" clauses are added for all
- variables declared at the top level in the kernels region.  */
+ variables declared at the top level in the kernels region.
+   - Any loop annotated with an OpenACC loop directive is wrapped in a new
+ parallel region.  Gang/worker/vector annotations are copied from the
+ original kernels region if present.
+ * Loops without an explicit "independent" or "seq" annotation get an
+   "auto" annotation; other annotations are preserved on the loop or
+   moved to the new surrounding parallel region.  Which annotations are
+   moved is determined by the constraints in the OpenACC spec; for
+   example, loops in the kernels region may have a gang clause, but
+   such annotations must now be moved to the new parallel region.
+   - Any sequences of other code (non-loops, non-OpenACC loops) are wrapped
+ in new "gang-single" parallel regions: Worker/vector annotations are
+ copied from the original kernels region if present, but num_gangs is
+ explicitly set to 1.  */
+
+/* Helper function for decompose_kernels_region_body.  If STMT contains a
+   "top-level" OMP_FOR statement, returns a pointer to that statement;
+   returns NULL otherwise.
+
+   A "top-level" OMP_FOR statement is one that is possibly accompanied by
+   small snippets of setup code.  Specifically, this function accepts an
+   OMP_FOR possibly wrapped in a singleton bind and a singleton try
+   statement to allow for a local loop variable, but not an OMP_FOR
+   statement nested in any other constructs.  Alternatively, it accepts a
+   non-singleton bind containing only assignments and then an OMP_FOR
+   statement at the very end.  The former style can be generated by the C
+   frontend, the latter by the Fortran frontend.  */
+
+static gimple *
+top_level_omp_for_in_stmt (gimple *stmt)
+{
+  if (gimple_code (stmt) == GIMPLE_OMP_FOR)
+return stmt;
+
+  if (gimple_code (stmt) == GIMPLE_BIND)
+{
+  gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
+  if (gimple_seq_singleton_p (body))
+{
+  /* Accept an OMP_FOR statement, or a try statement containing only
+ a single OMP_FOR.  */
+  gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
+  if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
+return maybe_for_or_try;
+  else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
+{
+  gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
+  if (!gimple_seq_singleton_p (try_body))
+return NULL;
+  gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
+  if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
+return maybe_omp_for_stmt;
+}
+}
+  else
+{
+  gimple_stmt_iterator gsi;
+  /* Acce

[PATCH 05/10, OpenACC] Handle conditional execution of loops in OpenACC kernels regions

2019-07-17 Thread Kwok Cheung Yeung
Any OpenACC loop controlled by an if statement or a non-OpenACC loop must be 
executed in a gang-single region. Detecting such loops is not trivial as OpenACC 
kernels expansion is done on GIMPLE but before computation of the control flow 
graph. This patch adds an auxiliary analysis for determining whether a statement 
is inside a conditionally executed region (relative to the kernels region's entry).


2019-07-16  Gergö Barany  

gcc/
* omp-oacc-kernels.c (control_flow_regions): New class.
(control_flow_regions::control_flow_regions): New constructor.
(control_flow_regions::is_unconditional_oacc_for_loop): New method.
(control_flow_regions::find_rep): Likewise.
(control_flow_regions::union_reps): Likewise.
(control_flow_regions::compute_regions): Likewise.
(decompose_kernels_region_body): Use test for conditional execution.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: Add test for conditionally
executed code.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
---
 gcc/omp-oacc-kernels.c | 216 -
 .../c-c++-common/goacc/kernels-conversion.c|  20 +-
 .../gfortran.dg/goacc/kernels-conversion.f95   |  21 +-
 3 files changed, 245 insertions(+), 12 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 6e08366..80a82fa 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -385,6 +385,208 @@ maybe_build_inner_data_region (location_t loc, 
gimple*body,
   return body;
 }

+/* Auxiliary analysis of the body of a kernels region, to determine for each
+   OpenACC loop whether it is control-dependent (i.e., not necessarily
+   executed every time the kernels region is entered) or not.
+   We say that a loop is control-dependent if there is some cond, switch, or
+   goto statement that jumps over it, forwards or backwards.  For example,
+   if the loop is controlled by an if statement, then a jump to the true
+   block, the false block, or from one of those blocks to the control flow
+   join point will necessarily jump over the loop.
+   This analysis implements an ad-hoc union-find data structure classifying
+   statements into "control-flow regions" as follows: Most statements are in
+   the same region as their predecessor, except that each OpenACC loop is in
+   a region of its own, and each OpenACC loop's successor starts a new
+   region.  We then unite the regions of any statements linked by jumps,
+   placing any cond, switch, or goto statement in the same region as its
+   target label(s).
+   In the end, control dependence of OpenACC loops can be determined by
+   comparing their immediate predecessor and successor statements' regions.
+   A jump crosses the loop if and only if the predecessor and successor are
+   in the same region.  (If there is no predecessor or successor, the loop
+   is executed unconditionally.)
+   The methods in this class identify statements by their index in the
+   kernels region's body.  */
+
+class control_flow_regions
+{
+  public:
+/* Initialize an instance and pre-compute the control-flow region
+   information for the statement sequence SEQ.  */
+control_flow_regions (gimple_seq seq);
+
+/* Return true if the STMT with the given index IDX in the analyzed
+   statement sequence is an unconditionally executed OpenACC loop.  */
+bool is_unconditional_oacc_for_loop (gimple *stmt, size_t idx);
+
+  private:
+/* Find the region representative for the statement identified by index
+   STMT_IDX.  */
+size_t find_rep (size_t stmt_idx);
+
+/* Union the regions containing the statements represented by
+   representatives A and B.  */
+void union_reps (size_t a, size_t b);
+
+/* Helper for the constructor.  Performs the actual computation of the
+   control-flow regions in the statement sequence SEQ.  */
+void compute_regions (gimple_seq seq);
+
+/* The mapping from statement indices to region representatives.  */
+vec <size_t> representatives;
+
+/* A cache mapping statement indices to a flag indicating whether the
+   statement is a top level OpenACC for loop.  */
+vec <bool> omp_for_loops;
+};
+
+control_flow_regions::control_flow_regions (gimple_seq seq)
+{
+  representatives.create (1);
+  omp_for_loops.create (1);
+  compute_regions (seq);
+}
+
+bool
+control_flow_regions::is_unconditional_oacc_for_loop (gimple *stmt, size_t idx)
+{
+  if (top_level_omp_for_in_stmt (stmt) == NULL)
+/* Not an OpenACC for loop.  */
+return false;
+  if (idx == 0 || idx == representatives.length () - 1)
+/* The first or last statement in the kernels region.  This means that
+   there is no room before or after it for a jump or a label.  Thus
+   there cannot be a jump across it, so it is unconditional.  */
+return true;
+  /* Otherwise, the loop is unconditional if the statements before and after
+ 

[PATCH 06/10, OpenACC] Adjust parallelism of loops in gang-single parts of OpenACC kernels regions

2019-07-17 Thread Kwok Cheung Yeung

Loops in gang-single parts of kernels regions cannot be executed in
gang-redundant mode. If the user specified gang clauses on such loops, emit a 
warning and remove these clauses. Adjust automatic partitioning to exclude gang 
partitioning in gang-single regions.


2019-07-16  Gergö Barany  

gcc/
* omp-oacc-kernels.c (visit_loops_in_gang_single_region): Emit warning 
on
conditionally executed code with a gang clause.
(make_loops_gang_single): New function.
(add_parent_or_loop_num_clause): New function.
(adjust_nested_loop_clauses_wi_info): New struct.
(adjust_nested_loop_clauses): New function.
(transform_kernels_loop_clauses): Add worker and vector clause 
parameters,
emit error on illegal nesting.
(make_gang_parallel_loop_region): Likewise.
(decompose_kernels_region_body): Separate out gang/worker/vector clauses
for separate handling; add call to make_loops_gang_single.
* omp-offload.c (oacc_loop_auto_partitions): Add and propagate
is_oacc_gang_single parameter.
(oacc_loop_partition): Likewise.
(execute_oacc_device_lower): Adjust call to oacc_loop_partition.
---
 gcc/omp-oacc-kernels.c | 380 -
 gcc/omp-offload.c  |  22 ++-
 2 files changed, 364 insertions(+), 38 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 80a82fa..11a960c 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -59,7 +59,14 @@ along with GCC; see the file COPYING3.  If not see
- Any sequences of other code (non-loops, non-OpenACC loops) are wrapped
  in new "gang-single" parallel regions: Worker/vector annotations are
  copied from the original kernels region if present, but num_gangs is
- explicitly set to 1.  */
+ explicitly set to 1.
+   - Both points above only apply at the topmost level in the region, i.e.,
+ the transformation does not introduce new parallel regions inside
+ nested statement bodies.  In particular, this means that a
+ gang-parallelizable loop inside an if statement is "gang-serialized" by
+ the transformation.
+ The transformation visits loops inside such new gang-single-regions and
+ removes and warns about any gang annotations.  */

 /* Helper function for decompose_kernels_region_body.  If STMT contains a
"top-level" OMP_FOR statement, returns a pointer to that statement;
@@ -122,6 +129,67 @@ top_level_omp_for_in_stmt (gimple *stmt)
   return NULL;
 }

+/* Helper function for make_loops_gang_single for walking the tree.  If the
+   statement indicated by GSI_P is an OpenACC for loop with a gang clause,
+   issue a warning and remove the clause.  */
+
+static tree
+visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
+   bool *handled_ops_p,
+   struct walk_stmt_info *)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+  tree clauses = NULL, prev_clause = NULL;
+  *handled_ops_p = false;
+
+  switch (gimple_code (stmt))
+{
+case GIMPLE_OMP_FOR:
+  clauses = gimple_omp_for_clauses (stmt);
+  for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
+{
+  if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
+{
+  /* It makes no sense to have a gang clause in a gang-single
+ region, so remove it and warn.  */
+  warning_at (gimple_location (stmt), 0,
+  "conditionally executed loop in kernels region"
+  " will be executed in a single gang;"
+  " ignoring % clause");
+  if (prev_clause != NULL)
+OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
+  else
+clauses = OMP_CLAUSE_CHAIN (clause);
+
+  break;
+}
+  prev_clause = clause;
+}
+  gimple_omp_for_set_clauses (stmt, clauses);
+  /* No need to recurse into nested statements; no loop nested inside
+ this loop can be gang-partitioned.  */
+  *handled_ops_p = true;
+  break;
+
+default:
+  break;
+}
+
+  return NULL;
+}
+
+/* Visit all nested OpenACC loops in the statement indicated by GSI.  This
+   statement is expected to be inside a gang-single region.  Issue a warning
+   for any loops inside it that have gang clauses and remove the clauses.  */
+
+static void
+make_loops_gang_single (gimple_stmt_iterator gsi)
+{
+  struct walk_stmt_info wi;
+  memset (&wi, 0, sizeof (wi));
+  walk_gimple_stmt (&gsi, visit_loops_in_gang_single_region, NULL, &wi);
+}
+
 /* Construct a "gang-single" OpenACC parallel region at LOC containing the
STMTS.  The newly created region is annotated with CLAUSES, which must
not contain a num_gangs clause, and an additional "num_gangs(1)" clause
@@ -150,45 +218,253 @@ make_gang_single_region (loc

[PATCH 07/10, OpenACC] Launch kernels asynchronously in OpenACC kernels regions

2019-07-17 Thread Kwok Cheung Yeung
Kernels regions are decomposed into one or more smaller regions that are to be 
executed in sequence. With this patch, all of these regions are launched 
asynchronously, and a wait directive is added after them. This means that the 
host only waits once for the kernels to complete, not once per kernel. If the 
original kernels region was marked async, that asynchronous behavior is 
preserved, and no wait is added.


2019-07-16  Gergö Barany  

gcc/
* omp-oacc-kernels.c (add_async_clauses_and_wait): New function...
(decompose_kernels_region_body): ... called from here.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: Test automatically generated
async clauses.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
---
 gcc/omp-oacc-kernels.c | 56 --
 .../c-c++-common/goacc/kernels-conversion.c|  5 ++
 .../gfortran.dg/goacc/kernels-conversion.f95   |  5 ++
 3 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 11a960c..0fae74a 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -66,7 +66,13 @@ along with GCC; see the file COPYING3.  If not see
  gang-parallelizable loop inside an if statement is "gang-serialized" by
  the transformation.
  The transformation visits loops inside such new gang-single-regions and
- removes and warns about any gang annotations.  */
+ removes and warns about any gang annotations.
+   - In order to make the host wait only once for the whole region instead
+ of once per kernel launch, the new parallel and serial regions are
+ annotated async.  Unless the original kernels region was marked async,
+ the entire region ends with a wait construct.  If the original kernels
+ region was marked async, the generated async statements use the async
+ queue the kernels region was annotated with (possibly implicitly).  */

 /* Helper function for decompose_kernels_region_body.  If STMT contains a
"top-level" OMP_FOR statement, returns a pointer to that statement;
@@ -676,6 +682,38 @@ maybe_build_inner_data_region (location_t loc, gimple 
*body,
   return body;
 }

+/* Helper function of decompose_kernels_region_body.  The statements in
+   REGION_BODY are expected to be decomposed parallel regions; add an
+   "async" clause to each.  Also add a "wait" pragma at the end of the
+   sequence.  */
+
+static void
+add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
+{
+  tree default_async_queue
+= build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
+  for (gimple_stmt_iterator gsi = gsi_start (*region_body);
+   !gsi_end_p (gsi);
+   gsi_next (&gsi))
+{
+  gimple *stmt = gsi_stmt (gsi);
+  tree target_clauses = gimple_omp_target_clauses (stmt);
+  tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
+  OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
+  OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
+  target_clauses = new_async_clause;
+  gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
+ target_clauses);
+}
+  /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0).  */
+  tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
+  tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
+  gimple *wait_call = gimple_build_call (wait_fn, 2,
+ sync_arg, integer_zero_node);
+  gimple_set_location (wait_call, loc);
+  gimple_seq_add_stmt (region_body, wait_call);
+}
+
 /* Auxiliary analysis of the body of a kernels region, to determine for each
OpenACC loop whether it is control-dependent (i.e., not necessarily
executed every time the kernels region is entered) or not.
@@ -890,10 +928,12 @@ decompose_kernels_region_body (gimple *kernels_region, 
tree kernels_clauses)

  except that the num_gangs, num_workers, and vector_length clauses will
  only be added to loop regions.  The other regions are "gang-single" and
  get an explicit num_gangs(1) clause.  So separate out the num_gangs,
- num_workers, and vector_length clauses here.  */
+ num_workers, and vector_length clauses here.
+ Also check for the presence of an async clause but do not remove it
+ from the kernels clauses.  */
   tree num_gangs_clause = NULL, num_workers_clause = NULL,
vector_length_clause = NULL;
-  tree prev_clause = NULL, next_clause = NULL;
+  tree prev_clause = NULL, next_clause = NULL, async_clause = NULL;
   tree parallel_clauses = kernels_clauses;
   for (tree c = parallel_clauses; c; c = next_clause)
 {
@@ -927,6 +967,8 @@ decompose_kernels_region_body (gimple *kernels_region, tree 
kernels_clauses)

 }
   else
 prev_clause = c;
+  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
+async_clause = c;
 }


[PATCH] Fix simd attribute handling on aarch64

2019-07-17 Thread Steve Ellcey
This patch fixes a bug with SIMD functions on Aarch64.  I found it
while trying to run SPEC with ToT GCC and a glibc that defines vector
math functions for aarch64.  When a function is declared with the simd
attribute GCC creates vector clones of that function with the return
and argument types changed to vector types.  On Aarch64 the vector
clones are also marked with the aarch64_vector_pcs attribute to signify
that they use an alternate calling convention.  Due to a bug in GCC the
non-vector version of the function being cloned was also being marked
with this attribute.

Because simd_clone_adjust and expand_simd_clones are calling
targetm.simd_clone.adjust (which attached the aarch64_vector_pcs
attribute to the function type) before calling
simd_clone_adjust_return_type (which created a new distinct type tree
for the cloned function) the attribute got attached to both the
'normal' scalar version of the SIMD function and any vector versions of
the function.  The attribute should only be on the vector versions.

My fix is to call simd_clone_adjust_return_type and create the new type
before calling targetm.simd_clone.adjust which adds the attribute.  The
only other platform that this patch could affect is x86 because that is
the only other platform to use targetm.simd_clone.adjust.  I did a
bootstrap and gcc test run on x86 (as well as Aarch64) and got no
regressions.

OK to check in?

Steve Ellcey
sell...@marvell.com


2019-07-17  Steve Ellcey  

* omp-simd-clone.c (simd_clone_adjust):  Call targetm.simd_clone.adjust
after calling simd_clone_adjust_return_type.
(expand_simd_clones): Ditto.


diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c
index caa8da3cba5..6a6b439d146 100644
--- a/gcc/omp-simd-clone.c
+++ b/gcc/omp-simd-clone.c
@@ -1164,9 +1164,8 @@ simd_clone_adjust (struct cgraph_node *node)
 {
   push_cfun (DECL_STRUCT_FUNCTION (node->decl));
 
-  targetm.simd_clone.adjust (node);
-
   tree retval = simd_clone_adjust_return_type (node);
+  targetm.simd_clone.adjust (node);
   ipa_parm_adjustment_vec adjustments
 = simd_clone_adjust_argument_types (node);
 
@@ -1737,8 +1736,8 @@ expand_simd_clones (struct cgraph_node *node)
simd_clone_adjust (n);
  else
{
- targetm.simd_clone.adjust (n);
  simd_clone_adjust_return_type (n);
+ targetm.simd_clone.adjust (n);
  simd_clone_adjust_argument_types (n);
}
}




[PATCH 08/10, OpenACC] New OpenACC kernels region decompose algorithm

2019-07-17 Thread Kwok Cheung Yeung
Previously, OpenACC kernels region bodies were decomposed into a sequence of 
alternating gang-single and gang-parallel "parallel" regions. The new algorithm 
in this patch introduces a third possibility: Loops that look like they might 
benefit from the parloops pass are converted into old "kernels" regions, 
exposing them to the parloops pass later on. This has the benefit that loops 
that cannot be parallelized are not offloaded to the GPU.


2019-07-16  Thomas Schwinge  

gcc/
* omp-oacc-kernels.c (adjust_region_code_walk_stmt_fn)
(adjust_region_code): New functions.
(make_loops_gang_single): Update.
(make_gang_single_region): Rename to...
(make_region_seq): ... this, and update.
(make_gang_parallel_loop_region): Rename to...
(make_region_loop_nest): ... this, and update.
(is_unconditional_oacc_for_loop): Remove stmt parameter and check.
(decompose_kernels_region_body): Update.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: Adjust test.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
* c-c++-common/goacc/kernels-decompose-1.c: New file.
* gfortran.dg/goacc/kernels-decompose-1.f95: Likewise.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: New
file.
---
 gcc/omp-oacc-kernels.c | 293 +
 .../c-c++-common/goacc/kernels-conversion.c|  19 +-
 .../c-c++-common/goacc/kernels-decompose-1.c   | 123 +
 .../gfortran.dg/goacc/kernels-conversion.f95   |  22 +-
 .../gfortran.dg/goacc/kernels-decompose-1.f95  | 132 ++
 .../kernels-decompose-1.c  |  30 +++
 6 files changed, 553 insertions(+), 66 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-1.c
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-decompose-1.f95
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c


diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 0fae74a..d65e6c6 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "gimple-walk.h"
 #include "gomp-constants.h"
+#include "omp-general.h"

 /* This is a preprocessing pass to be run immediately before lower_omp.  It
will convert OpenACC "kernels" regions into sequences of "parallel"
@@ -135,6 +136,95 @@ top_level_omp_for_in_stmt (gimple *stmt)
   return NULL;
 }

+/* Helper for adjust_region_code: evaluate the statement at GSI_P.  */
+
+static tree
+adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
+bool *handled_ops_p,
+struct walk_stmt_info *wi)
+{
+  int *region_code = (int *) wi->info;
+
+  gimple *stmt = gsi_stmt (*gsi_p);
+  switch (gimple_code (stmt))
+{
+case GIMPLE_OMP_FOR:
+  {
+   tree clauses = gimple_omp_for_clauses (stmt);
+   if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
+ {
+   /* Explicit 'independent' clause.  */
+   /* Keep going; recurse into loop body.  */
+   break;
+ }
+   else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
+ {
+   /* Explicit 'seq' clause.  */
+   /* We'll "parallelize" if at some level a loop construct has been
+  marked up by the user as unparallelizable ('seq' clause; we'll
+  respect that in the later processing).  Given that the user has
+  explicitly marked it up, this loop construct cannot be
+  performance-critical (and we thus don't have to "avoid
+  offloading"), and in this case it's also fine to "parallelize"
+  instead of "gang-single", because any outer or inner loops may
+  still exploit the available parallelism.  */
+   /* Keep going; recurse into loop body.  */
+   break;
+ }
+   else
+ {
+   /* Explicit or implicit 'auto' clause.  */
+   /* The user would like this loop analyzed ('auto' clause) and
+  typically parallelized, but we don't have available yet the
+  compiler logic to analyze this, so can't parallelize it here, so
+  we'd very likely be running into a performance problem if we
+  were to execute this unparallelized, thus forward the whole loop
+  nest to "parloops".  */
+   *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
+   /* Terminate: final decision for this region.  */
+   *handled_ops_p = true;
+   return integer_zero_node;
+ }
+   gcc_unreachable ();
+  }
+
+case GIMPLE_COND:
+case GIMPLE_GOTO:
+case GIMPLE_SWITCH:
+case GIMPLE_ASM:
+case GIMPLE_TRANSACTION:
+case GIMPLE_RETURN:
+  /* Sta

[PATCH 09/10, OpenACC] Avoid introducing 'create' mapping clauses for loop index variables in kernels regions

2019-07-17 Thread Kwok Cheung Yeung
This patch avoids adding CREATE mapping clauses for loop index variables. It 
also sets TREE_ADDRESSABLE on newly mapped declarations, which fixes an ICE that 
sometimes appears due to an assert firing in omp-low.c.


2019-07-16  Julian Brown  

gcc/
* omp-oacc-kernels.c (find_omp_for_index_vars_1,
find_omp_for_index_vars): New functions.
(maybe_build_inner_data_region): Add IDX_VARS argument. Don't add
CREATE mapping clauses for loop index variables.  Set TREE_ADDRESSABLE
flag on newly-mapped declarations as a side effect.
(decompose_kernels_region_body): Call find_omp_for_index_vars.  Don't
create PRESENT clause for loop index variables.  Pass index variable
set to maybe_build_inner_data_region.
---
 gcc/omp-oacc-kernels.c | 58 --
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index d65e6c6..2091385 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -766,6 +766,43 @@ flatten_binds (gbind *bind, bool include_toplevel_vars = 
false)
   return vars;
 }

+/* Recursively search BODY_SEQUENCE for 'for' loops, and record their loop
+   indices in IDX_VARS.  */
+
+static void
+find_omp_for_index_vars_1 (gimple_seq body_sequence, hash_set *idx_vars)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi_next (&gsi))
+{
+  gimple *stmt = gsi_stmt (gsi);
+  gimple *for_stmt = top_level_omp_for_in_stmt (stmt);
+
+  if (for_stmt)
+{
+ tree idx = gimple_omp_for_index (for_stmt, 0);
+ idx_vars->add (idx);
+ find_omp_for_index_vars_1 (gimple_omp_body (for_stmt), idx_vars);
+   }
+  else if (gimple_code (stmt) == GIMPLE_BIND)
+   find_omp_for_index_vars_1 (gimple_bind_body (as_a  (stmt)),
+  idx_vars);
+}
+}
+
+/* Find all loop index variables in a bind.  */
+
+static hash_set
+find_omp_for_index_vars (gbind *bind)
+{
+  hash_set idx_vars;
+
+  find_omp_for_index_vars_1 (gimple_bind_body (bind), &idx_vars);
+
+  return idx_vars;
+}
+
 /* Helper function for places where we construct data regions.  Wraps the BODY
inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
in its cleanup block.  Returns this try statement.  */
@@ -784,13 +821,15 @@ make_data_region_try_statement (location_t loc, gimple 
*body)

 /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
location LOC containing BODY and having "create(var)" clauses for each
-   variable.  If INNER_CLEANUP is present, add a try-finally statement with
-   this cleanup code in the finally block.  Return the new data region, or
-   the original BODY if no data region was needed.  */
+   variable (such variables are also made addressable as a side effect).  If
+   INNER_CLEANUP is present, add a try-finally statement with this cleanup
+   code in the finally block.  Return the new data region, or the original
+   BODY if no data region was needed.  */

 static gimple *
 maybe_build_inner_data_region (location_t loc, gimple *body,
-   tree inner_bind_vars, gimple *inner_cleanup)
+  tree inner_bind_vars, gimple *inner_cleanup,
+  hash_set *idx_vars)
 {
   /* Build data "create(var)" clauses for these local variables.
  Below we will add these to a data region enclosing the entire body
@@ -817,7 +856,7 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
   else
 inner_bind_vars = next;
 }
-  else
+  else if (!idx_vars->contains (v))
 {
   /* Otherwise, build the map clause.  */
   tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
@@ -825,6 +864,7 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
   OMP_CLAUSE_DECL (new_clause) = v;
   OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
   OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
+ TREE_ADDRESSABLE (v) = 1;
   inner_data_clauses = new_clause;

   prev_mapped_var = v;
@@ -1156,6 +1196,8 @@ decompose_kernels_region_body (gimple *kernels_region, 
tree kernels_clauses)

   tree inner_bind_vars = flatten_binds (kernels_bind);
   gimple_seq body_sequence = gimple_bind_body (kernels_bind);

+  hash_set idx_vars = find_omp_for_index_vars (kernels_bind);
+
   /* All these inner variables will get allocated on the device (below, by
  calling maybe_build_inner_data_region).  Here we create "present"
  clauses for them and add these clauses to the list of clauses to be
@@ -1163,7 +1205,9 @@ decompose_kernels_region_body (gimple *kernels_region, 
tree kernels_clauses)

   tree present_clauses = kernels_clauses;
   for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
 {
-  if (!DECL_ARTIFICIAL (va

[PATCH 10/10, OpenACC] Make new OpenACC kernels conversion the default; adjust and add tests

2019-07-17 Thread Kwok Cheung Yeung
This patch makes the new kernel conversion scheme the default, and adjusts the 
tests accordingly.


2019-07-16  Thomas Schwinge  
Kwok Cheung Yeung  

gcc/c-family/
* c.opt (fopenacc-kernels): Default to "split".

gcc/fortran/
* lang.opt (fopenacc-kernels): Default to "split".

gcc/
* doc/invoke.texi (-fopenacc-kernels): Update.

gcc/testsuite/
* 
c-c++-common/goacc/note-parallelism-1-kernels-conditional-loop-independent_seq.c:
New file.
* c-c++-common/goacc/note-parallelism-1-kernels-loop-auto.c:
Likewise.
* c-c++-common/goacc/note-parallelism-1-kernels-loop-independent_seq.c:
Likewise.
* c-c++-common/goacc/note-parallelism-1-kernels-loops.c: Likewise.
* c-c++-common/goacc/note-parallelism-1-kernels-straight-line.c:
Likewise.
* c-c++-common/goacc/note-parallelism-combined-kernels-loop-auto.c:
Likewise.
* 
c-c++-common/goacc/note-parallelism-combined-kernels-loop-independent_seq.c:
Likewise.
* 
c-c++-common/goacc/note-parallelism-kernels-conditional-loop-independent_seq.c:
Likewise.
* c-c++-common/goacc/note-parallelism-kernels-loop-auto.c:
Likewise.
* c-c++-common/goacc/note-parallelism-kernels-loop-independent_seq.c:
Likewise.
* c-c++-common/goacc/note-parallelism-kernels-loops.c: Likewise.
* c-c++-common/goacc/classify-kernels-unparallelized.c: Update.
* c-c++-common/goacc/classify-kernels.c: Likewise.
* c-c++-common/goacc/classify-parallel.c: Likewise.
* c-c++-common/goacc/classify-routine.c: Likewise.
* c-c++-common/goacc/if-clause-2.c: Likewise.
* c-c++-common/goacc/kernels-conversion.c: Likewise.
* c-c++-common/goacc/kernels-decompose-1.c: Likewise.
* c-c++-common/goacc/loop-2-kernels.c: Likewise.
* c-c++-common/goacc/note-parallelism.c: Likewise.
* c-c++-common/goacc/uninit-dim-clause.c: Likewise.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
* gfortran.dg/goacc/kernels-decompose-1.f95: Likewise.
* gfortran.dg/goacc/kernels-tree.f95: Likewise.
* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
* gfortran.dg/goacc/classify-kernels.f95: Likewise.
* gfortran.dg/goacc/loop-2-kernels.f95: Likewise.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c:
Update.
* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c:
Likewise.
---
 gcc/c-family/c.opt |   2 +-
 gcc/doc/invoke.texi|   2 +-
 gcc/fortran/lang.opt   |   2 +-
 .../goacc/classify-kernels-unparallelized.c|   9 +-
 .../c-c++-common/goacc/classify-kernels.c  |   4 +-
 .../c-c++-common/goacc/classify-parallel.c |   2 +-
 .../c-c++-common/goacc/classify-routine.c  |   2 +-
 gcc/testsuite/c-c++-common/goacc/if-clause-2.c |   1 -
 .../c-c++-common/goacc/kernels-conversion.c|  10 +-
 .../c-c++-common/goacc/kernels-decompose-1.c   |  69 ---
 gcc/testsuite/c-c++-common/goacc/loop-2-kernels.c  |  14 +-
 ...sm-1-kernels-conditional-loop-independent_seq.c | 129 +
 .../goacc/note-parallelism-1-kernels-loop-auto.c   | 126 +
 ...te-parallelism-1-kernels-loop-independent_seq.c | 126 +
 .../goacc/note-parallelism-1-kernels-loops.c   |  47 +
 .../note-parallelism-1-kernels-straight-line.c |  82 +
 .../note-parallelism-combined-kernels-loop-auto.c  | 121 
 ...llelism-combined-kernels-loop-independent_seq.c | 121 
 ...lism-kernels-conditional-loop-independent_seq.c | 204 +
 .../goacc/note-parallelism-kernels-loop-auto.c | 138 ++
 ...note-parallelism-kernels-loop-independent_seq.c | 138 ++
 .../goacc/note-parallelism-kernels-loops.c |  50 +
 .../c-c++-common/goacc/note-parallelism.c  |   3 +-
 .../c-c++-common/goacc/uninit-dim-clause.c |   6 +-
 .../goacc/classify-kernels-unparallelized.f95  |   1 +
 .../gfortran.dg/goacc/classify-kernels.f95 |   1 +
 .../gfortran.dg/goacc/kernels-conversion.f95   |   7 +-
 .../gfortran.dg/goacc/kernels-decompose-1.f95  |  79 
 gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95   |   1 -
 gcc/testsuite/gfortran.dg/goacc/loop-2-kernels.f95 |  22 +--
 .../libgomp.oacc-c-c++-common/acc_prof-kernels-1.c |  17 +-
 .../kernels-decompose-1.c  |   9 +-
 32 files changed, 1416 insertions(+), 129 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-1-kernels-conditional-loop-independent_seq.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-1-kernels-loop-auto.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-1-kernels-loop-i

PING [PATCH v2] S/390: Improve storing asan frame_pc

2019-07-17 Thread Ilya Leoshkevich
Hello,

I would like to ping this change.

Best regards,
Ilya

> Am 02.07.2019 um 17:34 schrieb Ilya Leoshkevich :
> 
> Bootstrap and regtest running on x86_64-redhat-linux, s390x-redhat-linux
> and ppc64le-redhat-linux.
> 
> Currently s390 emits the following sequence to store a frame_pc:
> 
>   a:
>   .LASANPC0:
> 
>   lg  %r1,.L5-.L4(%r13)
>   la  %r1,0(%r1,%r12)
>   stg %r1,176(%r11)
> 
>   .L5:
>   .quad   .LASANPC0@GOTOFF
> 
> The reason GOT indirection is used instead of larl is that gcc does not
> know that .LASANPC0, being a code label, is aligned on a 2-byte
> boundary, and larl can load only even addresses.
> 
> This patch provides such an alignment hint.  Since targets don't provide
> their instruction alignments yet, the new macro is introduced for that
> purpose.  It returns 1-byte alignment by default, so this change is a
> no-op for targets other than s390.
> 
> As a result, we get the desired:
> 
>   larl%r1,.LASANPC0
>   stg %r1,176(%r11)
> 
> gcc/ChangeLog:
> 
> 2019-06-28  Ilya Leoshkevich  
> 
>   * asan.c (asan_emit_stack_protection): Provide an alignment
>   hint.
>   * config/s390/s390.h (CODE_LABEL_BOUNDARY): Specify that s390
>   requires code labels to be aligned on a 2-byte boundary.
>   * defaults.h (CODE_LABEL_BOUNDARY): New macro.
>   * doc/tm.texi: Document CODE_LABEL_BOUNDARY.
>   * doc/tm.texi.in: Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-06-28  Ilya Leoshkevich  
> 
>   * gcc.target/s390/asan-no-gotoff.c: New test.
> ---
> gcc/asan.c |  1 +
> gcc/config/s390/s390.h |  3 +++
> gcc/defaults.h |  5 +
> gcc/doc/tm.texi|  4 
> gcc/doc/tm.texi.in |  4 
> gcc/testsuite/gcc.target/s390/asan-no-gotoff.c | 15 +++
> 6 files changed, 32 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/s390/asan-no-gotoff.c
> 
> diff --git a/gcc/asan.c b/gcc/asan.c
> index 605d04f87f7..2db69f476bc 100644
> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -1523,6 +1523,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, 
> unsigned int alignb,
>   DECL_INITIAL (decl) = decl;
>   TREE_ASM_WRITTEN (decl) = 1;
>   TREE_ASM_WRITTEN (id) = 1;
> +  SET_DECL_ALIGN (decl, CODE_LABEL_BOUNDARY);
>   emit_move_insn (mem, expand_normal (build_fold_addr_expr (decl)));
>   shadow_base = expand_binop (Pmode, lshr_optab, base,
> gen_int_shift_amount (Pmode, ASAN_SHADOW_SHIFT),
> diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
> index 969f58a2ba0..3d0266c9dff 100644
> --- a/gcc/config/s390/s390.h
> +++ b/gcc/config/s390/s390.h
> @@ -334,6 +334,9 @@ extern const char *s390_host_detect_local_cpu (int argc, 
> const char **argv);
> /* Allocation boundary (in *bits*) for the code of a function.  */
> #define FUNCTION_BOUNDARY 64
> 
> +/* Alignment required for a code label, in bits.  */
> +#define CODE_LABEL_BOUNDARY 16
> +
> /* There is no point aligning anything to a rounder boundary than this.  */
> #define BIGGEST_ALIGNMENT 64
> 
> diff --git a/gcc/defaults.h b/gcc/defaults.h
> index af7ea185f1e..97c4c17537d 100644
> --- a/gcc/defaults.h
> +++ b/gcc/defaults.h
> @@ -1459,4 +1459,9 @@ see the files COPYING3 and COPYING.RUNTIME 
> respectively.  If not, see
> #define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB
> #endif
> 
> +/* Alignment required for a code label, in bits.  */
> +#ifndef CODE_LABEL_BOUNDARY
> +#define CODE_LABEL_BOUNDARY BITS_PER_UNIT
> +#endif
> +
> #endif  /* ! GCC_DEFAULTS_H */
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 14c1ea6a323..3b50fc0c0a7 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -1019,6 +1019,10 @@ to a value equal to or larger than 
> @code{STACK_BOUNDARY}.
> Alignment required for a function entry point, in bits.
> @end defmac
> 
> +@defmac CODE_LABEL_BOUNDARY
> +Alignment required for a code label, in bits.
> +@end defmac
> +
> @defmac BIGGEST_ALIGNMENT
> Biggest alignment that any data type can require on this machine, in
> bits.  Note that this is not the biggest alignment that is supported,
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index b4d57b86e2f..ab038b7462c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -969,6 +969,10 @@ to a value equal to or larger than @code{STACK_BOUNDARY}.
> Alignment required for a function entry point, in bits.
> @end defmac
> 
> +@defmac CODE_LABEL_BOUNDARY
> +Alignment required for a code label, in bits.
> +@end defmac
> +
> @defmac BIGGEST_ALIGNMENT
> Biggest alignment that any data type can require on this machine, in
> bits.  Note that this is not the biggest alignment that is supported,
> diff --git a/gcc/testsuite/gcc.target/s390/asan-no-gotoff.c 
> b/gcc/testsuite/gcc.target/s390/asan-no-gotoff.c
> new file mode 100644

[PATCH,RFC,V4 3/5] Setup for CTF generation and emission

2019-07-17 Thread Indu Bhagat
Initialize CTF container when -gtLEVEL is specified.  Generate CTF debug info
for global decls.  Import the CTF header from binutils.

[Changes from V3]
Inform the user instead of warning if -gtLEVEL is used and the frontend is not
C.

gcc/ChangeLog:
 
* Makefile.in: Add ctfout.* files to GTFILES.
* cgraphunit.c (symbol_table::finalize_compilation_unit): Generate CTF
debug info for decl. Invoke CTF debug info emission.
* ctfout.c: New file.
* ctfout.h: Likewise.
* gengtype.c (open_base_files): Add ctfout.h to ifiles.
* passes.c (rest_of_decl_compilation): Generate CTF debug info for
decl.
* toplev.c (process_options): Inform the user and ignore -gtLEVEL if
frontend is not C.
(toplev::finalize): Finalize CTF containers.

gcc/testsuite/ChangeLog:

* gcc.dg/debug/ctf/ctf-1.c: New test.
* gcc.dg/debug/ctf/ctf-preamble-1.c: Likewise.
* gcc.dg/debug/ctf/ctf.exp: Add CTF testsuite.
* gcc.dg/debug/dwarf2-ctf-1.c: New test.

include/ChangeLog:

* ctf.h: Import from binutils.

---
 gcc/ChangeLog   |  14 +
 gcc/Makefile.in |   3 +
 gcc/cgraphunit.c|  12 +-
 gcc/ctfout.c| 175 +
 gcc/ctfout.h|  53 +++
 gcc/gengtype.c  |   4 +-
 gcc/passes.c|   7 +-
 gcc/testsuite/ChangeLog |   7 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c  |   6 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c |  11 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp  |  41 ++
 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c   |   7 +
 gcc/toplev.c|  18 +
 include/ChangeLog   |   4 +
 include/ctf.h   | 483 
 15 files changed, 839 insertions(+), 6 deletions(-)
 create mode 100644 gcc/ctfout.c
 create mode 100644 gcc/ctfout.h
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c
 create mode 100644 include/ctf.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 597dc01..5487377 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1256,6 +1256,7 @@ OBJS = \
cfgloopanal.o \
cfgloopmanip.o \
cfgrtl.o \
+   ctfout.o \
symtab.o \
cgraph.o \
cgraphbuild.o \
@@ -2534,6 +2535,8 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/dwarf2asm.c \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
+  $(srcdir)/ctfout.h \
+  $(srcdir)/ctfout.c \
   $(srcdir)/tree-vect-generic.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 5999b9e..05d54f5 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -205,6 +205,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "lto-section-names.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "ctfout.h"
 
 /* Queue of cgraph nodes scheduled to be added into cgraph.  This is a
secondary queue used during optimization to accommodate passes that
@@ -2844,17 +2845,22 @@ symbol_table::finalize_compilation_unit (void)
 
   if (!seen_error ())
 {
-  /* Emit early debug for reachable functions, and by consequence,
-locally scoped symbols.  */
+  /* Emit early debug and CTF debug info for reachable functions, and by
+consequence, locally scoped symbols.  */
   struct cgraph_node *cnode;
   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (cnode)
-   (*debug_hooks->early_global_decl) (cnode->decl);
+   {
+ (*debug_hooks->early_global_decl) (cnode->decl);
+ ctf_early_global_decl (cnode->decl);
+   }
 
   /* Clean up anything that needs cleaning up after initial debug
 generation.  */
   debuginfo_early_start ();
   (*debug_hooks->early_finish) (main_input_filename);
+  ctf_early_finish (main_input_filename);
   debuginfo_early_stop ();
+
 }
 
   /* Finally drive the pass manager.  */
diff --git a/gcc/ctfout.c b/gcc/ctfout.c
new file mode 100644
index 000..471cf80
--- /dev/null
+++ b/gcc/ctfout.c
@@ -0,0 +1,175 @@
+/* Output CTF format from GCC.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT

[PATCH,RFC,V4 0/5] Support for CTF in GCC

2019-07-17 Thread Indu Bhagat
Hello,

I have made some progress on the patch set that adds support for CTF generation
in GCC. I am sharing the current status so that those interested can review
and/or try it out.

For a brief summary of the individual patches in this set, please see previous
posting https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01710.html

[Changes from V3]

1. Patch 3 has inform () instead of warning () when the frontend is not C.

2. Patch 4 has evolved. Apart from some bug fixes, some key features that have
   been implemented are :

   - CTF function and object index sub-sections
 A CTF section has a close association with the symbol table in that the
 entries in the func-info and obj-info need to be in the same order as
 entries in the symbol table.
 Since the compiler does not know the order of the entries in the symbol
 table, these new sub-sections allow a clean handshake between the compiler
 and the rest of the toolchain. The compiler only makes sure the entries in 
 the func-info and the func-info-index are in sync; and similarly that the
 entries in the obj-info and obj-info-index are in sync.

 A CTF consumer will use these indexes if present.

 Here is the binutils posting for adding these sub-sections to the CTF
 section https://sourceware.org/ml/binutils/2019-07/msg00157.html
   
   - Skip types when CTF lacks representation for them.
 CTF does not have representation for VECTOR_TYPE, Complex Integer type,
 Non IEEE float type, FIXED_POINT type and finally those enum constants
 that are not representable with a signed 32-bit integer.

 All these cases are skipped for CTF generation.  A type with type ID = 0
 is used for such declarations.  All these cases are tagged with a
 TBD_CTF_REPRESENTATION_LIMIT in the source files at this time.

 A new testcase testsuite/gcc.dg/debug/ctf/ctf-skip-types-1.c has been
 added that non-exhaustively lists specimens of types that are currently
 being skipped by the compiler for CTF generation.

 Having noted these skipped types for now, we will be working on a
 representation for them. Also, a careful perusal of the GNU C extensions
 in the context of CTF representation is needed.

   - CTF CU name
 Each CTF section now holds a reference to the CU name in the CTF string
 table.

Testing :

Apart from the usual bootstrap and regression testing on x86_64/linux and
sparc64/linux, I have now compiled more codebases with -gt.  With this patch
set, I was able to use -gt for compiling gcc libraries, and also run dg.exp
suite with -gt.

( PS : Linker support for CTF is being actively worked on as well
 https://sourceware.org/ml/binutils/2019-07/msg00159.html. This current GCC
 patch set has the ctf.h in sync with the afore-mentioned binutils patch set. )

Thanks

Indu Bhagat (5):
  Add new function lang_GNU_GIMPLE
  Add CTF command line options : -gtLEVEL
  Setup for CTF generation and emission
  CTF generation for a single compilation unit
  Update CTF testsuite

 gcc/ChangeLog  |   97 +
 gcc/Makefile.in|5 +
 gcc/cgraphunit.c   |   12 +-
 gcc/common.opt |9 +
 gcc/ctfcreate.c|  531 ++
 gcc/ctfout.c   | 1924 
 gcc/ctfout.h   |  364 
 gcc/ctfutils.c |  198 ++
 gcc/doc/invoke.texi|   16 +
 gcc/flag-types.h   |   13 +
 gcc/gengtype.c |4 +-
 gcc/langhooks.c|9 +
 gcc/langhooks.h|1 +
 gcc/opts.c |   26 +
 gcc/passes.c   |7 +-
 gcc/testsuite/ChangeLog|   41 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c |6 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-2.c |   10 +
 .../gcc.dg/debug/ctf/ctf-anonymous-struct-1.c  |   23 +
 .../gcc.dg/debug/ctf/ctf-anonymous-union-1.c   |   26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-1.c   |   31 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-2.c   |   39 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-1.c   |   30 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-2.c   |   39 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c |   22 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-1.c   |   44 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-2.c   |   30 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-3.c   |   41 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-enum-1.c|   21 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-enum-2.c|   26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-float-1.c   |   16 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-fo

[PATCH,RFC,V4 1/5] Add new function lang_GNU_GIMPLE

2019-07-17 Thread Indu Bhagat
gcc/ChangeLog:

* langhooks.c (lang_GNU_GIMPLE): New Function.
* langhooks.h: New Prototype.

---
 gcc/ChangeLog   | 5 +
 gcc/langhooks.c | 9 +
 gcc/langhooks.h | 1 +
 3 files changed, 15 insertions(+)

diff --git a/gcc/langhooks.c b/gcc/langhooks.c
index 2df97f2..f3a64c1 100644
--- a/gcc/langhooks.c
+++ b/gcc/langhooks.c
@@ -825,3 +825,12 @@ lang_GNU_OBJC (void)
 {
   return strncmp (lang_hooks.name, "GNU Objective-C", 15) == 0;
 }
+
+/* Returns true if the current lang_hooks represents the GNU GIMPLE
+   frontend.  */
+
+bool
+lang_GNU_GIMPLE (void)
+{
+  return strncmp (lang_hooks.name, "GNU GIMPLE", 10) == 0;
+}
diff --git a/gcc/langhooks.h b/gcc/langhooks.h
index a45579b..0ac794e 100644
--- a/gcc/langhooks.h
+++ b/gcc/langhooks.h
@@ -570,5 +570,6 @@ extern bool lang_GNU_C (void);
 extern bool lang_GNU_CXX (void);
 extern bool lang_GNU_Fortran (void);
 extern bool lang_GNU_OBJC (void);
+extern bool lang_GNU_GIMPLE (void);
 
 #endif /* GCC_LANG_HOOKS_H */
-- 
1.8.3.1



[PATCH,RFC,V4 2/5] Add CTF command line options : -gtLEVEL

2019-07-17 Thread Indu Bhagat
-gtLEVEL is used to request CTF debug information and also to specify how much
CTF debug information to produce.

gcc/ChangeLog:
 
* common.opt: Add CTF debug info options.
* doc/invoke.texi: Document the CTF debug info options.
* flag-types.h (enum ctf_debug_info_levels): New enum.
* opts.c (common_handle_option): Handle the new CTF debug info options.
(set_ctf_debug_level): New function.

---
 gcc/ChangeLog   |  8 
 gcc/common.opt  |  9 +
 gcc/doc/invoke.texi | 16 
 gcc/flag-types.h| 13 +
 gcc/opts.c  | 26 ++
 5 files changed, 72 insertions(+)

diff --git a/gcc/common.opt b/gcc/common.opt
index b998b25..cfa7d5c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -125,6 +125,11 @@ enum debug_info_levels debug_info_level = DINFO_LEVEL_NONE
 Variable
 bool use_gnu_debug_info_extensions
 
+; Level of CTF debugging information we are producing.  See flag-types.h
+; for the definitions of the different possible levels.
+Variable
+enum ctf_debug_info_levels ctf_debug_info_level = CTFINFO_LEVEL_NONE
+
 ; Original value of maximum field alignment in bytes, specified via
 ; -fpack-struct=.
 Variable
@@ -3007,6 +3012,10 @@ gcolumn-info
 Common Driver Var(debug_column_info,1) Init(1)
 Record DW_AT_decl_column and DW_AT_call_column in DWARF.
 
+gt
+Common Driver RejectNegative JoinedOrMissing
+Generate CTF debug information at default level.
+
 gdwarf
 Common Driver JoinedOrMissing Negative(gdwarf-)
 Generate debug information in default version of DWARF format.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 758aef3..70ab5b4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -375,6 +375,7 @@ Objective-C and Objective-C++ Dialects}.
 @item Debugging Options
 @xref{Debugging Options,,Options for Debugging Your Program}.
 @gccoptlist{-g  -g@var{level}  -gdwarf  -gdwarf-@var{version} @gol
+-gt  -gt@var{level} @gol
 -ggdb  -grecord-gcc-switches  -gno-record-gcc-switches @gol
 -gstabs  -gstabs+  -gstrict-dwarf  -gno-strict-dwarf @gol
 -gas-loc-support  -gno-as-loc-support @gol
@@ -7812,6 +7813,21 @@ other DWARF-related options such as
 @option{-fno-dwarf2-cfi-asm}) retain a reference to DWARF Version 2
 in their names, but apply to all currently-supported versions of DWARF.
 
+@item -gt
+@itemx -gt@var{level}
+@opindex gt
+Request CTF debug information and use level to specify how much CTF debug
+information should be produced.  If -gt is specified without a value for level,
+the default level of CTF debug information is 2.
+
+Level 0 produces no CTF debug information at all.  Thus, -gt0 negates -gt.
+
+Level 1 produces CTF information for tracebacks only.  This includes callsite
+information, but does not include type information.
+
+Level 2 produces type information for entities (functions, data objects etc.)
+at file-scope or global-scope only.
+
 @item -gstabs
 @opindex gstabs
 Produce debugging information in stabs format (if that is supported),
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index a210328..61a1432 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -105,6 +105,19 @@ enum dwarf_gnat_encodings
   Emit GNAT encodings for the rest.  */
 };
 
+/* CTF debug info levels.
+   CTF debug info levels are untied with DWARF debug info levels because CTF
+   may co-exist with DWARF.  */
+enum ctf_debug_info_levels
+{
+  CTFINFO_LEVEL_NONE = 0, /* Write no CTF debug info.  */
+  CTFINFO_LEVEL_TERSE = 1,/* Write CTF information to support tracebacks
+only.  Not Implemented.  */
+  CTFINFO_LEVEL_NORMAL = 2/* Write CTF type information for all entities
+(functions, data objects, variables etc.)
+at file-scope or global-scope only.  */
+};
+
 /* Enumerate Objective-c instance variable visibility settings. */
 
 enum ivar_visibility
diff --git a/gcc/opts.c b/gcc/opts.c
index 46a19a2..3b617c8 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -195,6 +195,8 @@ static void set_debug_level (enum debug_info_type type, int 
extended,
 const char *arg, struct gcc_options *opts,
 struct gcc_options *opts_set,
 location_t loc);
+static void set_ctf_debug_level (const char *arg,
+struct gcc_options *opts, location_t loc);
 static void set_fast_math_flags (struct gcc_options *opts, int set);
 static void decode_d_option (const char *arg, struct gcc_options *opts,
 location_t loc, diagnostic_context *dc);
@@ -2686,6 +2688,10 @@ common_handle_option (struct gcc_options *opts,
   opts->x_flag_stack_usage_info = value != 0;
   break;
 
+case OPT_gt:
+  set_ctf_debug_level (arg, opts, loc);
+  break;
+
 case OPT_g:
   set_debug_level (NO_DEBUG, DEFAULT_GDB_EXTENSIONS, arg, opts, opts_set,

[PATCH,RFC,V4 5/5] Update CTF testsuite

2019-07-17 Thread Indu Bhagat
[Changes from V3]
Added new testcases
 - gcc.dg/debug/ctf/ctf-array-2.c
 - gcc.dg/debug/ctf/ctf-complex-1.c
 - gcc.dg/debug/ctf/ctf-enum-2.c
 - gcc.dg/debug/ctf/ctf-func-index-1.c
 - gcc.dg/debug/ctf/ctf-objt-index-1.c
 - gcc.dg/debug/ctf/ctf-skip-types-1.c

gcc/testsuite/ChangeLog:

* gcc.dg/debug/ctf/ctf-2.c: New test.
* gcc.dg/debug/ctf/ctf-anonymous-struct-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-anonymous-union-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-array-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-array-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-bitfields-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-bitfields-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-complex-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-cvr-quals-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-cvr-quals-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-cvr-quals-3.c: Likewise.
* gcc.dg/debug/ctf/ctf-enum-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-enum-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-float-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-forward-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-forward-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-func-index-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-function-pointers-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-functions-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-int-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-objt-index-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-pointers-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-skip-types-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-str-table-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-struct-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-struct-2.c: Likewise.
* gcc.dg/debug/ctf/ctf-struct-array-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-typedef-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-typedef-struct-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-union-1.c: Likewise.
* gcc.dg/debug/ctf/ctf-variables-1.c: Likewise.


---
 gcc/testsuite/ChangeLog| 34 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-2.c | 10 +
 .../gcc.dg/debug/ctf/ctf-anonymous-struct-1.c  | 23 +++
 .../gcc.dg/debug/ctf/ctf-anonymous-union-1.c   | 26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-1.c   | 31 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-2.c   | 39 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-1.c   | 30 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-2.c   | 39 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c | 22 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-1.c   | 44 ++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-2.c   | 30 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-3.c   | 41 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-enum-1.c| 21 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-enum-2.c| 26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-float-1.c   | 16 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-forward-1.c | 36 ++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-forward-2.c | 16 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-func-index-1.c  | 25 
 .../gcc.dg/debug/ctf/ctf-function-pointers-1.c | 24 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-functions-1.c   | 34 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-int-1.c | 17 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-objt-index-1.c  | 29 ++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-pointers-1.c| 26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-skip-types-1.c  | 33 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-str-table-1.c   | 26 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-struct-1.c  | 25 
 gcc/testsuite/gcc.dg/debug/ctf/ctf-struct-2.c  | 30 +++
 .../gcc.dg/debug/ctf/ctf-struct-array-1.c  | 36 ++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-typedef-1.c | 23 +++
 .../gcc.dg/debug/ctf/ctf-typedef-struct-1.c| 12 ++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-union-1.c   | 14 +++
 gcc/testsuite/gcc.dg/debug/ctf/ctf-variables-1.c   | 25 
 32 files changed, 863 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-anonymous-struct-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-anonymous-union-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-bitfields-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-quals-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-cvr-qual

[PATCH,RFC,V4 4/5] CTF generation for a single compilation unit

2019-07-17 Thread Indu Bhagat
For each translation unit, a CTF container (ctf_container_t) is used to
keep the CTF debug info.

- ctfout.c hosts the compiler facing routines for CTF generation and emission.
- ctfcreate.c contains the CTF format specific CTF creation routines.
- ctfutils.c contains helper routines for CTF creation.

[Changes from V3]
   - Bugfixes
   - Implementation for CTF function and object index sub-sections.
   - Skip types when CTF lacks representation for them.
   - CTF Compilation Unit name support (CU name).


gcc/ChangeLog:

* Makefile.in: Add new object files.
* ctfcreate.c: New file.
* ctfout.c (ctf_dtu_d_union_selector): New helper function for garbage
collection of dtd_u union in ctf_dtdef_t.
(ctfc_add_cuname): New function to add compilation unit name to CTF
container.
(ctf_dtdef_hash::hash): New function to generate hashkey for a CTF type
record.
(hash_dtd_tree_decl): New function.
(ctf_dtdef_hash::equal): Likewise.
(is_ctf_base_type): Likewise.
(get_cvr_quals_for_type): Likewise.
(get_type_name_string): Likewise.
(get_decl_name_string): Likewise.
(ctf_type_exists): Likewise.
(init_ctf_string_table): Likewise.
(new_ctf_container): Allocate contents of CTF container.
(delete_ctf_container): Cleanup contents of CTF container.
(init_ctf_sections): Update code comments regarding LTO.
(gen_ctf_base_type): New function.
(gen_ctf_pointer_type): Likewise.
(gen_ctf_array_type): Likewise.
(gen_ctf_forward_type): Likewise.
(gen_ctf_enum_const_list): Likewise.
(gen_ctf_enum_type): Likewise.
(gen_ctf_function_type): Likewise.
(gen_ctf_cvrquals): Likewise.
(gen_ctf_sou_type): Likewise.
(gen_ctf_typedef): Likewise.
(gen_ctf_variable): Likewise.
(gen_ctf_function): Likewise.
(gen_ctf_type): Likewise.
(gen_ctf_bitfield_type_for_decl): Likewise.
(gen_ctf_type_for_decl): Likewise.
(ctf_preprocess_var): Likewise.
(ctf_dvd_preprocess_cb): Likewise.
(ctf_dtd_preprocess_cb): Likewise.
(ctf_preprocess): Likewise.
(ctf_asm_preamble): Likewise.
(ctf_asm_stype): Likewise.
(ctf_asm_type): Likewise.
(ctf_asm_slice): Likewise.
(ctf_asm_array): Likewise.
(ctf_asm_varent): Likewise.
(ctf_asm_sou_lmember): Likewise.
(ctf_asm_sou_member): Likewise.
(ctf_asm_enum_const): Likewise.
(output_ctf_header): Output the CTF section if the CTF container is not
empty.
(output_ctf_obj_info): New function.
(output_ctf_func_info): Likewise.
(output_ctf_objtidx): Likewise.
(output_ctf_funcidx): Likewise.
(output_ctf_vars): Likewise.
(output_ctf_strs): Likewise.
(output_asm_ctf_sou_fields): Likewise.
(output_asm_ctf_enum_list): Likewise.
(output_asm_ctf_vlen_bytes): Likewise.
(output_asm_ctf_type): Likewise.
(output_ctf_types): Likewise.
(ctf_decl): Likewise.
(ctf_early_finish): Trigger CTF emission.
(ctf_early_global_decl): Invoke CTF generation function.
(ctfout_c_finalize): Add cleanup of CTF container.
* ctfout.h (typedef struct GTY): New data structures.
(struct ctf_dtdef_hash): CTF type structure hasher.
* ctfutils.c: New file.

include/ChangeLog:
 
* ctf.h: Sync with binutils.  Keep ctf_slice_t aligned.  Add CTF obj
index and func index section.

---
 gcc/ChangeLog |   70 +++
 gcc/Makefile.in   |2 +
 gcc/ctfcreate.c   |  531 
 gcc/ctfout.c  | 1811 -
 gcc/ctfout.h  |  317 +-
 gcc/ctfutils.c|  198 ++
 include/ChangeLog |5 +
 include/ctf.h |   58 +-
 8 files changed, 2942 insertions(+), 50 deletions(-)
 create mode 100644 gcc/ctfcreate.c
 create mode 100644 gcc/ctfutils.c

diff --git a/gcc/ctfcreate.c b/gcc/ctfcreate.c
new file mode 100644
index 000..f14ee69
--- /dev/null
+++ b/gcc/ctfcreate.c
@@ -0,0 +1,531 @@
+/* Functions to create and update CTF from GCC.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+/* Create CTF types.  The code is mostly adapted from l

wrap math.h for M_PI et al in target/i386 tests

2019-07-17 Thread Alexandre Oliva


Most but not all of the tests that expect M_PI, M_PI_2 and/or M_PI_4
to be defined in math.h explicitly exclude one target system that does
not satisfy this non-standard assumption.

This patch introduces a wrapper header that includes math.h and then
conditionally supplies the missing non-standard macro definitions.
With that, we can drop the dg-skip-if "no M_PI" exclusions.

Tested on x86_64-linux-gnu, with a regular math.h, and with a "manually
fixincluded" math.h so as to not define M_PI, M_PI_2 and M_PI_4.  Ok to
install?


for  gcc/testsuite/ChangeLog

* gcc.target/i386/math_m_pi.h: New.
* gcc.target/i386/sse4_1-round.h: Use it.
* gcc.target/i386/pr73350.c: Likewise.
* gcc.target/i386/avx512f-vfixupimmpd-2.c: Likewise.
* gcc.target/i386/avx512f-vfixupimmps-2.c: Likewise.
* gcc.target/i386/avx512f-vfixupimmsd-2.c: Likewise.
* gcc.target/i386/avx512f-vfixupimmss-2.c: Likewise.
* gcc.target/i386/avx512f-vfixupimmss-2.c: Likewise.
* gcc.target/i386/avx-ceil-sfix-2-vec.c: Likewise.  Drop
dg-skip-if "no M_PI".
* gcc.target/i386/avx-cvt-2-vec.c: Likewise.
* gcc.target/i386/avx-floor-sfix-2-vec.c: Likewise.
* gcc.target/i386/avx-rint-sfix-2-vec.c: Likewise.
* gcc.target/i386/avx-round-sfix-2-vec.c: Likewise.
* gcc.target/i386/avx512f-ceil-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-ceil-vec-1.c: Likewise.
* gcc.target/i386/avx512f-ceilf-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-ceilf-vec-1.c: Likewise.
* gcc.target/i386/avx512f-floor-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-floor-vec-1.c: Likewise.
* gcc.target/i386/avx512f-floorf-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-floorf-vec-1.c: Likewise.
* gcc.target/i386/avx512f-rint-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-rintf-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-round-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-roundf-sfix-vec-1.c: Likewise.
* gcc.target/i386/avx512f-trunc-vec-1.c: Likewise.
* gcc.target/i386/avx512f-truncf-vec-1.c: Likewise.
* gcc.target/i386/sse2-cvt-vec.c: Likewise.
* gcc.target/i386/sse4_1-ceil-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-ceil-vec.c: Likewise.
* gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-ceilf-vec.c: Likewise.
* gcc.target/i386/sse4_1-floor-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-floor-vec.c: Likewise.
* gcc.target/i386/sse4_1-floorf-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-floorf-vec.c: Likewise.
* gcc.target/i386/sse4_1-rint-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-rint-vec.c: Likewise.
* gcc.target/i386/sse4_1-rintf-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-rintf-vec.c: Likewise.
* gcc.target/i386/sse4_1-round-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-round-vec.c: Likewise.
* gcc.target/i386/sse4_1-roundf-sfix-vec.c: Likewise.
* gcc.target/i386/sse4_1-roundf-vec.c: Likewise.
* gcc.target/i386/sse4_1-roundsd-4.c: Likewise.
* gcc.target/i386/sse4_1-roundss-4.c: Likewise.
* gcc.target/i386/sse4_1-trunc-vec.c: Likewise.
* gcc.target/i386/sse4_1-truncf-vec.c: Likewise.
---
 .../gcc.target/i386/avx-ceil-sfix-2-vec.c  |3 +--
 gcc/testsuite/gcc.target/i386/avx-ceil-sfix-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-ceil-vec.c   |1 -
 gcc/testsuite/gcc.target/i386/avx-ceilf-sfix-vec.c |1 -
 gcc/testsuite/gcc.target/i386/avx-ceilf-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-cvt-2-vec.c  |3 +--
 gcc/testsuite/gcc.target/i386/avx-cvt-vec.c|1 -
 .../gcc.target/i386/avx-floor-sfix-2-vec.c |3 +--
 gcc/testsuite/gcc.target/i386/avx-floor-sfix-vec.c |1 -
 gcc/testsuite/gcc.target/i386/avx-floor-vec.c  |1 -
 .../gcc.target/i386/avx-floorf-sfix-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-floorf-vec.c |1 -
 .../gcc.target/i386/avx-rint-sfix-2-vec.c  |3 +--
 gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-rint-vec.c   |1 -
 gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c |1 -
 gcc/testsuite/gcc.target/i386/avx-rintf-vec.c  |1 -
 .../gcc.target/i386/avx-round-sfix-2-vec.c |3 +--
 gcc/testsuite/gcc.target/i386/avx-round-sfix-vec.c |1 -
 gcc/testsuite/gcc.target/i386/avx-round-vec.c  |1 -
 .../gcc.target/i386/avx-roundf-sfix-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-roundf-vec.c |1 -
 gcc/testsuite/gcc.target/i386/avx-trunc-vec.c  |1 -
 gcc/testsuite/gcc.target/i386/avx-truncf-vec.c |1 -
 .../gcc.target/i386/avx512f-ceil-sfix-vec-1.c  |3 +--
 gcc/testsuite/gcc.target/i386/avx512f

[PATCH] Put more data in read-only data on hppa

2019-07-17 Thread John David Anglin
When generating non-PIC code on Linux, the linker support allows us to put
both local and global relocs in read-only data.  On HP-UX, we can put local
relocs in read-only data.  We can also put constant data in read-only data on
the HP-UX SOM target when generating PIC code.

This patch implements the above.

Tested on hppa2.0w-hp-hpux11.11, hppa64-hp-hpux11.11 and hppa-unknown-linux-gnu.

Committed to trunk and gcc-9.

Dave
-- 
John David Anglin  dave.ang...@bell.net

2019-07-17  John David Anglin  

* config/pa/pa.c (pa_som_asm_init_sections): Don't force all constant
data into data section when generating PIC code.
(pa_select_section): Use pa_reloc_rw_mask() to qualify relocs.
(pa_reloc_rw_mask): Return 3 when generating PIC code and when
generating code for SOM targets earlier than HP-UX 11.  Otherwise,
return 2 for SOM and 0 for other targets.

Index: config/pa/pa.c
===
--- config/pa/pa.c  (revision 273480)
+++ config/pa/pa.c  (working copy)
@@ -9805,20 +9805,23 @@
   = get_unnamed_section (0, output_section_asm_op,
 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

-  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
- which reference data within the $TEXT$ space (for example constant
+  /* HPUX ld generates incorrect GOT entries for "T" fixups which
+ reference data within the $TEXT$ space (for example constant
  strings in the $LIT$ subspace).

  The assemblers (GAS and HP as) both have problems with handling
- the difference of two symbols which is the other correct way to
+ the difference of two symbols.  This is the other correct way to
  reference constant data during PIC code generation.

- So, there's no way to reference constant data which is in the
- $TEXT$ space during PIC generation.  Instead place all constant
- data into the $PRIVATE$ subspace (this reduces sharing, but it
- works correctly).  */
-  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
+ Thus, we can't put constant data needing relocation in the $TEXT$
+ space during PIC generation.

+ Previously, we placed all constant data into the $DATA$ subspace
+ when generating PIC code.  This reduces sharing, but it works
+ correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
+ This puts constant data not needing relocation into the $TEXT$ space.  */
+  readonly_data_section = som_readonly_data_section;
+
   /* We must not have a reference to an external symbol defined in a
  shared library in a readonly section, else the SOM linker will
  complain.
@@ -9850,7 +9853,7 @@
   && DECL_INITIAL (exp)
   && (DECL_INITIAL (exp) == error_mark_node
   || TREE_CONSTANT (DECL_INITIAL (exp)))
-  && !reloc)
+  && !(reloc & pa_reloc_rw_mask ()))
 {
   if (TARGET_SOM
  && DECL_ONE_ONLY (exp)
@@ -9859,7 +9862,8 @@
   else
return readonly_data_section;
 }
-  else if (CONSTANT_CLASS_P (exp) && !reloc)
+  else if (CONSTANT_CLASS_P (exp)
+  && !(reloc & pa_reloc_rw_mask ()))
 return readonly_data_section;
   else if (TARGET_SOM
   && TREE_CODE (exp) == VAR_DECL
@@ -9875,12 +9879,11 @@
 static int
 pa_reloc_rw_mask (void)
 {
-  /* We force (const (plus (symbol) (const_int))) to memory when the
- const_int doesn't fit in a 14-bit integer.  The SOM linker can't
- handle this construct in read-only memory and we want to avoid
- this for ELF.  So, we always force an RTX needing relocation to
- the data section.  */
-  return 3;
+  if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
+return 3;
+
+  /* HP linker does not support global relocs in readonly memory.  */
+  return TARGET_SOM ? 2 : 0;
 }

 static void


Re: -Wmissing-attributes: avoid duplicates and false positives

2019-07-17 Thread Alexandre Oliva
On Jul 17, 2019, Martin Sebor  wrote:

> Sure, if it's worthwhile to you I think it's an improvement even
> if it doesn't fix a bug.  (In full disclosure I'm not empowered
> to formally approve bigger patches but I think cleanups like this
> can safely be committed as obvious.)

Thanks, I'm installing the patch below.

>> Does it make sense to put the testcase in anyway?

> If it isn't already covered by one of the existing tests I'd say
> definitely.  I also tried the following while playing with it so
> if this variation isn't being exercised either it might be worth
> adding to the new test as well:

Thanks, I added it to the new test



-Wmissing-attributes: check that we avoid duplicates and false positives

The initial patch for PR 81824 fixed various possibilities of
-Wmissing-attributes reporting duplicates and false positives.  The
test that avoided them was a little obscure, though, so this patch
rewrites it into a more self-evident form.

The patch also adds a testcase that already passed, but that
explicitly covers some of the possibilities of reporting duplicates
and false positives that preexisting tests did not cover.


for  gcc/ChangeLog

PR middle-end/81824
* attribs.c (decls_mismatched_attributes): Simplify the logic
that avoids duplicates and false positives.

for  gcc/testsuite/ChangeLog

PR middle-end/81824
* g++.dg/Wmissing-attributes-1.C: New.  Some of its fragments
are from Martin Sebor.
---
 gcc/attribs.c|   14 --
 gcc/testsuite/g++.dg/Wmissing-attributes-1.C |   66 ++
 2 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/Wmissing-attributes-1.C

diff --git a/gcc/attribs.c b/gcc/attribs.c
index 8e54016559723..f4777c6a82336 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -1931,15 +1931,19 @@ decls_mismatched_attributes (tree tmpl, tree decl, tree 
attrlist,
  if (!has_attribute (tmpls[j], tmpl_attrs[j], blacklist[i]))
continue;
 
+ bool found = false;
  unsigned kmax = 1 + !!decl_attrs[1];
  for (unsigned k = 0; k != kmax; ++k)
{
  if (has_attribute (decls[k], decl_attrs[k], blacklist[i]))
-   break;
-
- if (!k && kmax > 1)
-   continue;
+   {
+ found = true;
+ break;
+   }
+   }
 
+ if (!found)
+   {
  if (nattrs)
pp_string (attrstr, ", ");
  pp_begin_quote (attrstr, pp_show_color (global_dc->printer));
@@ -1947,6 +1951,8 @@ decls_mismatched_attributes (tree tmpl, tree decl, tree 
attrlist,
  pp_end_quote (attrstr, pp_show_color (global_dc->printer));
  ++nattrs;
}
+
+ break;
}
 }
 
diff --git a/gcc/testsuite/g++.dg/Wmissing-attributes-1.C 
b/gcc/testsuite/g++.dg/Wmissing-attributes-1.C
new file mode 100644
index 0..972e68305bb90
--- /dev/null
+++ b/gcc/testsuite/g++.dg/Wmissing-attributes-1.C
@@ -0,0 +1,66 @@
+// { dg-do compile }
+// { dg-options "-Wmissing-attributes" }
+
+#define ATTR(list)   __attribute__ (list)
+
+/* Type attributes are normally absent in template functions, and the
+   mere presence of any such attribute used to cause the
+   -Wmissing-attributes checks, that checked for attributes typically
+   associated with functions rather than types, to report any missing
+   attributes twice: once for the specialization attribute list, once
+   for its type attribute list.
+
+   This test uses both decl and type attributes to exercise the code
+   that avoids reporting duplicates, in ways that failed in the past
+   but that were not covered in other tests.  */
+typedef void* ATTR ((alloc_size (1))) f_type (int);
+
+template 
+f_type
+ATTR ((malloc))
+missing_malloc;// { dg-message "missing primary template attribute 
.malloc." }
+
+template <>
+f_type
+missing_malloc;  // { dg-warning "explicit specialization 
.\[^\n\r\]+. may be missing attributes" }
+
+
+/* Check that even an attribute that appears in both lists (decl and
+   type) in a template declaration is reported as missing only
+   once.  */
+
+template 
+f_type
+ATTR ((alloc_size (1))) // In both attr lists, decl's and type's.
+missing_alloc_size;// { dg-message "missing primary template 
attribute .alloc_size." }
+
+template <>
+void *
+missing_alloc_size(int); // { dg-warning "explicit specialization 
.\[^\n\r\]+. may be missing attributes" }
+
+
+/* Check that even an attribute that appears in both lists (decl and
+   type) is not reported as missing if it's present only in the type
+   list.  */
+
+template 
+f_type
+ATTR ((alloc_size (1))) // In both attr lists, decl's and type's.
+missing_nothing;
+
+template <>
+f_type
+missing_nothing;
+
+
+/* For completeness, check that a type attribute is matched by a decl
+   attribute in the specializa

[PATCH,fortran] Handle BOZ in accordance to Fortran 2018 standard

2019-07-17 Thread Steve Kargl
I will be away until Monday.  Plenty of time for a review.


TL;DR version:

The attached patch fixes the handling of a BOZ literal constant
in gfortran to conform to the F2018 standard.

Long version:

  Highlights:

  * No longer need to use -fno-range-check with BOZ
  * Fixed merge_bits
  * Eliminate a number of undocumented extensions.
  * Added -fallow-invalid-boz option to allow BOZ in prohibited
contexts.  This option is used by the new function gfc_invalid_boz,
which issues an error for an invalid BOZ usage.  The option degrades
the error to warning, and may allow the code to compile.  The
result may not be what is expected based on older versions of
gfortran (e.g., see FLOAT).  The warning can be suppressed with
the -w option.
  * Deprecate the use of 'X' for an alias of 'Z'.
  * Deprecate postfix syntax (i.e., '1234'z).

I believe the patch fixes all open PRs about BOZ with the
possible exception of PR88227.  PR88227 is a clever combination
of -m64 and/or -m32 with the -fdefault-* options.  It should be
well-known that IMHO the -fdefault-* options should be deprecated.
So, I don't care if this is fixed or not.

There is a long history of poor handling of a BOZ in the Fortran 
standard.  In revision 95643 (2005-02-27) I made gfortran conform
to the Fortran 95 standard's definition of a BOZ in a DATA statement
(which is the only context in which a BOZ can appear in a conforming
Fortran 95 program).  This is tantamount to converting a BOZ to the
widest available INTEGER on a target (i.e., INTEGER(8) or INTEGER(16))
when the BOZ is initially parsed.  This has the effect that a BOZ may
appear in any context where an integer literal constant may appear.
Gfortran contains a boatload of undocumented extensions.  The patch
removes most of these undocumented extensions (i.e., an error will 
be issued).

In F2008 and F2018, a BOZ is a ***typeless*** string of bits, which
does not have a kind type parameter.  When the string of bits is to
be converted to some quantity, a few things can happen.  If it is
too short, the string is padded with zeros.  If it is too long, it 
is truncated.  The handling of the sign bit is processor dependent.
I have introduced 2 functions in check.c (gfc_boz2int and gfc_boz2real)
to handle the conversion.  gfc_boz2int essentially converts a BOZ
into an unsigned integer, and then does two's-complement wrap-around
to obtain negative values.  gfc_boz2real does the padding/truncation
as needed and then converts that BOZ into an intermediate widest
INTEGER, which is given to gfc_convert_boz in target-memory.c to 
do the actual conversion to a REAL.  Range checking has been removed
in gfc_convert_boz.

Along the way, I have deprecated the SHORT and LONG aliases for 
INT(x,2) and INT(x,4).  The primary reason for deprecation is that
LONG is documented to convert its argument to a C long.  Well, the
size of a C long depends on the target.

The code for deprecated items is still present and can sometimes
be used via the -fallow-invalid-boz option.  After 10.1 is released,
and if I am still contributing to gfortran, I will remove the code.
If someone feels strongly that a previously undocumented extension
should be retained, feel free to fix it after I commit the patch.
I think I've weighed the pros and cons, and have made prudent
decisions about what to deprecate.

Finally, I will only respond to technical questions/comments.
Any non-technical questions/comments will be forwarded to /dev/null.


2019-07-16  Steven G. Kargl  

* arith.c (gfc_convert_integer, gfc_convert_real, gfc_convert_complex):
Move to ...
* primary.c (convert_integer, convert_real, convert_complex): ... here.
Rename and make static functions.
(match_integer_constant): Use convert_integer.
(match_real_constant): Use convert_real.
(match_complex_constant): Use convert_complex.
* arith.h (gfc_convert_integer, gfc_convert_real, gfc_convert_complex):
Remove prototypes.
* array.c (match_array_cons_element): A BOZ cannot be a data 
statement value.  Jump to a common exit point.
* check.c (gfc_invalid_boz): New function.  Emit error or warning
for a BOZ in an invalid context.
(boz_args_check): Move to top of file to prevent need of forward
declaration.
(is_boz_constant): New function.  Check that BOZ expr is constant.
(gfc_boz2real): New function. In-place conversion of BOZ literal
constant to REAL in accordance to F2018.
(gfc_boz2int): New function. In-place conversion of BOZ literal
constant to INTEGER in accordance to F2018.
(gfc_check_achar, gfc_check_char, gfc_check_float): Use
gfc_invalid_boz.  Convert BOZ as needed.
(gfc_check_bge_bgt_ble_blt): Enforce F2018 requirements on BGE, 
BGT, BLE, and BLT intrinsic functions.
(gfc_check_cmplx): Re-organize to check kind, if present, first.
Convert BOZ real and/or

Go patch committed: Fix bug in unordered set when exporting

2019-07-17 Thread Ian Lance Taylor
This Go frontend patch by Than McIntosh fixes a bug in the handling of
unordered set during exporting.

In https://golang.org/cl/183850
(https://gcc.gnu.org/ml/gcc-patches/2019-07/msg00200.html) a change
was made to combine tracking/discovery of exported types and imported
packages during export data generation.  As a result of this
refactoring a bug was introduced: the new code can potentially insert
items into the exports set (an unordered_set) while iterating through
the same set, which is illegal according to the spec for
std::unordered_set.

This patch fixes the problem by changing the type discovery phase to
iterate through a separate list of sorted exports, as opposed to
iterating through the main unordered set.  Also included is a change
to fix the code that looks for variables that are referenced from
inlined routine bodies (this code wasn't scanning all of the functions
that it needed to scan).

There is a new test case for this problem in https://golang.org/cl/186697.

This is for https://golang.org/issue/33020.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 273534)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-0e51b7e9c03c6f6bc3d06343f2050f17349ccdc3
+19ed722fb3ae5e618c746da20efb79fc837337cd
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/export.cc
===
--- gcc/go/gofrontend/export.cc (revision 273534)
+++ gcc/go/gofrontend/export.cc (working copy)
@@ -111,7 +111,7 @@ class Collect_export_references : public
 : Traverse(traverse_expressions
| traverse_types),
   exp_(exp), exports_(exports), imports_(imports),
-  inline_fcn_worklist_(NULL)
+  inline_fcn_worklist_(NULL), exports_finalized_(false)
   { }
 
   // Initial entry point; performs a walk to expand the exports set.
@@ -121,7 +121,7 @@ class Collect_export_references : public
   // Second entry point (called after the method above), to find
   // all types referenced by exports.
   void
-  prepare_types();
+  prepare_types(const std::vector& sorted_exports);
 
  protected:
   // Override of parent class method.
@@ -141,6 +141,13 @@ class Collect_export_references : public
   traverse_named_type(Named_type*);
 
  private:
+
+  // Add a named object to the exports set (during expand_exports()).
+  // Returns TRUE if a new object was added to the exports set,
+  // FALSE otherwise.
+  bool
+  add_to_exports(Named_object*);
+
   // The exporter.
   Export* exp_;
   // The set of named objects to export.
@@ -152,6 +159,8 @@ class Collect_export_references : public
   // Worklist of functions we are exporting with inline bodies that need
   // to be checked.
   std::vector* inline_fcn_worklist_;
+  // Set to true if expand_exports() has been called and is complete.
+  bool exports_finalized_;
 };
 
 void
@@ -172,6 +181,18 @@ Collect_export_references::expand_export
}
 }
   this->inline_fcn_worklist_ = NULL;
+  this->exports_finalized_ = true;
+}
+
+bool
+Collect_export_references::add_to_exports(Named_object* no)
+{
+  std::pair ins =
+  this->exports_->insert(no);
+  // If the export list has been finalized, then we should not be
+  // adding anything new to the exports set.
+  go_assert(!this->exports_finalized_ || !ins.second);
+  return ins.second;
 }
 
 int
@@ -189,7 +210,7 @@ Collect_export_references::expression(Ex
   if (var_package != NULL)
 this->imports_->insert(var_package);
 
- this->exports_->insert(no);
+ this->add_to_exports(no);
  no->var_value()->set_is_referenced_by_inline();
}
   return TRAVERSE_CONTINUE;
@@ -210,17 +231,16 @@ Collect_export_references::expression(Ex
 
   if (this->inline_fcn_worklist_ != NULL)
 {
-  std::pair ins =
-  this->exports_->insert(no);
+  bool added = this->add_to_exports(no);
 
   if (no->is_function())
 no->func_value()->set_is_referenced_by_inline();
 
-  // If ins.second is false then this object was already in
+  // If 'added' is false then this object was already in
   // exports_, in which case it was already added to
   // check_inline_refs_ the first time we added it to exports_, so
   // we don't need to add it again.
-  if (ins.second
+  if (added
   && no->is_function()
   && no->func_value()->export_for_inlining())
 this->inline_fcn_worklist_->push_back(no);
@@ -238,11 +258,11 @@ Collect_export_references::expression(Ex
 // exported inline function from another package).
 
 void
-Collect_export_references::prepare_types()
+Collect_export_references::prepare_types(const std::vector& 

Fix failing tests after PR libstdc++/85965

2019-07-17 Thread François Dumont

Since commit 5d3695d03b7bdade9f4d05d2b those tests are failing.

    * testsuite/23_containers/unordered_map/48101_neg.cc: Adapt dg-error
    after PR libstdc++/85965 fix.
    * testsuite/23_containers/unordered_multimap/48101_neg.cc: Likewise.
    * testsuite/23_containers/unordered_multiset/48101_neg.cc: Likewise.
    * testsuite/23_containers/unordered_set/48101_neg.cc

It is quite trivial but I wonder if there is another plan to restore 
those static assertions differently.


Ok to commit ?

François

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/48101_neg.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/48101_neg.cc
index 8d823dfa476..77c0e9ce681 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_map/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/48101_neg.cc
@@ -27,7 +27,4 @@ test01()
   c2.find(2); // { dg-error "here" }
 }
 
-// { dg-error "hash function must be invocable" "" { target *-*-* } 0 }
-// { dg-error "key equality predicate must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "use of deleted function" }
 // { dg-prune-output "no match for call" }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multimap/48101_neg.cc b/libstdc++-v3/testsuite/23_containers/unordered_multimap/48101_neg.cc
index a81615b3607..7db7dcb2b5d 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multimap/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multimap/48101_neg.cc
@@ -27,7 +27,4 @@ test01()
   c2.find(2); // { dg-error "here" }
 }
 
-// { dg-error "hash function must be invocable" "" { target *-*-* } 0 }
-// { dg-error "key equality predicate must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "use of deleted function" }
 // { dg-prune-output "no match for call" }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc
index 03ddb898d6c..444ca210c58 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc
@@ -29,8 +29,5 @@ test01()
 }
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
-// { dg-error "hash function must be invocable" "" { target *-*-* } 0 }
-// { dg-error "key equality predicate must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "use of deleted function" }
 // { dg-prune-output "must have the same value_type as its allocator" }
 // { dg-prune-output "no match for call" }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/48101_neg.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/48101_neg.cc
index e79d3769248..2c02620bccb 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_set/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/48101_neg.cc
@@ -29,8 +29,5 @@ test01()
 }
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
-// { dg-error "hash function must be invocable" "" { target *-*-* } 0 }
-// { dg-error "key equality predicate must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "use of deleted function" }
 // { dg-prune-output "must have the same value_type as its allocator" }
 // { dg-prune-output "no match for call" }



sized delete in _Temporary_buffer<>

2019-07-17 Thread François Dumont
As we adopted sized deallocation in new_allocator, why not do
the same in _Temporary_buffer<>?


    * include/bits/stl_tempbuf.h (__detail::__return_temporary_buffer): 
New.

    (~_Temporary_buffer()): Use latter.
    (_Temporary_buffer(_FIterator, size_type)): Likewise.

Tested w/o activating sized deallocation. I'll try to run tests with 
this option activated.


Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h b/libstdc++-v3/include/bits/stl_tempbuf.h
index b6ad9ee6a46..bb7c2cd1334 100644
--- a/libstdc++-v3/include/bits/stl_tempbuf.h
+++ b/libstdc++-v3/include/bits/stl_tempbuf.h
@@ -63,6 +63,21 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  namespace __detail
+  {
+template
+  inline void
+  __return_temporary_buffer(_Tp* __p,
+size_t __len __attribute__((__unused__)))
+  {
+#if __cpp_sized_deallocation
+	::operator delete(__p, __len);
+#else
+	::operator delete(__p);
+#endif
+  }
+  }
+
   /**
*  @brief Allocates a temporary buffer.
*  @param  __len  The number of objects of type Tp.
@@ -112,7 +127,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 return_temporary_buffer(_Tp* __p)
 { ::operator delete(__p); }
 
-
   /**
*  This class is used in two places: stl_algo.h and ext/memory,
*  where it is wrapped as the temporary_buffer class.  See
@@ -165,7 +179,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   ~_Temporary_buffer()
   {
 	std::_Destroy(_M_buffer, _M_buffer + _M_len);
-	std::return_temporary_buffer(_M_buffer);
+	std::__detail::__return_temporary_buffer(_M_buffer, _M_len);
   }
 
 private:
@@ -185,7 +199,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 __ucr(_Pointer __first, _Pointer __last,
 	  _ForwardIterator __seed)
 {
-	  if(__first == __last)
+	  if (__first == __last)
 	return;
 
 	  _Pointer __cur = __first;
@@ -244,22 +258,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _Temporary_buffer(_ForwardIterator __seed, size_type __original_len)
 : _M_original_len(__original_len), _M_len(0), _M_buffer(0)
 {
-  __try
-	{
-	  std::pair __p(std::get_temporary_buffer<
-	value_type>(_M_original_len));
-	  _M_buffer = __p.first;
-	  _M_len = __p.second;
-	  if (_M_buffer)
-	std::__uninitialized_construct_buf(_M_buffer, _M_buffer + _M_len,
-	   __seed);
-	}
-  __catch(...)
+  std::pair __p(
+		std::get_temporary_buffer(_M_original_len));
+
+  if (__p.first)
 	{
-	  std::return_temporary_buffer(_M_buffer);
-	  _M_buffer = 0;
-	  _M_len = 0;
-	  __throw_exception_again;
+	  __try
+	{
+	  std::__uninitialized_construct_buf(__p.first, __p.first + __p.second,
+		 __seed);
+	  _M_buffer = __p.first;
+	  _M_len = __p.second;
+	}
+	  __catch(...)
+	{
+	  std::__detail::__return_temporary_buffer(__p.first, __p.second);
+	  __throw_exception_again;
+	}
 	}
 }
 


Re: [PATCH, rs6000] Support vrotr3 for int vector types

2019-07-17 Thread Kewen.Lin
Hi Segher,

on 2019/7/17 下午9:40, Segher Boessenkool wrote:
> Hi Kewen,
> 
> On Wed, Jul 17, 2019 at 04:32:15PM +0800, Kewen.Lin wrote:
>> Regression testing just launched, is it OK for trunk if it's bootstrapped
>> and regresstested on powerpc64le-unknown-linux-gnu?
> 
>> +;; Expanders for rotatert to make use of vrotl
>> +(define_expand "vrotr3"
>> +  [(set (match_operand:VEC_I 0 "vint_operand")
>> +(rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> +  (match_operand:VEC_I 2 "vint_reg_or_const_vector")))]
> 
> Having any rotatert in a define_expand or define_insn will regress.
> 
> So, nope, sorry.
> 

Thanks for clarifying!  Since regression testing passed on powerpc64le, I'd like
to double-check the meaning of "regress": does it mean it's
a regression from a design point of view?  Is it specific to rotatert and
related patterns like vrotr?

If so, it sounds like we can't go the vrotr way. :(


Thanks,
Kewen



Re: [RFC] Consider lrotate const rotation in vectorizer

2019-07-17 Thread Kewen.Lin
on 2019/7/17 下午6:37, Richard Biener wrote:
> On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin  wrote:
>>
>> Hi all,
>>
>> Based on the previous comments (thank you!), I tried to update the
>> handling in expander and vectorizer.  Middle-end optimizes lrotate
>> with const rotation count to rrotate all the time, it makes vectorizer
>> fail to vectorize if rrotate isn't supported on the target.  We can at
>> least teach it on const rotation count, the cost should be the same?
>> At the same time, the expander already tries to use the opposite
>> rotation optable for scalar, we can teach it to deal with vector as well.
>>
>> Is it on the right track and reasonable?
> 
> So you're basically fixing this up in the expander.  I think on
> the GIMPLE level you then miss to update tree-vect-generic.c?
> 

Thanks, I will update it.  Another question, on variable rotation
amounts: where is the best place to add the additional cost in the
vectorizer (for negate + possible maskgen/and)?  Or, to avoid this,
should the stmt be transformed into several stmts with the opposite
direction before the vectorizer?

> I'm not sure if it makes sense to have both LROTATE_EXPR and
> RROTATE_EXPR on the GIMPLE level then (that CPUs only
> support one direction is natural though).  So maybe simply get
> rid of one?  

One, maybe impractical, idea: have a single ROTATE_EXPR to unify them and use
a positive or negative value for the direction?

> Its semantics are also nowhere documented
> (do we allow negative rotation amounts?  how are
> non-mode-precision entities rotated? etc.).
> 

I think a negative rotation amount is OK; I'm not sure about non-mode-precision
entities.  It's a good point that we should guard against them when we would
like to use the opposite direction.


Thanks,
Kewen



Re: [PATCH] Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157)

2019-07-17 Thread Bernhard Reutner-Fischer
On 17 July 2019 09:01:36 CEST, Jakub Jelinek  wrote:
>Hi!
>
>On the following testcase we end up with a comparison (EQ_EXPR in this
>case)
>with unsupported vector operands, but supported result (vector boolean
>type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
>a VEC_COND_EXPR which is also not supported by the optab and has the
>vector
>boolean type with scalar mode as the first operand.
>
>The last hunk makes sure that we don't just ignore lowering of the
>comparison
>when it has an integer bitmask result but unsupported vector operands.
>The expand_vector_comparison changes makes sure we lower the comparison
>properly into the integer bitmask and finally the
>expand_vector_condition
>changes makes sure we lower properly the VEC_COND_EXPR.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
>2019-07-17  Jakub Jelinek  
>
>   PR tree-optimization/91157
>   * tree-vect-generic.c (expand_vector_comparison): Handle lhs being
>   a vector boolean with scalar mode.
>   (expand_vector_condition): Handle first operand being a vector boolean
>   with scalar mode.
>   (expand_vector_operations_1): For comparisons, don't bail out early
>   if the return type is vector boolean with scalar mode, but comparison
>   operand type is not.
>
>   * gcc.target/i386/avx512f-pr91157.c: New test.
>   * gcc.target/i386/avx512bw-pr91157.c: New test.
>
>--- gcc/tree-vect-generic.c.jj 2019-07-04 00:18:37.063010439 +0200
>+++ gcc/tree-vect-generic.c2019-07-16 12:40:41.343059690 +0200
>@@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
>   tree t;
>   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>   && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
>-t = expand_vector_piecewise (gsi, do_compare, type,
>-   TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
>+{
>+  if (VECTOR_BOOLEAN_TYPE_P (type)
>+&& VECTOR_BOOLEAN_TYPE_P (type)

The above condition looks redundant, fwiw.
Did you mean to check op0?

thanks,

>+&& SCALAR_INT_MODE_P (TYPE_MODE (type))
>+&& known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
>+ TYPE_VECTOR_SUBPARTS (type)
>+ * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+  (TREE_TYPE (type)
>+  {
>+tree inner_type = TREE_TYPE (TREE_TYPE (op0));
>+tree part_width = TYPE_SIZE (inner_type);
>+tree index = bitsize_int (0);
>+int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
>+int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
>+tree ret_type = build_nonstandard_integer_type (prec, 1);
>+tree ret_inner_type = boolean_type_node;
>+int i;
>+location_t loc = gimple_location (gsi_stmt (*gsi));
>+t = build_zero_cst (ret_type);
>+
>+if (TYPE_PRECISION (ret_inner_type) != 1)
>+  ret_inner_type = build_nonstandard_integer_type (1, 1);
>+warning_at (loc, OPT_Wvector_operation_performance,
>+"vector operation will be expanded piecewise");
>+for (i = 0; i < nunits;
>+ i++, index = int_const_binop (PLUS_EXPR, index, part_width))
>+  {
>+tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
>+   index);
>+tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
>+   index);
>+tree result = gimplify_build2 (gsi, code, ret_inner_type, a,
>b);
>+t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
>+ bitsize_int (i));
>+  }
>+t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
>+  }
>+  else
>+  t = expand_vector_piecewise (gsi, do_compare, type,
>+   TREE_TYPE (TREE_TYPE (op0)), op0, op1,
>+   code);
>+}
>   else
> t = NULL_TREE;
> 
>@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
>   tree a1 = a;
>   tree a2 = NULL_TREE;
>   bool a_is_comparison = false;
>+  bool a_is_scalar_bitmask = false;
>   tree b = gimple_assign_rhs2 (stmt);
>   tree c = gimple_assign_rhs3 (stmt);
>   vec *v;
>@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
>   warning_at (loc, OPT_Wvector_operation_performance,
> "vector condition will be expanded piecewise");
> 
>+  if (!a_is_comparison
>+  && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
>+  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
>+  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
>+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
>+ * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+  (TREE_TYPE (TREE_TYPE (a))
>+{
>+  a_is_scalar_bitmask = true;
>+  int prec = GET_MODE_PRECISION (SCAL

Re: [PATCH] Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157)

2019-07-17 Thread Jakub Jelinek
On Thu, Jul 18, 2019 at 08:28:30AM +0200, Bernhard Reutner-Fischer wrote:
> >+  if (VECTOR_BOOLEAN_TYPE_P (type)
> >+  && VECTOR_BOOLEAN_TYPE_P (type)
> 
> The above condition looks redundant, fwiw.
> Did you mean to check op0?

It is redundant and I've already removed the second line yesterday.

Jakub