Re: [PATCH v2] c, c++: Implement -Wsizeof-array-div [PR91741]

2020-09-15 Thread Jakub Jelinek via Gcc-patches
On Mon, Sep 14, 2020 at 09:30:44PM -0400, Marek Polacek via Gcc-patches wrote:
> --- a/gcc/c/c-tree.h
> +++ b/gcc/c/c-tree.h
> @@ -147,6 +147,11 @@ struct c_expr
>   etc), so we stash a copy here.  */
>source_range src_range;
>  
> +  /* True iff the sizeof expression was enclosed in parentheses.
> + NB: This member is currently only initialized when .original_code
> + is a SIZEOF_EXPR.  ??? Add a default constructor to this class.  */
> +  bool parenthesized_p;
> +
>/* Access to the first and last locations within the source spelling
>   of this expression.  */
>location_t get_start () const { return src_range.m_start; }

I think a magic tree code would be better, c_expr is used in too many places
and returned by many functions, so it is copied over and over.
Even if you must add it, it would be better to change the struct layout,
because right now there are fields: tree, location_t, tree, 2xlocation_t,
which means 32-bit gap on 64-bit hosts before the second tree, so the new
field would fit in there.  But, if it is mostly uninitialized, it is kind of
unclean.

Jakub



Re: [RS6000] rotate and mask constants

2020-09-15 Thread Alan Modra via Gcc-patches
On Tue, Sep 15, 2020 at 10:49:46AM +0930, Alan Modra wrote:
> Implement more two insn constants.

And tests.  rot_cst1 checks the values generated, rot_cst2 checks
instruction count.

* gcc.target/powerpc/rot_cst.h,
* gcc.target/powerpc/rot_cst1.c,
* gcc.target/powerpc/rot_cst2.c: New tests.

diff --git a/gcc/testsuite/gcc.target/powerpc/rot_cst.h 
b/gcc/testsuite/gcc.target/powerpc/rot_cst.h
new file mode 100644
index 000..0d100d61233
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rot_cst.h
@@ -0,0 +1,269 @@
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c1 (void)
+{
+  return 0xc000ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c2 (void)
+{
+  return 0xc00ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c3 (void)
+{
+  return 0xc0ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c4 (void)
+{
+  return 0xcULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c5 (void)
+{
+  return 0xc000ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c6 (void)
+{
+  return 0xc00ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c7 (void)
+{
+  return 0xc0ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c8 (void)
+{
+  return 0xcULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c9 (void)
+{
+  return 0xc000ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c10 (void)
+{
+  return 0xc00ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c11 (void)
+{
+  return 0xc0ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c12 (void)
+{
+  return 0xcULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c13 (void)
+{
+  return 0xc000ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c14 (void)
+{
+  return 0xc00ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c15 (void)
+{
+  return 0xc0ULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+c16 (void)
+{
+  return 0xcULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b1 (void)
+{
+  return 0xbfffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b2 (void)
+{
+  return 0xbffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b3 (void)
+{
+  return 0xbfULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b4 (void)
+{
+  return 0xbULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b5 (void)
+{
+  return 0xbfffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b6 (void)
+{
+  return 0xbffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b7 (void)
+{
+  return 0xbfULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b8 (void)
+{
+  return 0xbULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b9 (void)
+{
+  return 0xbfffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b10 (void)
+{
+  return 0xbffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b11 (void)
+{
+  return 0xbfULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b12 (void)
+{
+  return 0xbULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b13 (void)
+{
+  return 0xbfffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b14 (void)
+{
+  return 0xbffULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b15 (void)
+{
+  return 0xbfULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+b16 (void)
+{
+  return 0xbULL;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r1 (void)
+{
+  return -0x124ULL << 48;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r2 (void)
+{
+  return -0x124ULL << 44;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r3 (void)
+{
+  return -0x124ULL << 40;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r4 (void)
+{
+  return -0x124ULL << 32;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r5 (void)
+{
+  return -0x124ULL << 28;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r6 (void)
+{
+  return -0x124ULL << 24;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r7 (void)
+{
+  return -0x124ULL << 20;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r8 (void)
+{
+  return -0x124ULL << 16;
+}
+
+unsigned long long __attribute__ ((__noinline__, __noclone__))
+r9 (void)
+{
+ 

Re: Ping: [PATCH 2/2 V3] Simplify plusminus-mult-with-convert expr in forwprop (PR 94234)

2020-09-15 Thread Richard Biener via Gcc-patches
On Mon, Sep 14, 2020 at 5:19 AM Feng Xue OS via Gcc-patches
 wrote:
>
> Thanks,
> Feng
>
> 
> From: Feng Xue OS 
> Sent: Thursday, September 3, 2020 5:29 PM
> To: Richard Biener; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH 2/2 V3] Simplify plusminus-mult-with-convert expr in 
> forwprop (PR 94234)
>
> Attach patch file.
>
> Feng
> 
> From: Gcc-patches  on behalf of Feng Xue OS 
> via Gcc-patches 
> Sent: Thursday, September 3, 2020 5:27 PM
> To: Richard Biener; gcc-patches@gcc.gnu.org
> Subject: [PATCH 2/2 V3] Simplify plusminus-mult-with-convert expr in forwprop 
> (PR 94234)
>
> This patch is to handle simplification of plusminus-mult-with-convert 
> expression
> as ((T) X) +- ((T) Y), in which at least one of (X, Y) is result of 
> multiplication.
> This is done in forwprop pass. We try to transform it to (T) (X +- Y), and 
> resort
> to gimple-matcher to fold (X +- Y) instead of manually code pattern 
> recognition.

I still don't like the complete new function with all its correctness
issues - the existing
fold_plusminus_mult_expr was difficult enough to get correct for
corner cases and
we do have a set of match.pd patterns (partly?) implementing its transforms.

Looking at

+unsigned goo (unsigned m_param, unsigned n_param)
+{
+  unsigned b1 = m_param * (n_param + 2);
+  unsigned b2 = m_param * (n_param + 1);
+  int r = (int)(b1) - (int)(b2);

it seems we want to simplify (signed)A - (signed)B to
(signed)(A - B) if A - B "simplifies"?  I guess

(simplify
  (plusminus (nop_convert @0) (nop_convert? @1))
  (convert (plusminus! @0 @1)))

probably needs a swapped pattern or not iterate over plus/minus
to handle at least one converted operand and avoid adding
a (plus @0 @1) -> (convert (plus! @0 @1)) rule.

Even

(simplify
 (minus (nop_convert @0) (nop_convert @1))
 (convert (minus! @0 @1)))

seems to handle all your testcases already (which means
they are all the same and not very exhaustive...)

Richard.


> Regards,
> Feng
> ---
> 2020-09-03  Feng Xue  
>
> gcc/
> PR tree-optimization/94234
> * tree-ssa-forwprop.c (simplify_plusminus_mult_with_convert): New
> function.
> (fwprop_ssa_val): Move it before its new caller.
> (pass_forwprop::execute): Add call to
> simplify_plusminus_mult_with_convert.
>
> gcc/testsuite/
> PR tree-optimization/94234
> * gcc.dg/pr94234-3.c: New test.


[committed] i386: Fix up vector mul and div with broadcasts in -masm=intel mode [PR97028]

2020-09-15 Thread Jakub Jelinek via Gcc-patches
Hi!

These patterns printed bogus <>s around the {1to16} and similar strings.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk as obvious.  Will do backports momentarily.

2020-09-15  Jakub Jelinek  

PR target/97028
* config/i386/sse.md (*mul3_bcs,
*_div3_bcst): Use  instead of
<>.

* gcc.target/i386/avx512f-pr97028.c: Untested fix.

--- gcc/config/i386/sse.md.jj   2020-09-12 13:36:42.0 +0200
+++ gcc/config/i386/sse.md  2020-09-14 09:21:53.378315534 +0200
@@ -1867,7 +1867,7 @@ (define_insn "*mul3_bcs
 (match_operand: 1 "memory_operand" "m"))
  (match_operand:VF_AVX512 2 "register_operand" "v")))]
   "TARGET_AVX512F && "
-  "vmul\t{%1, %2, 
%0|%0, %2, %1<>}"
+  "vmul\t{%1, %2, 
%0|%0, %2, %1}"
   [(set_attr "prefix" "evex")
(set_attr "type" "ssemul")
(set_attr "mode" "")])
@@ -1960,7 +1960,7 @@ (define_insn "*_div3 2 "memory_operand" "m"]
   "TARGET_AVX512F && "
-  "vdiv\t{%2, %1, 
%0|%0, %1, %2<>}"
+  "vdiv\t{%2, %1, 
%0|%0, %1, %2}"
   [(set_attr "prefix" "evex")
 (set_attr "type" "ssediv")
(set_attr "mode" "")])
--- gcc/testsuite/gcc.target/i386/avx512f-pr97028.c.jj  2020-09-14 
09:38:13.522659890 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr97028.c 2020-09-14 
09:37:36.952206164 +0200
@@ -0,0 +1,18 @@
+/* PR target/97028 */
+/* { dg-do assemble { target avx512f } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-O2 -mavx512f -masm=intel" } */
+
+#include 
+
+__m512
+foo (__m512 x, float *y)
+{
+  return _mm512_mul_ps (x, _mm512_set1_ps (*y));
+}
+
+__m512
+bar (__m512 x, float *y)
+{
+  return _mm512_div_ps (x, _mm512_set1_ps (*y));
+}

Jakub



Re: [PATCH 3/4 v3] ivopts: Consider cost_step on different forms during unrolling

2020-09-15 Thread Kewen.Lin via Gcc-patches
Hi Hans,

on 2020/9/6 上午10:47, Hans-Peter Nilsson wrote:
> On Tue, 1 Sep 2020, Bin.Cheng via Gcc-patches wrote:
>>> Great idea!  With explicitly specified -funroll-loops, it's bootstrapped
>>> but the regression testing did show one failure (the only one):
>>>
>>>   PASS->FAIL: gcc.dg/sms-4.c scan-rtl-dump-times sms "SMS succeeded" 1
>>>
>>> It exposes two issues:
>>>
>>> 1) Currently address_cost hook on rs6000 always return zero, but at least
>>> from Power7, pre_inc/pre_dec kind instructions are cracked, it means we
>>> have to take the address update into account (scalar normal operation).
>>> Since IVOPTs reduces the cost_step for ainc candidates, it makes us prefer
>>> ainc candidates.  In this case, the cand/group cost is -4 (minus cost_step),
>>> with scaling up, the off becomes much.  With one simple hack on for pre_inc/
>>> pre_dec in rs6000 address_cost, the case passed.  It should be handled in
>>> one separated issue.
>>>
>>> 2) This case makes me think we should exclude ainc candidates in function
>>> mark_reg_offset_candidates.  The justification is that: ainc candidate
>>> handles step update itself and when we calculate the cost for it against
>>> its ainc_use, the cost_step has been reduced. When unrolling happens,
>>> the ainc computations are replicated and it doesn't save step updates
>>> like normal reg_offset_p candidates.
>> Though auto-inc candidate embeds stepping operation into memory
>> access, we might want to avoid it in case of unroll if there are many
>> sequences of memory accesses, and if the unroll factor is big.  The
>> rationale is embedded stepping is a u-arch operation and does have its
>> cost.
> 
> Forgive me for barging in here (though the context is powerpc,
> the dialogue and the patch seems to be generic ivopts), but
> that's not a general remark I hope, about auto-inc (always)
> having a cost?
> 
> For some architectures, auto-inc *is* free, as free as
> register-indirect, so the more auto-inc use, the better.  All
> this should be reflected by the address-cost, IMHO, and not
> hardcoded into ivopts.
> 

Yeah, now ivopts doesn't hardcode the cost for auto-inc (always),
instead it allows targets to set its cost by themselves through
address_cost hook.  As the function get_address_cost_ainc, it
checks auto-inc operations supported or not and set the cost
as address_cost hook further.

One example on Power is listed as below:

Group 0:
  cand  costcompl.  inv.expr.   inv.vars
  1 4   1   NIL;1
  3 0   0   NIL;NIL;
  4 0   1   NIL;1
  5 0   1   NIL;NIL;
  130   1   NIL;NIL;
  18-4  0   NIL;NIL;

Cand 18 is one auto-inc candidate, whose group 0/cand cost is
-4 (minus step_cost), the iv_cost of cand 18 is 5 (step_cost +
non-original_iv cost), when it's selected, the step_cost parts
counteract, the remaining cost (1) is for non-original iv,
it shows it doesn't put any hardcoded cost to this ainc_cost
candidate.

I guess some misunderstanding was derived from some discussion
above.  Sorry if some of my previous comments misled you.

BR,
Kewen


PING^2 [PATCH 1/4] unroll: Add middle-end unroll factor estimation

2020-09-15 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546698.html

BR,
Kewen

on 2020/8/31 下午1:49, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> I'd like to gentle ping this since IVOPTs part is already to land.
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546698.html
> 
> BR,
> Kewen
> 
> on 2020/5/28 下午8:19, Kewen.Lin via Gcc-patches wrote:
>>
>> gcc/ChangeLog
>>
>> 2020-MM-DD  Kewen Lin  
>>
>>  * cfgloop.h (struct loop): New field estimated_unroll.
>>  * tree-ssa-loop-manip.c (decide_unroll_const_iter): New function.
>>  (decide_unroll_runtime_iter): Likewise.
>>  (decide_unroll_stupid): Likewise.
>>  (estimate_unroll_factor): Likewise.
>>  * tree-ssa-loop-manip.h (estimate_unroll_factor): New declaration.
>>  * tree-ssa-loop.c (tree_average_num_loop_insns): New function.
>>  * tree-ssa-loop.h (tree_average_num_loop_insns): New declaration.
>>


[PATCH] c++: Fix up default initialization with consteval default ctor [PR96994]

2020-09-15 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase is miscompiled (in particular the a and i
initialization).  The problem is that build_special_member_call due to
the immediate constructors (but not evaluated in constant expression mode)
doesn't create a CALL_EXPR, but returns a TARGET_EXPR with CONSTRUCTOR
as the initializer for it, and then expand_default_init just emits
the returned statement, but this one doesn't have any side-effects and does
nothing.  There is an if to handle constexpr ctors which emits an INIT_EXPR
but constexpr ctors still show up as CALL_EXPR and need to be manually
evaluated to constant expressions (if possible).

The following patch fixes that, though I'm not sure about several things.
One is that the earlier if also has expr == true_exp && in the condition,
not sure if we want it in this case or not.
Another is that for delegating constructors, we emit two separate calls
and build_if_in_charge them together.  Not sure if consteval could come into
play in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux.

2020-09-15  Jakub Jelinek  

PR c++/96994
* init.c (expand_default_init): If rval is TARGET_EXPR with
TREE_CONSTANT TARGET_EXPR_INITIAL, emit INIT_EXPR.

* g++.dg/cpp2a/consteval18.C: New test.

--- gcc/cp/init.c.jj2020-09-10 11:24:05.019805303 +0200
+++ gcc/cp/init.c   2020-09-14 15:06:59.467341241 +0200
@@ -1999,6 +1999,9 @@ expand_default_init (tree binfo, tree tr
rval = build2 (INIT_EXPR, type, exp, e);
}
 }
+  else if (TREE_CODE (rval) == TARGET_EXPR
+  && TREE_CONSTANT (TARGET_EXPR_INITIAL (rval)))
+rval = build2 (INIT_EXPR, type, exp, rval);
 
   /* FIXME put back convert_to_void?  */
   if (TREE_SIDE_EFFECTS (rval))
--- gcc/testsuite/g++.dg/cpp2a/consteval18.C.jj 2020-09-14 15:12:50.036282784 
+0200
+++ gcc/testsuite/g++.dg/cpp2a/consteval18.C2020-09-14 15:12:42.834386644 
+0200
@@ -0,0 +1,26 @@
+// PR c++/96994
+// { dg-do run { target c++20 } }
+
+struct A { consteval A () { i = 1; } consteval A (int x) : i (x) {} int i = 0; 
};
+struct B { constexpr B () { i = 1; } constexpr B (int x) : i (x) {} int i = 0; 
};
+A const a;
+constexpr A b;
+B const c;
+A const constinit d;
+A const e = 2;
+constexpr A f = 3;
+B const g = 4;
+A const constinit h = 5;
+A i;
+B j;
+A k = 6;
+B l = 7;
+static_assert (b.i == 1 && f.i == 3);
+
+int
+main()
+{
+  if (a.i != 1 || c.i != 1 || d.i != 1 || e.i != 2 || g.i != 4 || h.i != 5
+  || i.i != 1 || j.i != 1 || k.i != 6 || l.i != 7)
+__builtin_abort ();
+}

Jakub



[PATCH v3] C-SKY: Support -mfloat-abi=hard.

2020-09-15 Thread Jojo R
gcc/ChangeLog:

* config/csky/csky.md (CSKY_NPARM_FREGS): New.
(call_value_internal_vs/d): New.
(untyped_call): New.
* config/csky/csky.h (TARGET_SINGLE_FPU): New.
(TARGET_DOUBLE_FPU): New.
(FUNCTION_VARG_REGNO_P): New.
(CSKY_VREG_MODE_P): New.
(FUNCTION_VARG_MODE_P): New.
(CUMULATIVE_ARGS): Add extra regs info.
(INIT_CUMULATIVE_ARGS): Use csky_init_cumulative_args.
(FUNCTION_ARG_REGNO_P): Use FUNCTION_VARG_REGNO_P.
* config/csky/csky-protos.h (csky_init_cumulative_args): Extern.
* config/csky/csky.c (csky_cpu_cpp_builtins): Support 
TARGET_HARD_FLOAT_ABI.
(csky_function_arg): Likewise.
(csky_num_arg_regs): Likewise.
(csky_function_arg_advance): Likewise.
(csky_function_value): Likewise.
(csky_libcall_value): Likewise.
(csky_function_value_regno_p): Likewise.
(csky_arg_partial_bytes): Likewise.
(csky_setup_incoming_varargs): Likewise.
(csky_init_cumulative_args): New.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-apply2.c: Skip if CSKY.
* gcc.dg/torture/stackalign/builtin-apply-2.c: Likewise.

---
 gcc/config/csky/csky-protos.h  |  2 +
 gcc/config/csky/csky.c | 96 +++---
 gcc/config/csky/csky.h | 34 ++--
 gcc/config/csky/csky.md| 84 +++
 gcc/testsuite/gcc.dg/builtin-apply2.c  |  2 +-
 .../gcc.dg/torture/stackalign/builtin-apply-2.c|  2 +-
 6 files changed, 200 insertions(+), 20 deletions(-)

diff --git a/gcc/config/csky/csky-protos.h b/gcc/config/csky/csky-protos.h
index cc1a033..2c02399 100644
--- a/gcc/config/csky/csky-protos.h
+++ b/gcc/config/csky/csky-protos.h
@@ -68,4 +68,6 @@ extern int csky_compute_pushpop_length (rtx *);
 
 extern int csky_default_branch_cost (bool, bool);
 extern bool csky_default_logical_op_non_short_circuit (void);
+
+extern void csky_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
 #endif /* GCC_CSKY_PROTOS_H */
diff --git a/gcc/config/csky/csky.c b/gcc/config/csky/csky.c
index 7ba3ed3..8463d8f 100644
--- a/gcc/config/csky/csky.c
+++ b/gcc/config/csky/csky.c
@@ -328,6 +328,16 @@ csky_cpu_cpp_builtins (cpp_reader *pfile)
 {
   builtin_define ("__csky_hard_float__");
   builtin_define ("__CSKY_HARD_FLOAT__");
+  if (TARGET_HARD_FLOAT_ABI)
+   {
+ builtin_define ("__csky_hard_float_abi__");
+ builtin_define ("__CSKY_HARD_FLOAT_ABI__");
+   }
+  if (TARGET_SINGLE_FPU)
+   {
+ builtin_define ("__csky_hard_float_fpu_sf__");
+ builtin_define ("__CSKY_HARD_FLOAT_FPU_SF__");
+   }
 }
   else
 {
@@ -1790,9 +1800,22 @@ static rtx
 csky_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  int reg = pcum->reg;
+  machine_mode mode = arg.mode;
 
-  if (*pcum < CSKY_NPARM_REGS)
-return gen_rtx_REG (arg.mode, CSKY_FIRST_PARM_REGNUM + *pcum);
+  if (FUNCTION_VARG_MODE_P(mode)
+  && !pcum->is_stdarg)
+{
+  reg = pcum->freg;
+
+  if (reg < CSKY_NPARM_FREGS)
+   return gen_rtx_REG (mode, CSKY_FIRST_VFP_REGNUM + reg);
+  else
+   return NULL_RTX;
+}
+
+  if (reg < CSKY_NPARM_REGS)
+return gen_rtx_REG (mode, CSKY_FIRST_PARM_REGNUM + reg);
 
   return NULL_RTX;
 }
@@ -1802,7 +1825,7 @@ csky_function_arg (cumulative_args_t pcum_v, const 
function_arg_info &arg)
MODE and TYPE.  */
 
 static int
-csky_num_arg_regs (machine_mode mode, const_tree type)
+csky_num_arg_regs (machine_mode mode, const_tree type, bool is_stdarg)
 {
   int size;
 
@@ -1811,6 +1834,14 @@ csky_num_arg_regs (machine_mode mode, const_tree type)
   else
 size = GET_MODE_SIZE (mode);
 
+  if (TARGET_HARD_FLOAT_ABI
+  && !is_stdarg)
+{
+  if (CSKY_VREG_MODE_P(mode)
+ && !TARGET_SINGLE_FPU)
+   return ((CSKY_NUM_WORDS (size) + 1) / 2);
+}
+
   return CSKY_NUM_WORDS (size);
 }
 
@@ -1822,12 +1853,23 @@ csky_function_arg_advance (cumulative_args_t pcum_v,
   const function_arg_info &arg)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
-  int param_size = csky_num_arg_regs (arg.mode, arg.type);
+  int *reg = &pcum->reg;
+  machine_mode mode = arg.mode;
 
-  if (*pcum + param_size > CSKY_NPARM_REGS)
-*pcum = CSKY_NPARM_REGS;
+  int param_size = csky_num_arg_regs (mode, arg.type, pcum->is_stdarg);
+  int param_regs_nums = CSKY_NPARM_REGS;
+
+  if (FUNCTION_VARG_MODE_P(mode)
+  && !pcum->is_stdarg)
+{
+  reg = &pcum->freg;
+  param_regs_nums = CSKY_NPARM_FREGS;
+}
+
+  if (*reg + param_size > param_regs_nums)
+*reg = param_regs_nums;
   else
-*pcum += param_size;
+*reg += param_size;
 }
 
 
@@ -1843,6 +1885,12 @@ csky_function_value (const_tree type, const_tree func,
   mode = TYPE_MODE (typ

Re: [PATCH] Return mask <-> integer cost for non-AVX512 micro-architecture.

2020-09-15 Thread Uros Bizjak via Gcc-patches
On Tue, Sep 15, 2020 at 4:59 AM Hongtao Liu  wrote:
>
> Hi:
>   This patch would avoid spill gprs to mask registers for non-AVX512
> micro-architecture and fix regression in PR96744.
>
>   Bootstrap is ok, regression test for i386/x86-64 backend is ok.
>   No big performance impact on SPEC2017.
>
> gcc/ChangeLog:
>
> PR target/96744
> * config/i386/x86-tune-costs.h (struct processor_costs):
> Increase mask <-> integer cost for non AVX512 target to avoid
> spill gpr to mask. Also retune mask <-> integer and
> mask_load/store for skylake_cost.

LGTM.

Thanks,
Uros.

>
> --
> BR,
> Hongtao


[patch] Fix pessimization in EH cleanup pass

2020-09-15 Thread Eric Botcazou
Hi,

the cleanup_all_empty_eh function was originally doing a post-order traversal 
of the EH region tree to optimize it:

/* Do a post-order traversal of the EH region tree.  Examine each
   post_landing_pad block and see if we can eliminate it as empty.  */

That's sensible since the worker function cleanup_empty_eh punts if the number 
of outgoing edges from landing pads is not 0 or 1:

  /* There can be zero or one edges out of BB.  This is the quickest test.  */
  switch (EDGE_COUNT (bb->succs))
{
case 0:
  e_out = NULL;
  break;
case 1:
  e_out = single_succ_edge (bb);
  break;
default:
  return false;
}

This was recently changed to use another order and this trivially breaks 
testcases with nested regions like the attached one.  So the attached patch 
restores the post-order traversal and it also contains a small tweak to the 
line debug info to avoid a problematic inheritance for coverage measurement.

Tested on x86_64-suse-linux, OK for the mainline?


2020-09-15  Eric Botcazou  
Pierre-Marie Derodat  

* tree-eh.c (lower_try_finally_dup_block): Backward propagate slocs 
to stack restore builtin calls.
(cleanup_all_empty_eh): Do again a post-order traversal of the EH
region tree.


2020-09-15  Eric Botcazou  

* gnat.dg/concat4.adb: New test.

-- 
Eric Botcazou-- { dg-do compile }

with Ada.Text_IO; use Ada.Text_IO;

procedure Concat4 (X : Integer) is
   Ximg : constant String := Integer'Image (X);
begin
   if X > 0 then
  Put_Line (Ximg & " is Positive");
   elsif X < 0 then
  Put_Line (Ximg & " is Negative");
   else
  Put_Line (Ximg & " is Null");
   end if;
end;

-- { dg-final { scan-assembler-not "_Unwind_Resume" } }
diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c
index 4246dca8806..ed32f80220d 100644
--- a/gcc/tree-eh.c
+++ b/gcc/tree-eh.c
@@ -899,23 +899,26 @@ lower_try_finally_dup_block (gimple_seq seq, struct leh_state *outer_state,
   gtry *region = NULL;
   gimple_seq new_seq;
   gimple_stmt_iterator gsi;
+  location_t last_loc = UNKNOWN_LOCATION;
 
   new_seq = copy_gimple_seq_and_replace_locals (seq);
 
-  for (gsi = gsi_start (new_seq); !gsi_end_p (gsi); gsi_next (&gsi))
+  for (gsi = gsi_last (new_seq); !gsi_end_p (gsi); gsi_prev (&gsi))
 {
   gimple *stmt = gsi_stmt (gsi);
   /* We duplicate __builtin_stack_restore at -O0 in the hope of eliminating
-	 it on the EH paths.  When it is not eliminated, make it transparent in
-	 the debug info.  */
+	 it on the EH paths.  When it is not eliminated, give it the next
+	 location in the sequence or make it transparent in the debug info.  */
   if (gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE))
-	gimple_set_location (stmt, UNKNOWN_LOCATION);
+	gimple_set_location (stmt, last_loc);
   else if (LOCATION_LOCUS (gimple_location (stmt)) == UNKNOWN_LOCATION)
 	{
 	  tree block = gimple_block (stmt);
 	  gimple_set_location (stmt, loc);
 	  gimple_set_block (stmt, block);
 	}
+  else
+	last_loc = gimple_location (stmt);
 }
 
   if (outer_state->tf)
@@ -4751,15 +4754,9 @@ cleanup_all_empty_eh (void)
   eh_landing_pad lp;
   int i;
 
-  /* Ideally we'd walk the region tree and process LPs inner to outer
- to avoid quadraticness in EH redirection.  Walking the LP array
- in reverse seems to be an approximation of that.  */
-  for (i = vec_safe_length (cfun->eh->lp_array) - 1; i >= 1; --i)
-{
-  lp = (*cfun->eh->lp_array)[i];
-  if (lp)
-	changed |= cleanup_empty_eh (lp);
-}
+  for (i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i)
+if (lp)
+  changed |= cleanup_empty_eh (lp);
 
   return changed;
 }


[OG10] Merge GCC 10 into branch; two cherry picks

2020-09-15 Thread Tobias Burnus

OG10 = devel/omp/gcc-10

Committed backport plus two cherry picks:

a93cc852103 [PATCH] OpenMP/Fortran: Fix (re)mapping of allocatable/pointer 
arrays [PR96668]
e524656359b Merge remote-tracking branch 'origin/releases/gcc-10' into 
devel/omp/gcc-10
   f73772df64c Daily bump.
   0ea1b39e8e5 doc: fix spelling of -fprofile-reproducibility
   1c34981a9ba bpf: use the expected instruction for NOPs
   1dbb919d086 tree-optimization/96522 - transfer of flow-sensitive info in 
copy_ref_info
   e93428a8b05 tree-optimization/97043 - fix latent wrong-code with SLP 
vectorization
   7ad48f0a6b4 i386: Fix array index in expander
fafbe5379f9 [libgomp, nvptx] Add __sync_compare_and_swap_16
   1bcbc4da6ae Daily bump.
   23ee7de0be0 Improve costs for DImode shifts of interger constants.
   1660e831614 Daily bump.
   08a0f33a1b0 Add new shrpsi and shrpdi instruction variants to 
gcc/config/pa/pa.md.
   32ca9bb4201 Daily bump.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [patch] Fix pessimization in EH cleanup pass

2020-09-15 Thread Jakub Jelinek via Gcc-patches
On Tue, Sep 15, 2020 at 10:36:20AM +0200, Eric Botcazou wrote:
> This was recently changed to use another order and this trivially breaks 
> testcases with nested regions like the attached one.  So the attached patch 
> restores the post-order traversal and it also contains a small tweak to the 
> line debug info to avoid a problematic inheritance for coverage measurement.

So it breaks PR93199 again?

Jakub



Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Richard Sandiford
Qing Zhao  writes:
>> On Sep 14, 2020, at 2:20 PM, Richard Sandiford  
>> wrote:
>> 
>> Qing Zhao mailto:qing.z...@oracle.com>> writes:
 On Sep 14, 2020, at 11:33 AM, Richard Sandiford 
  wrote:
 
 Qing Zhao  writes:
>> Like I mentioned earlier though, passes that run after
>> pass_thread_prologue_and_epilogue can use call-clobbered registers that
>> weren't previously used.  For example, on x86_64, the function might
>> not use %r8 when the prologue, epilogue and returns are generated,
>> but pass_regrename might later introduce a new use of %r8.  AIUI,
>> the “used” version of the new command-line option is supposed to clear
>> %r8 in these circumstances, but it wouldn't do so if the data was
>> collected at the point that the return is generated.
> 
> Thanks for the information.
> 
>> 
>> That's why I think it's more robust to do this later (at the beginning
>> of pass_late_compilation) and insert the zeroing before returns that
>> already exist.
> 
> Yes, looks like it’s not correct to insert the zeroing at the time when 
> prologue, epilogue and return are generated.
> As I also checked, “return” might be also generated as late as pass 
> “pass_delay_slots”,  So, shall we move the
> New pass as late as possible?
 
 If we insert the zeroing before pass_delay_slots and describe the
 result correctly, pass_delay_slots should do the right thing.
 
 Describing the result correctly includes ensuring that the cleared
 registers are treated as live on exit from the function, so that the
 zeroing doesn't get deleted again, or skipped by pass_delay_slots.
>>> 
>>> In the current implementation for x86, when we generating a zeroing insn as 
>>> the following:
>>> 
>>> (insn 18 16 19 2 (set (reg:SI 1 dx)
>>>(const_int 0 [0])) "t10.c":11:1 -1
>>> (nil))
>>> (insn 19 18 20 2 (unspec_volatile [
>>>(reg:SI 1 dx)
>>>] UNSPECV_PRO_EPILOGUE_USE) "t10.c":11:1 -1
>>> (nil))
>>> 
>>> i.e, after each zeroing insn, the register that is zeroed is marked as 
>>> “UNSPECV_PRO_EPILOGUE_USE”, 
>>> By doing this, we can avoid this zeroing insn from being deleted or 
>>> skipped. 
>>> 
>>> Is doing this enough to describe the result correctly?
>>> Is there other thing we need to do in addition to this?
>> 
>> I guess that works, but I think it would be better to abstract
>> EPILOGUE_USES into a new target-independent wrapper function that
>> (a) returns true if EPILOGUE_USES itself returns true and (b) returns
>> true for registers that need to be zero on return, if the zeroing
>> instructions have already been inserted.  The places that currently
>> test EPILOGUE_USES should then test this new wrapper function instead.
>
> Okay, I see. 
> Looks like that EPILOGUE_USES is used in df-scan.c to compute the data flow 
> information. If EPILOUGE_USES return true
> for registers that need to be zeroed on return, those registers will be 
> included in the data flow information, as a result, later
> passes will not be able to delete them. 
>
> This sounds to be a cleaner approach than the current one that marks the 
> registers  “UNSPECV_PRO_EPILOGUE_USE”. 
>
> A more detailed implementation question on this: 
> Where should I put this new target-independent wrapper function in? Which 
> header file will be a proper place to hold this new function?

Not a strong opinion, but: maybe df.h and df-scan.c, since this is
really a DF query.

>> After inserting the zeroing instructions, the pass should recompute the
>> live-out sets based on this.

Sorry, I was wrong here.  It should *cause* the sets to be recomputed
where necessary (rather than recompute them directly), but see below.

> Is only computing the live-out sets of the block that including the return 
> insn enough? Or we should re-compute the whole procedure? 
>
> Which utility routine I should use to recompute the live-out sets?

Inserting the instructions will cause the containing block to be marked
dirty, via df_set_bb_dirty.  I think the pass should also call
df_set_bb_dirty on the exit block itself, to indicate that the
wrapper around EPILOGUE_USES has changed behaviour, but that might
not be necessary.

This gives the df machinery enough information to work out what has changed.
It will then propagate those changes throughout the function.  (I don't
think any propagation would be necessary here, but if I'm wrong about that,
then the df machinery will do whatever propagation is necessary.)

However, the convention is for a pass that uses the df machinery to call
df_analyze first.  This call to df_analyze updates any stale df information.

So unlike what I said yesterday, the pass itself doesn't need to make sure
that the df information is up-to-date.  It just needs to indicate what
has changed, as above.

In the case of pass_delay_slots, pass_free_cfg has:

  /* The resource.c machinery uses DF but the CFG isn't guara

[PATCH v7] genemit.c (main): split insn-emit.c for compiling parallelly

2020-09-15 Thread Jojo R
gcc/ChangeLog:

* genemit.c (main): Print 'split line'.
* Makefile.in (insn-emit.c): Define split count and file

---
 gcc/Makefile.in |  19 +
 gcc/genemit.c   | 104 +---
 2 files changed, 83 insertions(+), 40 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 79e854aa938..a7fcc7d5949 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1258,6 +1258,21 @@ ANALYZER_OBJS = \
 # We put the *-match.o and insn-*.o files first so that a parallel make
 # will build them sooner, because they are large and otherwise tend to be
 # the last objects to finish building.
+
+# target overrides
+-include $(tmake_file)
+
+INSN-GENERATED-SPLIT-NUM ?= 0
+
+insn-generated-split-num = $(shell i=1; j=`expr $(INSN-GENERATED-SPLIT-NUM) + 
1`; \
+   while test $$i -le $$j; do \
+ echo $$i; i=`expr $$i + 1`; \
+   done)
+
+insn-emit-split-c := $(foreach o, $(shell for i in 
$(insn-generated-split-num); do echo $$i; done), insn-emit$(o).c)
+insn-emit-split-obj = $(patsubst %.c,%.o, $(insn-emit-split-c))
+$(insn-emit-split-c): insn-emit.c
+
 OBJS = \
gimple-match.o \
generic-match.o \
@@ -1265,6 +1280,7 @@ OBJS = \
insn-automata.o \
insn-dfatab.o \
insn-emit.o \
+   $(insn-emit-split-obj) \
insn-extract.o \
insn-latencytab.o \
insn-modes.o \
@@ -2365,6 +2381,9 @@ $(simple_generated_c:insn-%.c=s-%): s-%: 
build/gen%$(build_exeext)
$(RUN_GEN) build/gen$*$(build_exeext) $(md_file) \
  $(filter insn-conditions.md,$^) > tmp-$*.c
$(SHELL) $(srcdir)/../move-if-change tmp-$*.c insn-$*.c
+   $*v=$$(echo $$(csplit insn-$*.c /parallel\ compilation/ -k -s 
{$(INSN-GENERATED-SPLIT-NUM)} -f insn-$* -b "%d.c" 2>&1));\
+   [ ! "$$$*v" ] || grep "match not found" <<< $$$*v
+   [ -s insn-$*0.c ] || (for i in $(insn-generated-split-num); do touch 
insn-$*$$i.c; done && echo "" > insn-$*.c)
$(STAMP) s-$*
 
 # gencheck doesn't read the machine description, and the file produced
diff --git a/gcc/genemit.c b/gcc/genemit.c
index 84d07d388ee..54a0d909d9d 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -847,24 +847,13 @@ handle_overloaded_gen (overloaded_name *oname)
 }
 }
 
-int
-main (int argc, const char **argv)
-{
-  progname = "genemit";
-
-  if (!init_rtx_reader_args (argc, argv))
-return (FATAL_EXIT_CODE);
-
-#define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
-  nofail_optabs[OPTAB##_optab] = true;
-#include "internal-fn.def"
-
-  /* Assign sequential codes to all entries in the machine description
- in parallel with the tables in insn-output.c.  */
-
-  printf ("/* Generated automatically by the program `genemit'\n\
-from the machine description file `md'.  */\n\n");
+/* Print include header.  */
 
+static void
+printf_include (void)
+{
+  printf ("/* Generated automatically by the program `genemit'\n"
+ "from the machine description file `md'.  */\n\n");
   printf ("#define IN_TARGET_CODE 1\n");
   printf ("#include \"config.h\"\n");
   printf ("#include \"system.h\"\n");
@@ -900,35 +889,70 @@ from the machine description file `md'.  */\n\n");
   printf ("#include \"tm-constrs.h\"\n");
   printf ("#include \"ggc.h\"\n");
   printf ("#include \"target.h\"\n\n");
+}
 
-  /* Read the machine description.  */
+/* Generate the `gen_...' function from GET_CODE().  */
 
-  md_rtx_info info;
-  while (read_md_rtx (&info))
-switch (GET_CODE (info.def))
-  {
-  case DEFINE_INSN:
-   gen_insn (&info);
-   break;
+static void
+gen_md_rtx (md_rtx_info *info)
+{
+  switch (GET_CODE (info->def))
+{
+case DEFINE_INSN:
+  gen_insn (info);
+  break;
 
-  case DEFINE_EXPAND:
-   printf ("/* %s:%d */\n", info.loc.filename, info.loc.lineno);
-   gen_expand (&info);
-   break;
+case DEFINE_EXPAND:
+  printf ("/* %s:%d */\n", info->loc.filename, info->loc.lineno);
+  gen_expand (info);
+  break;
 
-  case DEFINE_SPLIT:
-   printf ("/* %s:%d */\n", info.loc.filename, info.loc.lineno);
-   gen_split (&info);
-   break;
+case DEFINE_SPLIT:
+  printf ("/* %s:%d */\n", info->loc.filename, info->loc.lineno);
+  gen_split (info);
+  break;
 
-  case DEFINE_PEEPHOLE2:
-   printf ("/* %s:%d */\n", info.loc.filename, info.loc.lineno);
-   gen_split (&info);
-   break;
+case DEFINE_PEEPHOLE2:
+  printf ("/* %s:%d */\n", info->loc.filename, info->loc.lineno);
+  gen_split (info);
+  break;
 
-  default:
-   break;
-  }
+default:
+  break;
+}
+}
+
+int
+main (int argc, const char **argv)
+{
+  progname = "genemit";
+
+  if (!init_rtx_reader_args (argc, argv))
+return (FATAL_EXIT_CODE);
+
+#define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
+  nofail_optabs[OPTAB##_optab] = true;
+#include "internal-fn.def"
+
+  /* Assign sequential codes to all entries in the machine description
+

Re: [PATCH v6] genemit.c (main): split insn-emit.c for compiling parallelly

2020-09-15 Thread Jojo R
Hi,

Ok & Thanks, I will do my best for it :)

It’s fixed in patch v7.

Jojo
在 2020年9月11日 +0800 PM11:29,Segher Boessenkool ,写道:
> Hi!
>
> On Fri, Sep 11, 2020 at 03:26:17PM +0800, Jojo R wrote:
> > +#define printf_include() do { \
>
> Don't use macros please, use a function?
>
> And maybe do this in a separate patch, for ease of review. That should
> be ack'ed pretty much immediately, after which it is out of the way, and
> we do not have to see it again.
>
> > while (read_md_rtx (&info))
> > - switch (GET_CODE (info.def))
>
> Factor this body to a separate function, too? Again, as earlier patch.
> As it is, it is impossible to see if you changed anything here.
>
> I suspect all this patch really does is pretty trivial, but it is hard
> to tell.
>
>
> Segher


Re: Ping: [PATCH 2/2 V3] Simplify plusminus-mult-with-convert expr in forwprop (PR 94234)

2020-09-15 Thread Feng Xue OS via Gcc-patches
>> This patch is to handle simplification of plusminus-mult-with-convert 
>> expression
>> as ((T) X) +- ((T) Y), in which at least one of (X, Y) is result of 
>> multiplication.
>> This is done in forwprop pass. We try to transform it to (T) (X +- Y), and 
>> resort
>> to gimple-matcher to fold (X +- Y) instead of manually code pattern 
>> recognition.
>
>I still don't like the complete new function with all its correctness
>issues - the existing
>fold_plusminus_mult_expr was difficult enough to get correct for
>corner cases and
>we do have a set of match.pd patterns (partly?) implementing its transforms.
>
>Looking at
>
>+unsigned goo (unsigned m_param, unsigned n_param)
>+{
>+  unsigned b1 = m_param * (n_param + 2);
>+  unsigned b2 = m_param * (n_param + 1);
>+  int r = (int)(b1) - (int)(b2);
>
>it seems we want to simplify (signed)A - (signed)B to
>(signed)(A - B) if A - B "simplifies"?  I guess
>
>(simplify
>  (plusminus (nop_convert @0) (nop_convert? @1))
>  (convert (plusminus! @0 @1)))
>
>probably needs a swapped pattern or not iterate over plus/minus
>to handle at least one converted operand and avoid adding
>a (plus @0 @1) -> (convert (plus! @0 @1)) rule.
>
>Even
>
>(simplify
> (minus (nop_convert @0) (nop_convert @1))
> (convert (minus! @0 @1)))
>
>seems to handle all your testcases already (which means
>they are all the same and not very exhaustive...)
Yes. This is much simpler.

Thanks,
Feng

>Richard.
>
>
>> Regards,
>> Feng
>> ---
>> 2020-09-03  Feng Xue  
>>
>> gcc/
>> PR tree-optimization/94234
>> * tree-ssa-forwprop.c (simplify_plusminus_mult_with_convert): New
>> function.
>> (fwprop_ssa_val): Move it before its new caller.
>> (pass_forwprop::execute): Add call to
>> simplify_plusminus_mult_with_convert.
>>
>> gcc/testsuite/
>> PR tree-optimization/94234
>> * gcc.dg/pr94234-3.c: New test.
>

[PATCH 2/2 V4] Add plusminus-with-convert pattern (PR 94234)

2020-09-15 Thread Feng Xue OS via Gcc-patches
Add a rule (T)(A) +- (T)(B) -> (T)(A +- B), which works only when (A +- B)
could be folded to a simple value. By this rule, a plusminus-mult-with-convert
expression could be handed over to the rule (A * C) +- (B * C) -> (A +- B).

Bootstrapped/regtested on x86_64-linux and aarch64-linux.

Feng
---
2020-09-15  Feng Xue  

gcc/
PR tree-optimization/94234
* match.pd (T)(A) +- (T)(B) -> (T)(A +- B): New simplification.

gcc/testsuite/
PR tree-optimization/94234
* gcc.dg/pr94234-3.c: New test.From f7c7483bd61fe1e3d6888f84d718fb4be4ea9e14 Mon Sep 17 00:00:00 2001
From: Feng Xue 
Date: Mon, 17 Aug 2020 23:00:35 +0800
Subject: [PATCH] tree-optimization/94234 - add plusminus-with-convert pattern

Add a rule (T)(A) +- (T)(B) -> (T)(A +- B), which works only when (A +- B)
could be folded to a simple value. By this rule, a plusminus-mult-with-convert
expression could be handed over to the rule (A * C) +- (B * C) -> (A +- B).

2020-09-15  Feng Xue  

gcc/
	PR tree-optimization/94234
	* match.pd (T)(A) +- (T)(B) -> (T)(A +- B): New simplification.

gcc/testsuite/
	PR tree-optimization/94234
 	* gcc.dg/pr94234-3.c: New test.
---
 gcc/match.pd | 16 
 gcc/testsuite/gcc.dg/pr94234-3.c | 42 
 2 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 46fd880bd37..d8c59fad9c1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2397,6 +2397,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (convert @0) (op @2 (convert @1))
 #endif
 
+/* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
+   to a simple value.  */
+#if GIMPLE
+  (for op (plus minus)
+   (simplify
+(op (convert @0) (convert @1))
+ (if (TREE_CODE (type) == INTEGER_TYPE
+	  && TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+	  && TREE_CODE (TREE_TYPE (@1)) == INTEGER_TYPE
+	  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0))
+	  && types_match (TREE_TYPE (@0), TREE_TYPE (@1))
+	  && !TYPE_OVERFLOW_TRAPS (type)
+	  && !TYPE_OVERFLOW_SANITIZED (type))
+  (convert (op! @0 @1)
+#endif
+
   /* ~A + A -> -1 */
   (simplify
(plus:c (bit_not @0) @0)
diff --git a/gcc/testsuite/gcc.dg/pr94234-3.c b/gcc/testsuite/gcc.dg/pr94234-3.c
new file mode 100644
index 000..9bb9b46bd96
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234-3.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo1 (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+int use_ptr (char *a, char *b);
+
+ptrdiff_t foo2 (char *a, size_t n)
+{
+  char *b1 = a + 8 * (n - 1);
+  char *b2 = a + 8 * n;
+
+  use_ptr (b1, b2);
+
+  return b1 - b2;
+}
+
+int use_int (int i);
+
+unsigned goo (unsigned m_param, unsigned n_param)
+{
+  unsigned b1 = m_param * (n_param + 2);
+  unsigned b2 = m_param * (n_param + 1);
+  int r = (int)(b1) - (int)(b2);
+
+  use_int (r);
+
+  return r;
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return m_param" 1 "forwprop1" } } */
-- 
2.17.1



Re: [PATCH 2/2 V4] Add plusminus-with-convert pattern (PR 94234)

2020-09-15 Thread Richard Biener via Gcc-patches
On Tue, Sep 15, 2020 at 12:14 PM Feng Xue OS
 wrote:
>
> Add a rule (T)(A) +- (T)(B) -> (T)(A +- B), which works only when (A +- B)
> could be folded to a simple value. By this rule, a plusminus-mult-with-convert
> expression could be handed over to the rule (A * C) +- (B * C) -> (A +- B).

Please use INTEGRAL_TYPE_P () instead of TREE_CODE == INTEGER_TYPE
in all three cases.  It's enough to check for INTEGRAL_TYPE_P on one operand,
the types_match will take care of the other.

OK with those changes.

Thanks,
Richard.

> Bootstrapped/regtested on x86_64-linux and aarch64-linux.
>
> Feng
> ---
> 2020-09-15  Feng Xue  
>
> gcc/
> PR tree-optimization/94234
> * match.pd (T)(A) +- (T)(B) -> (T)(A +- B): New simplification.
>
> gcc/testsuite/
> PR tree-optimization/94234
> * gcc.dg/pr94234-3.c: New test.


Re: [patch] Fix pessimization in EH cleanup pass

2020-09-15 Thread Eric Botcazou
> So it breaks PR93199 again?

Indeed, although there is no regression in the testsuite AFAICS.  I guess that 
we can do the new walk before and not instead of the post-order traversal.

Revised patch attached, same ChangeLog.

-- 
Eric Botcazoudiff --git a/gcc/tree-eh.c b/gcc/tree-eh.c
index 4246dca8806..46df7af7bf2 100644
--- a/gcc/tree-eh.c
+++ b/gcc/tree-eh.c
@@ -899,23 +899,26 @@ lower_try_finally_dup_block (gimple_seq seq, struct leh_state *outer_state,
   gtry *region = NULL;
   gimple_seq new_seq;
   gimple_stmt_iterator gsi;
+  location_t last_loc = UNKNOWN_LOCATION;
 
   new_seq = copy_gimple_seq_and_replace_locals (seq);
 
-  for (gsi = gsi_start (new_seq); !gsi_end_p (gsi); gsi_next (&gsi))
+  for (gsi = gsi_last (new_seq); !gsi_end_p (gsi); gsi_prev (&gsi))
 {
   gimple *stmt = gsi_stmt (gsi);
   /* We duplicate __builtin_stack_restore at -O0 in the hope of eliminating
-	 it on the EH paths.  When it is not eliminated, make it transparent in
-	 the debug info.  */
+	 it on the EH paths.  When it is not eliminated, give it the next
+	 location in the sequence or make it transparent in the debug info.  */
   if (gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE))
-	gimple_set_location (stmt, UNKNOWN_LOCATION);
+	gimple_set_location (stmt, last_loc);
   else if (LOCATION_LOCUS (gimple_location (stmt)) == UNKNOWN_LOCATION)
 	{
 	  tree block = gimple_block (stmt);
 	  gimple_set_location (stmt, loc);
 	  gimple_set_block (stmt, block);
 	}
+  else
+	last_loc = gimple_location (stmt);
 }
 
   if (outer_state->tf)
@@ -4751,9 +4754,8 @@ cleanup_all_empty_eh (void)
   eh_landing_pad lp;
   int i;
 
-  /* Ideally we'd walk the region tree and process LPs inner to outer
- to avoid quadraticness in EH redirection.  Walking the LP array
- in reverse seems to be an approximation of that.  */
+  /* The post-order traversal may lead to quadraticness in EH redirection
+ so first try to walk the EH region tree from inner to outer LPs.  */
   for (i = vec_safe_length (cfun->eh->lp_array) - 1; i >= 1; --i)
 {
   lp = (*cfun->eh->lp_array)[i];
@@ -4761,6 +4763,10 @@ cleanup_all_empty_eh (void)
 	changed |= cleanup_empty_eh (lp);
 }
 
+  for (i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i)
+if (lp)
+  changed |= cleanup_empty_eh (lp);
+
   return changed;
 }
 


Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Richard Sandiford
Segher Boessenkool  writes:
> On Mon, Sep 14, 2020 at 05:33:33PM +0100, Richard Sandiford wrote:
>> > However, for the cases on Power as Segher mentioned, there are also some 
>> > scratch registers used for
>> > Other purpose, not sure whether we can correctly generate zeroing in 
>> > middle-end for Power?
>> 
>> Segher would be better placed to answer that, but I think the process
>> above has to give a conservatively-accurate list of live registers.
>> If it misses a register, the other late rtl passes could clobber
>> that same register.
>
> It will zero a whole bunch of registers that are overwritten later, that
> are not parameter passing registers either.

This thread has covered two main issues: correctness and cost.
The question above was about correctness, but your reply seems to be
about cost.  The correctness question was instead: would the process
described in my previous message lead the compiler to think that a
register wasn't live before a Power return instruction when the
register actually was live?  (And if so, how do we get around that
for other post prologue-epilogue passes that use df?)

On the cost issue: when you say some registers are “overwritten later”:
which registers do you mean, and who would be doing the overwriting?
We were talking about inserting zeroing instructions immediately before
returns that already exist.  It looks like the main Power return
pattern is:

(define_insn "return"
  [(any_return)]
  ""
  "blr"
  [(set_attr "type" "jmpreg")])

Does this overwrite anything other than the PC?  If not, it doesn't
look like anything in the function itself would clobber other registers
later (i.e. later than the inserted zeroing instructions).  And of course,
if an attacker is performing a ROP attack, the attacker controls which
address the BLR returns to.

Thanks,
Richard


Re: [patch] Fix pessimization in EH cleanup pass

2020-09-15 Thread Richard Biener via Gcc-patches
On Tue, Sep 15, 2020 at 1:29 PM Eric Botcazou  wrote:
>
> > So it breaks PR93199 again?
>
> Indeed, although there is no regression in the testsuite AFAICS.

Yeah, too big of a testcase ...

>  I guess that
> we can do the new walk before and not instead of the post-order traversal.
>
> Revised patch attached, same ChangeLog.

If that avoids the compile-time regression it's fine.

Thanks,
Richard.

> --
> Eric Botcazou


Re: [PATCH 2/2 V4] Add plusminus-with-convert pattern (PR 94234)

2020-09-15 Thread Feng Xue OS via Gcc-patches
>> Add a rule (T)(A) +- (T)(B) -> (T)(A +- B), which works only when (A +- B)
>> could be folded to a simple value. By this rule, a 
>> plusminus-mult-with-convert
>> expression could be handed over to the rule (A * C) +- (B * C) -> (A +- B).
>
>Please use INTEGRAL_TYPE_P () instead of TREE_CODE == INTEGER_TYPE
>in all three cases.  It's enough to check for INTEGRAL_TYPE_P on one operand,
>the types_match will take care of the other.

I would have considered using INTEGRAL_TYPE_P(), but if inner type is bool or
enum, can we do plus/minus operation on that?

Feng

>
>OK with those changes.
>
>Thanks,
>Richard.
>
>
> Bootstrapped/regtested on x86_64-linux and aarch64-linux.
>
> Feng
> ---
> 2020-09-15  Feng Xue  
>
> gcc/
> PR tree-optimization/94234
> * match.pd (T)(A) +- (T)(B) -> (T)(A +- B): New simplification.
>
> gcc/testsuite/
> PR tree-optimization/94234
> * gcc.dg/pr94234-3.c: New test.


[PATCH PR93334][RFC]Skip output dep if values stored are byte wise the same

2020-09-15 Thread bin.cheng via Gcc-patches
Hi,
As suggested by the PR93334 comments, this patch adds an interface identifying
output dependences which can be skipped in terms of reordering, and skips them
in loop distribution.  It also adds a new test case.  Any comments?

Thanks,
bin

0001-Skip-output-dependence-if-values-stored-are-bytewise.patch
Description: Binary data


Re: [PATCH 2/2 V4] Add plusminus-with-convert pattern (PR 94234)

2020-09-15 Thread Richard Biener via Gcc-patches
On Tue, Sep 15, 2020 at 2:25 PM Feng Xue OS  wrote:
>
> >> Add a rule (T)(A) +- (T)(B) -> (T)(A +- B), which works only when (A +- B)
> >> could be folded to a simple value. By this rule, a 
> >> plusminus-mult-with-convert
> >> expression could be handed over to the rule (A * C) +- (B * C) -> (A +- B).
> >
> >Please use INTEGRAL_TYPE_P () instead of TREE_CODE == INTEGER_TYPE
> >in all three cases.  It's enough to check for INTEGRAL_TYPE_P on one operand,
> >the types_match will take care of the other.
>
> I would have considered using INTEGRAL_TYPE_P(), but if inner type is bool or
> enum, can we do plus/minus operation on that?

Yes, the distinction doesn't really exist for the middle-end - they
are just integer
types with certain precision and signedness.

Richard.

> Feng
>
> >
> >OK with those changes.
> >
> >Thanks,
> >Richard.
> >
> >
> > Bootstrapped/regtested on x86_64-linux and aarch64-linux.
> >
> > Feng
> > ---
> > 2020-09-15  Feng Xue  
> >
> > gcc/
> > PR tree-optimization/94234
> > * match.pd (T)(A) +- (T)(B) -> (T)(A +- B): New simplification.
> >
> > gcc/testsuite/
> > PR tree-optimization/94234
> > * gcc.dg/pr94234-3.c: New test.


[PATCH] Allow more BB vectorization

2020-09-15 Thread Richard Biener
The following allows more BB vectorization by generally building leafs
from scalars rather than giving up.  Note this is only a first step
towards this and as can be seen with the exception for node splitting
it is generally hard to get this heuristic sound.  I've added variants
of the bb-slp-48.c testcase to make sure we still try permuting for
example.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2020-09-15  Richard Biener  

* tree-vect-slp.c (vect_build_slp_tree_2): Also consider
building an operand from scalars when building it did not
fail fatally but avoid messing with the upcall splitting
of groups.

* gcc.dg/vect/bb-slp-48.c: New testcase.
* gcc.dg/vect/bb-slp-7.c: Adjust.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-48.c | 55 +
 gcc/testsuite/gcc.dg/vect/bb-slp-7.c  |  3 +-
 gcc/tree-vect-slp.c   | 70 ---
 3 files changed, 98 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-48.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-48.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-48.c
new file mode 100644
index 000..cd229323ecf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-48.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2];
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+foo (double x)
+{
+  double tem2;
+  double tem1;
+  double _1;
+  double _2;
+  double _3;
+  double _4;
+
+  __BB(2):
+  _1 = a[0];
+  _2 = x_6(D) * 3.0e+0;
+  tem1_7 = _1 + _2;
+  _3 = x_6(D) + 1.0e+0;
+  _4 = a[1];
+  tem2_8 = _4 + _3;
+  a[0] = tem1_7;
+  a[1] = tem2_8;
+  return;
+}
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+bar (double x)
+{
+  double tem2;
+  double tem1;
+  double _1;
+  double _2;
+  double _3;
+  double _4;
+
+  __BB(2):
+  _1 = a[0];
+  _2 = x_6(D) * 3.0e+0;
+  tem1_7 = _1 + _2;
+  _3 = x_6(D) + 1.0e+0;
+  _4 = a[1];
+  /* Once with operands swapped.  */
+  tem2_8 = _3 + _4;
+  a[0] = tem1_7;
+  a[1] = tem2_8;
+  return;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" } 
} */
+/* We want to vectorize as { a[0], a[1] } + { x*3, x+1 } and thus
+   elide one add in each function.  */
+/* { dg-final { scan-tree-dump-times " \\+ " 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
index b8bef8cffb4..f12dc275667 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
@@ -22,6 +22,7 @@ main1 (unsigned int x, unsigned int y)
   a2 = *pin++ + 2;
   a3 = *pin++ * 31;
 
+  /* But we can still vectorize the multiplication or the store.  */
   *pout++ = a0 * x;
   *pout++ = a1 * y;
   *pout++ = a2 * x;
@@ -46,5 +47,5 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
 
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 15912515caa..d844fe4d6bb 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1444,33 +1444,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
  continue;
}
 
-  /* If the SLP build failed fatally and we analyze a basic-block
- simply treat nodes we fail to build as externally defined
-(and thus build vectors from the scalar defs).
-The cost model will reject outright expensive cases.
-???  This doesn't treat cases where permutation ultimatively
-fails (or we don't try permutation below).  Ideally we'd
-even compute a permutation that will end up with the maximum
-SLP tree size...  */
-  if (is_a  (vinfo)
- && !matches[0]
- /* ???  Rejecting patterns this way doesn't work.  We'd have to
-do extra work to cancel the pattern so the uses see the
-scalar version.  */
- && !is_pattern_stmt_p (stmt_info)
- && !oprnd_info->any_pattern)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_NOTE, vect_location,
-"Building vector operands from scalars\n");
- this_tree_size++;
- child = vect_create_new_slp_node (oprnd_info->ops);
- children.safe_push (child);
- oprnd_info->ops = vNULL;
- oprnd_info->def_stmts = vNULL;
- continue;
-   }
-
   /* If the SLP build for operand zero failed and operand zero
 and one can be commutated try that for the scalar stmts
 that failed the match.  */
@@ -1542,11 +1515,50 @@ vect_build_slp_tree_2 (vec_info *vinfo,
  children.safe_push (child);
  continue;
}
-
+ /* We do not undo the swapping here since it might still be
+the better order for the second operand in case we build
+the f

RE: [aarch64] Backport missing NEON intrinsics to GCC8

2020-09-15 Thread Kyrylo Tkachov
Hi Sebastian,

This patch implements missing intrinsics.
I'm okay with this being applied to the GCC 8 branch as these intrinsics have 
been defined in ACLE for a long time.
It is arguably a bug that they've been missing from GCC8.
Their implementation is fairly self-contained we haven't had any bugs reported 
against these in my recollection.

So ok on the grounds that it's a bug-fix.
Thanks,
Kyrill

From: Pop, Sebastian  
Sent: 11 September 2020 20:54
To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov 
Subject: [aarch64] Backport missing NEON intrinsics to GCC8

Hi,

gcc-8 branch is missing NEON intrinsics for loads and stores.
Attached patches pass bootstrap and regression testing on Graviton2 
aarch64-linux.

Ok to commit to gcc-8 branch?

Thanks,
Sebastian


[PATCH] LRA: Make fixed eliminable registers live

2020-09-15 Thread H.J. Lu via Gcc-patches
commit 1bcb4c4faa4bd6b1c917c75b100d618faf9e628c
Author: Richard Sandiford 
Date:   Wed Oct 2 07:37:10 2019 +

[LRA] Don't make eliminable registers live (PR91957)

didn't make eliminable registers live which breaks

register void *cur_pro asm("reg");

where "reg" is an eliminable register.  Make fixed eliminable registers
live to fix it.

Tested on Linux/x86-64 as well as csky-linux-gnuabiv2 soft-float and
hard-float glibc builds.

gcc/

PR middle-end/91957
* lra-lives.c (make_hard_regno_dead): Record conflicts for fixed
eliminable registers.
(make_hard_regno_live): Likewise, and make them live.

gcc/testsuite/

PR middle-end/91957
* g++.target/i386/pr97054.C: New test.
---
 gcc/lra-lives.c |  5 +-
 gcc/testsuite/g++.target/i386/pr97054.C | 96 +
 2 files changed, 99 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr97054.C

diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
index 917c617903f..226fe6a69f0 100644
--- a/gcc/lra-lives.c
+++ b/gcc/lra-lives.c
@@ -282,7 +282,8 @@ make_hard_regno_live (int regno)
 {
   lra_assert (HARD_REGISTER_NUM_P (regno));
   if (TEST_HARD_REG_BIT (hard_regs_live, regno)
-  || TEST_HARD_REG_BIT (eliminable_regset, regno))
+  || (!fixed_regs[regno]
+ && TEST_HARD_REG_BIT (eliminable_regset, regno)))
 return;
   SET_HARD_REG_BIT (hard_regs_live, regno);
   sparseset_set_bit (start_living, regno);
@@ -296,7 +297,7 @@ make_hard_regno_live (int regno)
 static void
 make_hard_regno_dead (int regno)
 {
-  if (TEST_HARD_REG_BIT (eliminable_regset, regno))
+  if (!fixed_regs[regno] && TEST_HARD_REG_BIT (eliminable_regset, regno))
 return;
 
   lra_assert (HARD_REGISTER_NUM_P (regno));
diff --git a/gcc/testsuite/g++.target/i386/pr97054.C 
b/gcc/testsuite/g++.target/i386/pr97054.C
new file mode 100644
index 000..d0693af2a42
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr97054.C
@@ -0,0 +1,96 @@
+// { dg-do run { target { ! ia32 } } }
+// { dg-require-effective-target fstack_protector }
+// { dg-options "-O2 -fno-strict-aliasing -msse4.2 -mfpmath=sse -fPIC 
-fstack-protector-strong -O2" }
+
+struct p2_icode *ipc;
+register int pars asm("r13");
+register struct processor *cur_pro asm("rbp");
+register int a asm("rbx");
+register int c asm("r14");
+typedef long lina_t;
+typedef long la_t;
+typedef processor processor_t;
+typedef p2_icode p2_icode_t;
+typedef enum {
+  P2_Return_Action_Next,
+} p2_return_action_t;
+typedef struct p2_icode {
+  int ic_Parameters;
+}  icode_t;
+extern "C" icode_t *x86_log_to_icode_exec(processor_t *, la_t);
+typedef struct {
+  icode_t *ipc;
+} b;
+typedef struct {
+  char ma_thread_signal;
+  int event_counter;
+  b instrumentation;
+} d;
+
+extern "C" lina_t int2linaddr(processor_t *cpu, const p2_icode_t *ic)
+{
+  return 0;
+}
+
+typedef struct e {
+  long i64;
+  char LMA;
+} f;
+
+struct processor {
+  d common;
+  e pc_RIP;
+  f pc_EFER;
+  p2_icode_t *saved_ipc;
+};
+inline la_t code_lin_to_log(processor_t *, long) { return 0; }
+void turbo_clear(processor_t *) {}
+
+p2_return_action_t p2_ep_REBIND_IPC(void)
+{
+  processor_t *cpu = cur_pro;
+  la_t vaddr = cpu->pc_RIP.i64;
+  cur_pro->saved_ipc = (p2_icode_t *) ipc;
+  cur_pro->common.instrumentation.ipc = ipc;
+  cur_pro->pc_RIP.i64 = code_lin_to_log(cur_pro, int2linaddr(cur_pro, ipc));
+  turbo_clear(cur_pro);
+
+  cpu->saved_ipc = x86_log_to_icode_exec(cur_pro, vaddr);
+  ipc++;
+  (cur_pro->common.event_counter -= (1));
+  if (__builtin_expect((!((cur_pro->common.event_counter <= 0)
+ | cur_pro->common.ma_thread_signal)), 1))
+{
+  ((pars = ((ipc)->ic_Parameters)));
+  return P2_Return_Action_Next;
+} else {
+  return (p2_return_action_t) 0;
+}
+  return P2_Return_Action_Next;
+}
+
+struct p2_icode fake_ipc = { 0 };
+struct processor fake_proc ={{ 0 } };
+
+extern "C" icode_t *
+x86_log_to_icode_exec(processor_t *cpu, la_t la)
+{
+  return 0;
+}
+
+extern "C" void
+turbo_threshold_reached(processor_t *c, p2_icode_t *i, int s)
+{
+}
+
+int main()
+{
+  if (!__builtin_cpu_supports ("sse4.2"))
+return 0;
+  fake_proc.pc_RIP.i64 = 0xbaadc0de;
+  fake_proc.pc_EFER.LMA = 0xf;
+  ipc = &fake_ipc;
+  cur_pro = &fake_proc;
+  p2_ep_REBIND_IPC();
+  return 0;
+}
-- 
2.26.2



Re: [PATCH] bb-reorder: Fix for ICEs caused by 69ca5f3a9882

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 08:32:54AM +0200, Richard Biener wrote:
> On Tue, Sep 15, 2020 at 12:06 AM Segher Boessenkool
>  wrote:
> >
> > After the previous patch we are left with an unreachable BB.  This will
> > ICE if either we have -fschedule-fusion, or we do not have peephole2.
> >
> > This fixes it.  Okay for trunk?
> 
> Just delete_unreachable_blocks () would have worked as well?

Bunch of reasons:

1) We might want some other things that cleanup_cfg does.  This
unreachable block things caused ICEs in some cases, but what else is
there;
2) cleanup_cfg (0) is quite cheap anyway;
3) And, it is only run if the compgotos pass did do work;
4) I did not even remember delete_unreachable_blocks existed :-)

> Anyway, OK.

Thanks, committed.


Segher


Re: [PATCH] rs6000: inefficient 64-bit constant generation for consecutive 1-bits

2020-09-15 Thread Segher Boessenkool
Hi!

On Thu, Sep 10, 2020 at 04:58:03PM -0500, Peter Bergner wrote:
> Generating arbitrary 64-bit constants on POWER can take up to 5 instructions.
> However, some special constants can be generated in fewer instructions.
> One special class of constants we don't handle, is constants that have one
> set of consecutive 1-bits.  These can be generated with a "li rT,-1"
> followed by a "rldic rX,rT,SH,MB" instruction.  The following patch
> implements this idea.

Cool.

> +/* Helper for num_insns_constant_gpr and rs6000_emit_set_long_const.
> +   Return TRUE if VALUE contains one set of consecutive 1-bits.  Also set
> +   *SH and *MB to values needed to generate VALUE with the rldic instruction.
> +   We accept consecutive 1-bits that wrap from MSB to LSB, ex: 0xff00...00ff.
> +   Otherwise, return FALSE.  */
> +
> +static bool
> +has_consecutive_ones (unsigned HOST_WIDE_INT value, int *sh, int *mb)
> +{
> +  unsigned HOST_WIDE_INT nlz, ntz, mask;
> +  unsigned HOST_WIDE_INT allones = -1;
> +
> +  ntz = ctz_hwi (value);
> +  nlz = clz_hwi (value);
> +  mask = (allones >> nlz) & (allones << ntz);
> +  if (value == mask)
> +{
> +  /* Compute beginning and ending bit numbers, using IBM bit numbering.  
> */
> +  *mb = nlz;
> +  *sh = ntz;
> +  return true;
> +}
> +
> +  /* Check if the inverted value contains consecutive ones.  We can create
> + that constant by basically swapping the MB and ME bit numbers.  */
> +  value = ~value;
> +  ntz = ctz_hwi (value);
> +  nlz = clz_hwi (value);
> +  mask = (allones >> nlz) & (allones << ntz);
> +  if (value == mask)
> +{
> +  /* Compute beginning and ending bit numbers, using IBM bit numbering.  
> */
> +  *mb = GET_MODE_BITSIZE (DImode) - ntz;
> +  *sh = GET_MODE_BITSIZE (DImode) - nlz;
> +  return true;
> +}
> +
> +  *sh = *mb = 0;
> +  return false;
> +}

rs6000_is_valid_shift_mask handles this already (but it requires you to
pass in the shift needed).  rs6000_is_valid_mask will handle it.
rs6000_is_valid_and_mask does not get a shift count parameter, so cannot
use rldic currently.

Please improve something there instead?

> -  HOST_WIDE_INT ud1, ud2, ud3, ud4;
> +  HOST_WIDE_INT ud1, ud2, ud3, ud4, value = c;

Do not put declarations for uninitialised and initialised variables on
one line, please.

> +(define_insn "rldic"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
> + (match_operand:DI 2 "u6bit_cint_operand" "n")
> + (match_operand:DI 3 "u6bit_cint_operand" "n")]
> +UNSPEC_RLDIC))]
> +  "TARGET_POWERPC64"
> +  "rldic %0,%1,%2,%3")

Don't use an unspec please.  Unspecs prohibit most optimisation.  For
example, nothing can now see what actual value is calculated here (you
can make that a bit better by using REG_EQ* notes, but it is not as good
as simply describing what the actual insns do).

> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */
> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */
> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */
> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */
> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */

Please use {} quotes, and \m\M.  \d can be helpful, too.


Segher


Re: [PATCH] rs6000: inefficient 64-bit constant generation for consecutive 1-bits

2020-09-15 Thread Segher Boessenkool
Hi!

On Tue, Sep 15, 2020 at 02:23:16PM +0930, Alan Modra wrote:
> On Thu, Sep 10, 2020 at 04:58:03PM -0500, Peter Bergner via Gcc-patches wrote:
> > +unsigned long
> > +test0 (void)
> > +{
> > +   return 0x0000UL;
> > +}
> > +
> > +unsigned long
> > +test1 (void)
> > +{
> > +   return 0x0000UL;
> > +}
> > +
> > +unsigned long
> > +test2 (void)
> > +{
> > +   return 0x0000UL;
> > +}
> > +
> > +unsigned long
> > +test3 (void)
> > +{
> > +   return 0xff00UL;
> > +}
> > +
> > +unsigned long
> > +test4 (void)
> > +{
> > +   return 0xff00UL;
> > +}
> > +
> > +unsigned long
> > +test5 (void)
> > +{
> > +   return 0x0100UL;
> > +}
> > +
> > +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */
> > +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */
> > +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */
> > +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */
> > +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */
> 
> Just a comment, I don't really see too much reason to change anything,
> but scan-assembler tests can be a maintenance pain in cases like these
> where there are multiple ways to generate a constant in two
> instructions.  For example,
> 
> test3:
>   li 3,-1
>   rldicr 3,3,0,23
>   blr
> and
> 
> test5:
> li 3,16384
> rotldi 3,3,26
> blr
> 
> would be two valid possibilities for test3 and test5 that don't use
> rldic.  Ideally the test would verify the actual values generated by
> the test functions and count instructions.

Well, the point of the test is to verify we get the expected code for
this?

Maybe we should just count insns here?  But that would be a different
test.  I'm a bit worried about how often the one-bit thing will do
something unexpected, but the rest should be fine and not cause churn.


Segher


[PATCH] make swap argument of vect_get_and_check_slp_defs readonly

2020-09-15 Thread Richard Biener
Since some time we're only using this argument to communicate from
vect_build_slp_tree_1 to vect_get_and_check_slp_defs.  This makes
the direction of information flow clear.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

2020-09-15  Richard Biener  

* tree-vect-slp.c (vect_get_and_check_slp_defs): Make swap
argument by-value and do not change it.
(vect_build_slp_tree_2): Adjust, set swap to NULL after last
use.
---
 gcc/tree-vect-slp.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d844fe4d6bb..f9f4d706f0a 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -391,7 +391,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned 
int count,
swapping operands of father node of this one, return 1; if everything is
ok return 0.  */
 static int
-vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
+vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 vec stmts, unsigned stmt_num,
 vec *oprnds_info)
 {
@@ -441,7 +441,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
*swap,
   else
 return -1;
 
-  bool swapped = (*swap != 0);
+  bool swapped = (swap != 0);
   gcc_assert (!swapped || first_op_cond);
   for (i = 0; i < number_of_oprnds; i++)
 {
@@ -450,7 +450,7 @@ again:
{
  /* Map indicating how operands of cond_expr should be swapped.  */
  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
- int *map = maps[*swap];
+ int *map = maps[swap];
 
  if (i < 2)
oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
@@ -607,7 +607,6 @@ again:
 stmt_info->stmt);
 }
 
-  *swap = swapped;
   return 0;
 }
 
@@ -1391,7 +1390,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
   slp_oprnd_info oprnd_info;
   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
 {
-  int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
+  int res = vect_get_and_check_slp_defs (vinfo, swap[i],
 stmts, i, &oprnds_info);
   if (res != 0)
matches[(res == -1) ? 0 : i] = false;
@@ -1404,6 +1403,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
vect_free_oprnd_info (oprnds_info);
return NULL;
   }
+  swap = NULL;
 
   auto_vec children;
 
-- 
2.26.2


Re: [PATCH 1/2] AArch64: Cleanup CPU option processing code

2020-09-15 Thread Richard Earnshaw (lists)
On 14/09/2020 20:04, Wilco Dijkstra wrote:
> Hi Richard,
> 
>> On 14/09/2020 15:19, Wilco Dijkstra wrote:
>>> The --with-cpu/--with-arch configure option processing not only checks 
>>> valid arguments
>>> but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask.  This 
>>> isn't used
>>> however since a --with-cpu is translated into a -mcpu option which is 
>>> processed as if
>>> written on the command-line (so TARGET_CPU_DEFAULT is never accessed).
>>>
>>> So remove all the complex processing and bitmask, and just validate the 
>>> option.
>>> Fix a bug that always reports valid architecture extensions as invalid.  As 
>>> a result
>>> the CPU processing in aarch64.c can be simplified.
>>
>> Doesn't this change the default behaviour if cc1 is run directly?  I'm
>> not saying this is the wrong thing to do (I think we rely on this in the
>> arm port), but I just want to understand by what you mean when you say
>> 'never used'.
> 
> Yes it does change default behaviour of cc1, but I don't think it does matter.
> I bootstrapped and passed regress with an assert to verify TARGET_CPU_DEFAULT
> is never accessed if there is a --with-cpu configure option. So using cc1 
> directly
> is not standard practice (and I believe most other configuration options are 
> not
> baked into cc1 either).
> 

Users generally don't (or at least, shouldn't).  Developers, however,
have traditionally been used to firing up cc1 under gdb and then
expecting the key defaults to be unchanged if they weren't on the
original command line (ie they could pretty much paste the options from
the original gcc invocation into the back-end compiler and expect the
same output).

> How do we rely on it in the Arm port? That doesn't sound right...

We don't, which is why I don't think this is vital, but I wanted to
understand what you meant with your claim that this was never used.

> 
> Cheers,
> Wilco
> 

R.


[PATCH] Compile gcc.target/i386/pr78904-4a.c with -mtune=generic

2020-09-15 Thread H.J. Lu via Gcc-patches
commit e95395926a84a2406faefe0995295d199d595440
Author: Uros Bizjak 
Date:   Thu Jun 18 20:12:48 2020 +0200

i386: Fix mode of ZERO_EXTRACT RTXes, remove ext_register_operand predicate.

caused

FAIL: gcc.target/i386/pr78904-4a.c scan-assembler [ \t]movb[\t ]+%.h, t

when compiled with --target_board='unix{-m32\ -march=cascadelake}'.  With
-mtune=generic:

movzwl  4(%esp), %edx
movl8(%esp), %eax
movb%dh, t(%eax)
ret

With -mtune=cascadelake:

movzbl  5(%esp), %edx
movl8(%esp), %eax
movb%dl, t(%eax)
ret

Add -mtune=generic for --target_board='unix{-m32\ -march=cascadelake}'.
---
 gcc/testsuite/gcc.target/i386/pr78904-4a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr78904-4a.c 
b/gcc/testsuite/gcc.target/i386/pr78904-4a.c
index 5e6159a7648..6a100c01667 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-4a.c
@@ -1,7 +1,7 @@
 /* PR target/78904 */
 /* { dg-do compile } */
 /* { dg-require-effective-target nonpic } */
-/* { dg-options "-O2 -masm=att" } */
+/* { dg-options "-O2 -masm=att -mtune=generic" } */
 
 typedef __SIZE_TYPE__ size_t;
 
-- 
2.26.2



Re: [PATCH] LRA: Make fixed eliminable registers live

2020-09-15 Thread Richard Sandiford
Thanks for looking at this.

"H.J. Lu"  writes:
> commit 1bcb4c4faa4bd6b1c917c75b100d618faf9e628c
> Author: Richard Sandiford 
> Date:   Wed Oct 2 07:37:10 2019 +
>
> [LRA] Don't make eliminable registers live (PR91957)
>
> didn't make eliminable registers live which breaks
>
> register void *cur_pro asm("reg");
>
> where "reg" is an eliminable register.  Make fixed eliminable registers
> live to fix it.

I don't think fixedness itself is the issue here: it's usual for at
least some registers involved in eliminations to be fixed registers.

I think what makes this case different is instead that cur_pro/ebp
is a global register.  But IMO things have already gone wrong if we
think that a global register is eliminable.

So I wonder if instead we should check global_regs at the beginning of:

  for (i = 0; i < fp_reg_count; i++)
if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
HARD_FRAME_POINTER_REGNUM + i))
  {
SET_HARD_REG_BIT (eliminable_regset,
  HARD_FRAME_POINTER_REGNUM + i);
if (frame_pointer_needed)
  SET_HARD_REG_BIT (ira_no_alloc_regs,
HARD_FRAME_POINTER_REGNUM + i);
  }
else if (frame_pointer_needed)
  error ("%s cannot be used in %<asm%> here",
 reg_names[HARD_FRAME_POINTER_REGNUM + i]);
else
  df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);

(ira_setup_eliminable_regset), and handle the global_regs[] case in
the same way as the else case, i.e. short-circuiting both of the ifs.

Thanks,
Richard


Re: [PATCH] LRA: Make fixed eliminable registers live

2020-09-15 Thread H.J. Lu via Gcc-patches
On Tue, Sep 15, 2020 at 7:44 AM Richard Sandiford
 wrote:
>
> Thanks for looking at this.
>
> "H.J. Lu"  writes:
> > commit 1bcb4c4faa4bd6b1c917c75b100d618faf9e628c
> > Author: Richard Sandiford 
> > Date:   Wed Oct 2 07:37:10 2019 +
> >
> > [LRA] Don't make eliminable registers live (PR91957)
> >
> > didn't make eliminable registers live which breaks
> >
> > register void *cur_pro asm("reg");
> >
> > where "reg" is an eliminable register.  Make fixed eliminable registers
> > live to fix it.
>
> I don't think fixedness itself is the issue here: it's usual for at
> least some registers involved in eliminations to be fixed registers.
>
> I think what makes this case different is instead that cur_pro/ebp
> is a global register.  But IMO things have already gone wrong if we
> think that a global register is eliminable.
>
> So I wonder if instead we should check global_regs at the beginning of:
>
>   for (i = 0; i < fp_reg_count; i++)
> if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
> HARD_FRAME_POINTER_REGNUM + i))
>   {
> SET_HARD_REG_BIT (eliminable_regset,
>   HARD_FRAME_POINTER_REGNUM + i);
> if (frame_pointer_needed)
>   SET_HARD_REG_BIT (ira_no_alloc_regs,
> HARD_FRAME_POINTER_REGNUM + i);
>   }
> else if (frame_pointer_needed)
> error ("%s cannot be used in %<asm%> here",
>  reg_names[HARD_FRAME_POINTER_REGNUM + i]);
> else
>   df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
>
> (ira_setup_eliminable_regset), and handle the global_regs[] case in
> the same way as the else case, i.e. short-circuiting both of the ifs.
>

Can you send me a patch for me to try?

Thanks.

-- 
H.J.


Re: c++: local externs in templates do not get template head

2020-09-15 Thread Nathan Sidwell

On 9/14/20 6:47 PM, Tobias Burnus wrote:

This patch cause run-time fails for
   g++ -fopenmp libgomp/testsuite/libgomp.c++/udr-13.C

The follow-up fix does not help.

Namely, in udr-3.C:115:

115 if (t.s != 11 || v.v != 9 || q != 0 || d != 3.0) abort ();
(gdb) p t.s


oops, I forgot the runtime tests are there -- it was so long ago!  This 
unbreaks it, while I go develop a more robust patch.


Turns out I didn't get OMP reductions correct.  To address those I
need to do some reorganization, so this patch just reverts the
OMP-specific pieces of the local decl changes.

gcc/cp/
* pt.c (push_template_decl_real): OMP reductions retain a template
header.
(tsubst_function_decl): Likewise.

pushed to trunk

nathan

--
Nathan Sidwell
diff --git c/gcc/cp/pt.c w/gcc/cp/pt.c
index c630ef5a070..1aea105edd5 100644
--- c/gcc/cp/pt.c
+++ w/gcc/cp/pt.c
@@ -6072,9 +6072,11 @@ push_template_decl_real (tree decl, bool is_friend)
   if (is_primary)
 	retrofit_lang_decl (decl);
   if (DECL_LANG_SPECIFIC (decl)
-	  && (!VAR_OR_FUNCTION_DECL_P (decl)
-	  || !ctx
-	  || !DECL_LOCAL_DECL_P (decl)))
+	  && !(VAR_OR_FUNCTION_DECL_P (decl)
+	   && DECL_LOCAL_DECL_P (decl)
+	   /* OMP reductions still need a template header.  */
+	   && !(TREE_CODE (decl) == FUNCTION_DECL
+		&& DECL_OMP_DECLARE_REDUCTION_P (decl
 	DECL_TEMPLATE_INFO (decl) = info;
 }
 
@@ -13712,7 +13714,8 @@ tsubst_function_decl (tree t, tree args, tsubst_flags_t complain,
   gcc_assert (DECL_TEMPLATE_INFO (t) != NULL_TREE
 	  || DECL_LOCAL_DECL_P (t));
 
-  if (DECL_LOCAL_DECL_P (t))
+  if (DECL_LOCAL_DECL_P (t)
+  && !DECL_OMP_DECLARE_REDUCTION_P (t))
 {
   if (tree spec = retrieve_local_specialization (t))
 	return spec;
@@ -13967,7 +13970,8 @@ tsubst_function_decl (tree t, tree args, tsubst_flags_t complain,
 	  && !uses_template_parms (argvec))
 	tsubst_default_arguments (r, complain);
 }
-  else if (DECL_LOCAL_DECL_P (r))
+  else if (DECL_LOCAL_DECL_P (r)
+	   && !DECL_OMP_DECLARE_REDUCTION_P (r))
 {
   if (!cp_unevaluated_operand)
 	register_local_specialization (r, t);


[PATCH] S/390: Do not turn maybe-uninitialized warnings into errors

2020-09-15 Thread Stefan Schulze Frielinghaus via Gcc-patches
Over the last couple of months quite a few warnings about uninitialized
variables were raised while building GCC.  A reason why these warnings
show up on S/390 only is due to the aggressive inlining settings here.
Some of these warnings (2c832ffedf0, b776bdca932, 2786c0221b6,
1657178f59b) could be fixed or in case of a false positive silenced by
initializing the corresponding variable.  Since the latter reoccurs and
while bootstrapping such warnings are turned into errors bootstrapping
fails on S/390 consistently.  Therefore, for the moment do not turn
those warnings into errors.

config/ChangeLog:

* warnings.m4: Do not turn maybe-uninitialized warnings into errors
on S/390.

fixincludes/ChangeLog:

* configure: Regenerate.

gcc/ChangeLog:

* configure: Regenerate.

libcc1/ChangeLog:

* configure: Regenerate.

libcpp/ChangeLog:

* configure: Regenerate.

libdecnumber/ChangeLog:

* configure: Regenerate.
---
 config/warnings.m4 | 20 ++--
 fixincludes/configure  |  8 +++-
 gcc/configure  | 12 +---
 libcc1/configure   |  8 +++-
 libcpp/configure   |  8 +++-
 libdecnumber/configure |  8 +++-
 6 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/config/warnings.m4 b/config/warnings.m4
index ce007f9b73e..d977bfb20af 100644
--- a/config/warnings.m4
+++ b/config/warnings.m4
@@ -101,8 +101,10 @@ AC_ARG_ENABLE(werror-always,
 AS_HELP_STRING([--enable-werror-always],
   [enable -Werror despite compiler version]),
 [], [enable_werror_always=no])
-AS_IF([test $enable_werror_always = yes],
-  [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
+AS_IF([test $enable_werror_always = yes], [dnl
+  acx_Var="$acx_Var${acx_Var:+ }-Werror"
+  AS_CASE([$host], [s390*-*-*],
+  [acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
  m4_if($1, [manual],,
  [AS_VAR_PUSHDEF([acx_GCCvers], [acx_cv_prog_cc_gcc_$1_or_newer])dnl
   AC_CACHE_CHECK([whether $CC is GCC >=$1], acx_GCCvers,
@@ -116,7 +118,9 @@ AS_IF([test $enable_werror_always = yes],
[AS_VAR_SET(acx_GCCvers, yes)],
[AS_VAR_SET(acx_GCCvers, no)])])
  AS_IF([test AS_VAR_GET(acx_GCCvers) = yes],
-   [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
+   [acx_Var="$acx_Var${acx_Var:+ }-Werror"
+AS_CASE([$host], [s390*-*-*],
+[acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
   AS_VAR_POPDEF([acx_GCCvers])])
 m4_popdef([acx_Var])dnl
 AC_LANG_POP(C)
@@ -205,8 +209,10 @@ AC_ARG_ENABLE(werror-always,
 AS_HELP_STRING([--enable-werror-always],
   [enable -Werror despite compiler version]),
 [], [enable_werror_always=no])
-AS_IF([test $enable_werror_always = yes],
-  [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
+AS_IF([test $enable_werror_always = yes], [dnl
+  acx_Var="$acx_Var${acx_Var:+ }-Werror"
+  AS_CASE([$host], [s390*-*-*],
+  [strict_warn="$strict_warn -Wno-error=maybe-uninitialized"])])
  m4_if($1, [manual],,
  [AS_VAR_PUSHDEF([acx_GXXvers], [acx_cv_prog_cxx_gxx_$1_or_newer])dnl
   AC_CACHE_CHECK([whether $CXX is G++ >=$1], acx_GXXvers,
@@ -220,7 +226,9 @@ AS_IF([test $enable_werror_always = yes],
[AS_VAR_SET(acx_GXXvers, yes)],
[AS_VAR_SET(acx_GXXvers, no)])])
  AS_IF([test AS_VAR_GET(acx_GXXvers) = yes],
-   [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
+   [acx_Var="$acx_Var${acx_Var:+ }-Werror"
+AS_CASE([$host], [s390*-*-*],
+[acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
   AS_VAR_POPDEF([acx_GXXvers])])
 m4_popdef([acx_Var])dnl
 AC_LANG_POP(C++)
diff --git a/fixincludes/configure b/fixincludes/configure
index 6e2d67b655b..e0d679cc18e 100755
--- a/fixincludes/configure
+++ b/fixincludes/configure
@@ -4753,7 +4753,13 @@ else
 fi
 
 if test $enable_werror_always = yes; then :
-  WERROR="$WERROR${WERROR:+ }-Werror"
+WERROR="$WERROR${WERROR:+ }-Werror"
+  case $host in #(
+  s390*-*-*) :
+WERROR="$WERROR -Wno-error=maybe-uninitialized" ;; #(
+  *) :
+ ;;
+esac
 fi
 
 ac_ext=c
diff --git a/gcc/configure b/gcc/configure
index 0a09777dd42..ea03581537a 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -7064,7 +7064,13 @@ else
 fi
 
 if test $enable_werror_always = yes; then :
-  strict_warn="$strict_warn${strict_warn:+ }-Werror"
+strict_warn="$strict_warn${strict_warn:+ }-Werror"
+  case $host in #(
+  s390*-*-*) :
+strict_warn="$strict_warn -Wno-error=maybe-uninitialized" ;; #(
+  *) :
+ ;;
+esac
 fi
 
 ac_ext=cpp
@@ -19013,7 +19019,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19016 "configure"
+#line 19022 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19119,7 +19125,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19122 "configure"
+#line 19128 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git 

Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Qing Zhao via Gcc-patches



> On Sep 15, 2020, at 4:11 AM, Richard Sandiford  
> wrote:
> 
> Qing Zhao mailto:qing.z...@oracle.com>> writes:
>>> On Sep 14, 2020, at 2:20 PM, Richard Sandiford  
>>> wrote:
>>> 
>>> Qing Zhao mailto:qing.z...@oracle.com>> writes:
> On Sep 14, 2020, at 11:33 AM, Richard Sandiford 
>  wrote:
> 
> Qing Zhao  writes:
>>> Like I mentioned earlier though, passes that run after
>>> pass_thread_prologue_and_epilogue can use call-clobbered registers that
>>> weren't previously used.  For example, on x86_64, the function might
>>> not use %r8 when the prologue, epilogue and returns are generated,
>>> but pass_regrename might later introduce a new use of %r8.  AIUI,
>>> the “used” version of the new command-line option is supposed to clear
>>> %r8 in these circumstances, but it wouldn't do so if the data was
>>> collected at the point that the return is generated.
>> 
>> Thanks for the information.
>> 
>>> 
>>> That's why I think it's more robust to do this later (at the beginning
>>> of pass_late_compilation) and insert the zeroing before returns that
>>> already exist.
>> 
>> Yes, looks like it’s not correct to insert the zeroing at the time when 
>> prologue, epilogue and return are generated.
>> As I also checked, “return” might be also generated as late as pass 
>> “pass_delay_slots”,  So, shall we move the
>> New pass as late as possible?
> 
> If we insert the zeroing before pass_delay_slots and describe the
> result correctly, pass_delay_slots should do the right thing.
> 
> Describing the result correctly includes ensuring that the cleared
> registers are treated as live on exit from the function, so that the
> zeroing doesn't get deleted again, or skipped by pass_delay_slots.
 
 In the current implementation for x86, when we generating a zeroing insn 
 as the following:
 
 (insn 18 16 19 2 (set (reg:SI 1 dx)
   (const_int 0 [0])) "t10.c":11:1 -1
(nil))
 (insn 19 18 20 2 (unspec_volatile [
   (reg:SI 1 dx)
   ] UNSPECV_PRO_EPILOGUE_USE) "t10.c":11:1 -1
(nil))
 
 i.e, after each zeroing insn, the register that is zeroed is marked as 
 “UNSPECV_PRO_EPILOGUE_USE”, 
 By doing this, we can avoid this zeroing insn from being deleted or 
 skipped. 
 
 Is doing this enough to describe the result correctly?
 Is there other thing we need to do in addition to this?
>>> 
>>> I guess that works, but I think it would be better to abstract
>>> EPILOGUE_USES into a new target-independent wrapper function that
>>> (a) returns true if EPILOGUE_USES itself returns true and (b) returns
>>> true for registers that need to be zero on return, if the zeroing
>>> instructions have already been inserted.  The places that currently
>>> test EPILOGUE_USES should then test this new wrapper function instead.
>> 
>> Okay, I see. 
>> Looks like that EPILOGUE_USES is used in df-scan.c to compute the data flow 
>> information. If EPILOUGE_USES return true
>> for registers that need to be zeroed on return, those registers will be 
>> included in the data flow information, as a result, later
>> passes will not be able to delete them. 
>> 
>> This sounds to be a cleaner approach than the current one that marks the 
>> registers  “UNSPECV_PRO_EPILOGUE_USE”. 
>> 
>> A more detailed implementation question on this: 
>> Where should I put this new target-independent wrapper function in? Which 
>> header file will be a proper place to hold this new function?
> 
> Not a strong opinion, but: maybe df.h and df-scan.c, since this is
> really a DF query.

Okay.

> 
>>> After inserting the zeroing instructions, the pass should recompute the
>>> live-out sets based on this.
> 
> Sorry, I was wrong here.  It should *cause* the sets to be recomputed
> where necessary (rather than recompute them directly), but see below.
> 
>> Is only computing the live-out sets of the block that including the return 
>> insn enough? Or we should re-compute the whole procedure? 
>> 
>> Which utility routine I should use to recompute the live-out sets?
> 
> Inserting the instructions will cause the containing block to be marked
> dirty, via df_set_bb_dirty.  I think the pass should also call
> df_set_bb_dirty on the exit block itself, to indicate that the
> wrapper around EPILOGUE_USES has changed behaviour, but that might
> not be necessary.
> 
> This gives the df machinery enough information to work out what has changed.
> It will then propagate those changes throughout the function.  (I don't
> think any propagation would be necessary here, but if I'm wrong about that,
> then the df machinery will do whatever propagation is necessary.)
> 
> However, the convention is for a pass that uses the df machinery to call
> df_analyze first.  This call to df_analyze updates any stale df information.
> 
> So unlike what I said yesterday, the pass itself d

Re: [PATCH] arm: Fix up gcc.target/arm/lto/pr96939_* FAIL

2020-09-15 Thread Vaseeharan Vinayagamoorthy
I am seeing this unused parameter 'opts' error when building for this 
configuration:
Build: arm-none-linux-gnueabihf
Host: arm-none-linux-gnueabihf
Target: arm-none-linux-gnueabihf

In function 'void arm_option_restore(gcc_options*, gcc_options*, 
cl_target_option*)':
/src/gcc/gcc/config/arm/arm.c:3042:41: error: unused parameter 'opts' 
[-Werror=unused-parameter]
 3042 | arm_option_restore (struct gcc_options *opts, struct gcc_options 
*opts_set,
  | ^~~~


Regards
Vasee


On 13/09/2020, 09:39, "Gcc-patches on behalf of Jakub Jelinek via Gcc-patches" 
 wrote:

Hi!

The following patch on top of the
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553801.html
patch fixes the gcc.target/arm/lto/pr96939_* test in certain ARM
configurations.
As said in the above mentioned patch, the generic code takes care of
saving/restoring TargetVariables or Target Save options, so this just
arranges for the generic code to save those instead of needing the
arm backend to do it manually.

Bootstrapped/regtested on armv7hl-linux-gnueabi, ok for trunk?

2020-09-13  Jakub Jelinek  

* config/arm/arm.opt (x_arm_arch_string, x_arm_cpu_string,
x_arm_tune_string): Remove TargetSave entries.
(march=, mcpu=, mtune=): Add Save keyword.
* config/arm/arm.c (arm_option_save): Remove.
(TARGET_OPTION_SAVE): Don't redefine.
(arm_option_restore): Don't restore x_arm_*_string here.

--- gcc/config/arm/arm.opt.jj   2020-01-12 11:54:36.273415521 +0100
+++ gcc/config/arm/arm.opt  2020-09-12 10:45:51.239935884 +0200
@@ -21,15 +21,6 @@
 HeaderInclude
 config/arm/arm-opts.h

-TargetSave
-const char *x_arm_arch_string
-
-TargetSave
-const char *x_arm_cpu_string
-
-TargetSave
-const char *x_arm_tune_string
-
 Enum
 Name(tls_type) Type(enum arm_tls_type)
 TLS dialect to use:
@@ -82,7 +73,7 @@ mapcs-stack-check
 Target Report Mask(APCS_STACK) Undocumented

 march=
-Target RejectNegative Negative(march=) ToLower Joined Var(arm_arch_string)
+Target Save RejectNegative Negative(march=) ToLower Joined 
Var(arm_arch_string)
 Specify the name of the target architecture.

 ; Other arm_arch values are loaded from arm-tables.opt
@@ -107,7 +98,7 @@ Target Report Mask(CALLER_INTERWORKING)
 Thumb: Assume function pointers may go to non-Thumb aware code.

 mcpu=
-Target RejectNegative Negative(mcpu=) ToLower Joined Var(arm_cpu_string)
+Target Save RejectNegative Negative(mcpu=) ToLower Joined 
Var(arm_cpu_string)
 Specify the name of the target CPU.

 mfloat-abi=
@@ -232,7 +223,7 @@ Target Report Mask(TPCS_LEAF_FRAME)
 Thumb: Generate (leaf) stack frames even if not needed.

 mtune=
-Target RejectNegative Negative(mtune=) ToLower Joined Var(arm_tune_string)
+Target Save RejectNegative Negative(mtune=) ToLower Joined 
Var(arm_tune_string)
 Tune code for the given processor.

 mprint-tune-info
--- gcc/config/arm/arm.c.jj 2020-09-11 17:44:28.643014087 +0200
+++ gcc/config/arm/arm.c2020-09-12 10:48:09.951888347 +0200
@@ -247,8 +247,6 @@ static tree arm_build_builtin_va_list (v
 static void arm_expand_builtin_va_start (tree, rtx);
 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq 
*);
 static void arm_option_override (void);
-static void arm_option_save (struct cl_target_option *, struct gcc_options 
*,
-struct gcc_options *);
 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
struct cl_target_option *);
 static void arm_override_options_after_change (void);
@@ -443,9 +441,6 @@ static const struct attribute_spec arm_a
 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE 
arm_override_options_after_change

-#undef TARGET_OPTION_SAVE
-#define TARGET_OPTION_SAVE arm_option_save
-
 #undef TARGET_OPTION_RESTORE
 #define TARGET_OPTION_RESTORE arm_option_restore

@@ -3042,24 +3037,11 @@ arm_override_options_after_change (void)
   arm_override_options_after_change_1 (&global_options, 
&global_options_set);
 }

-/* Implement TARGET_OPTION_SAVE.  */
-static void
-arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts,
-struct gcc_options */* opts_set */)
-{
-  ptr->x_arm_arch_string = opts->x_arm_arch_string;
-  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
-  ptr->x_arm_tune_string = opts->x_arm_tune_string;
-}
-
 /* Implement TARGET_OPTION_RESTORE.  */
 static void
 arm_option_restore (struct gcc_options *opts, struct gcc_options *opts_set,
struct cl_target_option *ptr)
 {
-  opts->x_arm_arch_string = ptr->x_arm_arch_string;
-  opts->x_a

Re: PSA: Default C++ dialect is now C++17

2020-09-15 Thread Jeff Law via Gcc-patches

On 9/14/20 7:17 PM, Marek Polacek via Gcc-patches wrote:
> On Mon, Sep 14, 2020 at 11:13:18AM -0400, Jason Merrill via Gcc-patches wrote:
>> On Mon, Jun 29, 2020 at 1:25 PM Martin Liška  wrote:
>>> On 6/29/20 4:57 PM, Marek Polacek wrote:
 On Mon, Jun 29, 2020 at 09:51:57AM +0200, Martin Liška wrote:
> On 6/26/20 9:34 PM, Marek Polacek via Gcc-patches wrote:
>> As discussed last month:
>> 
>> it's time to change the C++ default to gnu++17.  I've committed the 
>> patch after
>> testing x86_64-pc-linux-gnu and powerpc64le-unknown-linux-gnu.  Brace 
>> yourselves!
>>
>> Marek
>>
> Just a small note that 510.parest_r SPEC 2017 benchmark can't be built now
> with default changed to -std=c++17. The spec config needs to be adjusted.
 Interesting, do you know why?  Does it use the register keyword?
>>> Apparently it needs -fno-new-ttp-matching for successful compilation.
>>> There's a reduced test-case I made:
>>>
>>> cat fe.ii
>>> template  class FiniteElement;
>>> template  class DoFHandler;
>>> class FETools {
>>>template 
>>>void back_interpolate(const DoFHandler &, const InVector 
>>> &,
>>>  const FiniteElement &, OutVector &);
>>>template  class DH, class InVector, class 
>>> OutVector,
>>>  int spacedim>
>>>void back_interpolate(const DH &, InVector,
>>>  const FiniteElement &, OutVector);
>>> };
>>> template  class DoFHandler;
>>> template  class FiniteElement;
>>> template 
>>> void FETools::back_interpolate(const DoFHandler &,
>>> const InVector &,
>>> const FiniteElement &,
>>> OutVector &) {}
>>> template void FETools::back_interpolate(const DoFHandler<3> &, const float 
>>> &,
>>>  const FiniteElement<3> &, float &);
>> Hmm, looks like I never sent this.
>>
>> Further reduced:
>>
>> template  class A;
>> template  void fn(A &) {}
>> template  class TT>  void fn(TT &);
>> template void fn(A<3> &);
>>
>> This breaks due to the C++17 changes to template template parameters
>> causing A to now be considered a valid argument for TT; with that
>> change both function templates are valid candidates, and neither is
>> more specialized than the other, so it's ambiguous.
>>
>> There are still some open core issues around these changes.
> Thanks.  I just pushed a patch to introduce GCC 11 porting_to:
>  and documented this change.
>
> Let me know if you have any comments.

And I just pushed a placeholder into that file for a few other things
I've seen during Fedora testing.  I thought I'd pushed my initial gcc-11
porting_to a month or so ago.  Apparently not :(


jeff



pEpkey.asc
Description: application/pgp-keys


Re: [PATCH] rs6000: inefficient 64-bit constant generation for consecutive 1-bits

2020-09-15 Thread Peter Bergner via Gcc-patches
> rs6000_is_valid_shift_mask handles this already (but it requires you to
> pass in the shift needed).  rs6000_is_valid_mask will handle it.
> rs6000_is_valid_and_mask does not get a shift count parameter, so cannot
> use rldic currently.

After talking with you off line, I changed to using rs6000_is_valid_mask.
That did mean I had to change num_insns_constant_gpr to take a mode param
so it could be passed down to rs6000_is_valid_mask. 




> Please improve something there instead?
> 
>> -  HOST_WIDE_INT ud1, ud2, ud3, ud4;
>> +  HOST_WIDE_INT ud1, ud2, ud3, ud4, value = c;
> 
> Do not put declarations for uninitialised and initialised variables on
> one line, please.

The new patch doesn't even touch this function anymore.



>> +(define_insn "rldic"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +(unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
>> +(match_operand:DI 2 "u6bit_cint_operand" "n")
>> +(match_operand:DI 3 "u6bit_cint_operand" "n")]
>> +   UNSPEC_RLDIC))]
>> +  "TARGET_POWERPC64"
>> +  "rldic %0,%1,%2,%3")

...and this is gone too.  I've replaced it with a generic splitter
that matches an already existing define_insn (rotl3_mask).

 
>> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */
>> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */
>> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */
>> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */
>> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */
> 
> Please use {} quotes, and \m\M.  \d can be helpful, too.

That was how I wrote it initially, but for some reason, it wouldn't match
at all.  Do I need extra \'s for my regexs when using {}?

\d is any digit?  Yeah, that would be better.  Gotta find a manpage or ???
that describes what regex patterns are allowed.


This all said, Alan's rtx_costs patch touches this same area and he talked
about removing a similar splitter, so I think I will wait for his code to
be committed and then rework this on top of his changes.

Peter




Re: [PATCH v2] rs6000: Remove useless insns fed into lvx/stvx [PR97019]

2020-09-15 Thread Segher Boessenkool
Hi Ke Wen,

On Tue, Sep 15, 2020 at 02:40:38PM +0800, Kewen.Lin wrote:
> >>* config/rs6000/rs6000-p8swap.c (insn_rtx_pair_t): New type.
> > 
> > Please don't do that.  The "first" and "second" are completely
> > meaningless.  Also,  keeping it separate arrays can very well result in
> > better machine code, and certainly makes easier to read source code.
> 
> OK, use separate arrays instead.  Here the first is the AND rtx_insn
> while the second is its fully-expanded rtx, I thought it's better to
> bundle them together before, make_pair is an easy way for that.

Easy to write, hard to read.

> >> -  rtx and_operation = 0;
> >> +  rtx and_operation = NULL_RTX;
> > 
> > Don't change code randomly (to something arguably worse, even).
> 
> Done.  I may think too much and thought NULL_RTX may be preferred
> since it has the potential to be changed by defining it as nullptr
> in the current C++11 context.

In all normal contexts you can use any null pointer constant (like 0) in
C++ as well.

>   * config/rs6000/rs6000-p8swap.c (find_alignment_op): Adjust to
>   support multiple definitions which are all AND operations with
>   the mask -16B.
>   (recombine_lvx_pattern): Adjust to handle multiple AND operations
>   from find_alignment_op.
>   (recombine_stvx_pattern): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr97019.c: New test.

> +  gcc_assert (and_insns.length () == and_ops.length ());

+1.  Thanks.

> +   for (unsigned i = 0; i < and_insns.length (); ++i)

"i++" is used more often, is more traditional.

> + {
> +   /* However, first we must be sure that we make the
> +  base register from the AND operation available
> +  in case the register has been overwritten.  Copy
> +  the base register to a new pseudo and use that
> +  as the base register of the AND operation in
> +  the new LVX instruction.  */

The swaps pass runs very early (first thing after expand really), so
this is okay.

(Not that this is new code anyway, heh.)

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
> @@ -0,0 +1,82 @@
> +/* This issue can only exist on little-endian P8 targets, since
> +   the built-in functions vec_ld/vec_st will use lxvd2x/stxvd2x
> +   (P8 big-endian) or lxv/stxv (P9 and later).  */
> +/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */

Do you need to test for LE?  If not, just always run it?  If it works,
it works, it doesn't matter that you do not expect it to ever fail (we
do not really expect *any* test we have to ever fail *anywhere*, heh).

> +/* { dg-final { scan-assembler-not "rldicr\[ \t\]+\[0-9\]+,\[0-9\]+,0,59" } 
> } */

Please use {} quotes, and \s and \d.

You can also use  {(?n)rldicr.*,0,59}  since (?n) makes . not match
newlines anymore.

Okay for trunk with or without those suggestions.  Thanks!


Segher


[PATCH] Allow copying of symbolic ranges to an irange.

2020-09-15 Thread Aldy Hernandez via Gcc-patches
This fixes an ICE when trying to copy a legacy value_range containing a 
symbolic to a multi-range:


min = make_ssa_name (type);
max = build_int_cst (type, 55);
value_range vv (min, max);
int_range<2> vr = vv;

This doesn't affect anything currently, as we don't have a lot of 
interactions between value_range's and multi_range's in trunk right, but 
it will become a problem as soon as someone tries to get a range from 
evrp and copy it over to a multi-range.


OK pending tests?

gcc/ChangeLog:

	* range-op.cc (multi_precision_range_tests): Normalize symbolics when 
copying to a multi-range.

* value-range.cc (irange::copy_legacy_range): Add test.
---
 gcc/range-op.cc|  9 +
 gcc/value-range.cc | 12 +++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index c5f511422f4..8e52d5318e9 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -3463,6 +3463,15 @@ multi_precision_range_tests ()
   small = big;
   ASSERT_TRUE (small == int_range<1> (INT (21), INT (21), VR_ANTI_RANGE));

+  // Copying a legacy symbolic to an int_range should normalize the
+  // symbolic at copy time.
+  {
+value_range legacy_range (make_ssa_name (integer_type_node), INT (25));
+int_range<2> copy = legacy_range;
+ASSERT_TRUE (copy == int_range<2>  (vrp_val_min (integer_type_node),
+   INT (25)));
+  }
+
   range3_tests ();
 }

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 20aa4f114c9..26ccd143e5c 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -101,7 +101,17 @@ irange::copy_legacy_range (const irange &src)
   VR_ANTI_RANGE);
 }
   else
-set (src.min (), src.max (), VR_RANGE);
+{
+  // If copying legacy to int_range, normalize any symbolics.
+  if (src.legacy_mode_p () && !range_has_numeric_bounds_p (&src))
+   {
+ value_range cst (src);
+ cst.normalize_symbolics ();
+ set (cst.min (), cst.max ());
+ return;
+   }
+  set (src.min (), src.max ());
+}
 }

 // Swap min/max if they are out of order.  Return TRUE if further
--
2.26.2



Re: [PATCH v2] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 08:51:09AM +0200, Richard Biener wrote:
> On Tue, Sep 15, 2020 at 5:56 AM luoxhu  wrote:
> > > u[n % 4] = i;
> > >
> > > I guess.  Is the % 4 mandated by the vec_insert semantics btw?

(As an aside -- please use "& 3" instead: that works fine if n is signed
as well, but modulo doesn't.  Maybe that is in the patch already, I
didn't check, sorry.)

> note this is why I asked about the actual CPU instruction - as I read
> Seghers mail
> the instruction modifies a vector register, not memory.

But note that the builtin is not the same as the machine instruction --
here there shouldn't be a difference if compiling for a new enough ISA,
but the builtin is available on anything with at least AltiVec.


Segher


Re: [Patch] OpenMP/Fortran: Fix (re)mapping of allocatable/pointer arrays [PR96668]

2020-09-15 Thread Jakub Jelinek via Gcc-patches
On Tue, Sep 15, 2020 at 12:48:59AM +0200, Tobias Burnus wrote:
> +   bool has_nullptr;
> +   size_t j;
> +   for (j = 0; j < n->tgt->list_count; j++)
> + if (n->tgt->list[j].key == n)
> +   {
> + has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
> + break;
> +   }
> +   if (n->tgt->list_count == 0)
> + {
> +   /* 'declare target'; assume has_nullptr; it could also be
> +  statically assigned pointer, but that it should be to
> +  the equivalent variable on the host.  */
> +   assert (n->refcount == REFCOUNT_INFINITY);
> +   has_nullptr = true;
> + }
> +   else
> + assert (j < n->tgt->list_count);
> +   /* Re-map the data if there is an 'always' modifier or if a
> +  null pointer was there and now a non-null one has been found; that
> +  permits transparent re-mapping for Fortran array descriptors
> +  which were previously mapped unallocated.  */
> +   for (j = i + 1; j < mapnum; j++)
> + {
> +   int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
> +   if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
> +   && (!has_nullptr

David Edelsohn just reported this (rightly so) results in -Wuninitialized
warnings, I think you meant bool has_nullptr = false;
in the definition (in both places that it is defined at).

Jakub



[PATCH] document -Wuninitialized for allocated objects

2020-09-15 Thread Martin Sebor via Gcc-patches

The attached patch updates the manual to mention that Wuninitialized
and -Wmaybe-uninitialized are issued for both auto and allocated
objects, as well as for passing pointers to uninitialized objects
to const-qualified parameters.  Both of these features are GCC 11
enhancements.

Martin
Document -Wuninitialized for allocated objects.

gcc/ChangeLog:

	* doc/invoke.texi (-Wuninitialized): Document -Wuninitialized for
	allocated objects.
	 (-Wmaybe-uninitialized): Same.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6d9ff2c3362..df8a71bd6b3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6513,9 +6513,15 @@ either specify @option{-Wextra -Wunused} (note that @option{-Wall} implies
 @item -Wuninitialized
 @opindex Wuninitialized
 @opindex Wno-uninitialized
-Warn if an automatic variable is used without first being initialized.
-In C++, warn if a non-static reference or non-static @code{const}
-member appears in a class without constructors.
+Warn if an object with automatic or allocated storage duration is used
+without having been initialized.  In C++, also warn if a non-static
+reference or non-static @code{const} member appears in a class without
+constructors.
+
+In addition, passing a pointer (or in C++, a reference) to an uninitialized
+object to a @code{const}-qualified argument of a built-in function known to
+read the object is also diagnosed by this warning.
+(@option{-Wmaybe-uninitialized} is issued for ordinary functions.)
 
 If you want to warn about code that uses the uninitialized value of the
 variable in its own initializer, use the @option{-Winit-self} option.
@@ -6557,11 +6563,18 @@ void store (int *i)
 @item -Wmaybe-uninitialized
 @opindex Wmaybe-uninitialized
 @opindex Wno-maybe-uninitialized
-For an automatic (i.e.@: local) variable, if there exists a path from the
-function entry to a use of the variable that is initialized, but there exist
-some other paths for which the variable is not initialized, the compiler
-emits a warning if it cannot prove the uninitialized paths are not
-executed at run time.
+For an object with automatic or allocated storage duration, if there exists
+a path from the function entry to a use of the object that is initialized,
+but there exist some other paths for which the object is not initialized,
+the compiler emits a warning if it cannot prove the uninitialized paths
+are not executed at run time.
+
+In addition, passing a pointer (or in C++, a reference) to an uninitialized
+object to a @code{const}-qualified function argument is also diagnosed by
+this warning.  (@option{-Wuninitialized} is issued for built-in functions
+known to read the object.)  Annotating the function with attribute
+@code{access (none)} indicates that the argument isn't used to access
+the object and avoids the warning (@pxref{Common Function Attributes}).
 
 These warnings are only possible in optimizing compilation, because otherwise
 GCC does not keep track of the state of variables.


[r11-3204 Regression] FAIL: g++.dg/vect/slp-pr87105.cc -std=c++2a scan-tree-dump-times slp2 "basic block part vectorized" 1 on Linux/x86_64 (-m64 -march=cascadelake)

2020-09-15 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

c9de716a59c873859df3b3e1fbb993200fce5a73 is the first bad commit
commit c9de716a59c873859df3b3e1fbb993200fce5a73
Author: Richard Biener 
Date:   Tue Sep 15 14:35:40 2020 +0200

Allow more BB vectorization

caused

FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++14  scan-tree-dump slp2 "vect_[^\rm]* 
= MIN"
FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++14  scan-tree-dump-times slp2 "basic 
block part vectorized" 1
FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++17  scan-tree-dump slp2 "vect_[^\rm]* 
= MIN"
FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++17  scan-tree-dump-times slp2 "basic 
block part vectorized" 1
FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++2a  scan-tree-dump slp2 "vect_[^\rm]* 
= MIN"
FAIL: g++.dg/vect/slp-pr87105.cc  -std=c++2a  scan-tree-dump-times slp2 "basic 
block part vectorized" 1

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3204/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="vect.exp=g++.dg/vect/slp-pr87105.cc --target_board='unix{-m64\ 
-march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


Re: [RS6000] rs6000_rtx_costs for AND

2020-09-15 Thread will schmidt via Gcc-patches
On Tue, 2020-09-15 at 10:49 +0930, Alan Modra via Gcc-patches wrote:
> The existing "case AND" in this function is not sufficient for
> optabs.c:avoid_expensive_constant usage, where the AND is passed in
> outer_code.
> 
>   * config/rs6000/rs6000.c (rs6000_rtx_costs): Move costing for
>   AND to CONST_INT case.
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 32044d33977..523d029800a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -21150,16 +21150,13 @@ rs6000_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
>   || outer_code == MINUS)
>  && (satisfies_constraint_I (x)
>  || satisfies_constraint_L (x)))
> -   || (outer_code == AND
> -   && (satisfies_constraint_K (x)
> -   || (mode == SImode
> -   ? satisfies_constraint_L (x)
> -   : satisfies_constraint_J (x
> -   || ((outer_code == IOR || outer_code == XOR)
> +   || ((outer_code == AND || outer_code == IOR || outer_code ==
> XOR)
> && (satisfies_constraint_K (x)
> || (mode == SImode
> ? satisfies_constraint_L (x)
> : satisfies_constraint_J (x
> +   || (outer_code == AND
> +   && rs6000_is_valid_and_mask (x, mode))
> || outer_code == ASHIFT
> || outer_code == ASHIFTRT
> || outer_code == LSHIFTRT
> @@ -21196,7 +21193,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
>   || outer_code == IOR
>   || outer_code == XOR)
>  && (INTVAL (x)
> -& ~ (unsigned HOST_WIDE_INT) 0x) == 0))
> +& ~ (unsigned HOST_WIDE_INT) 0x) == 0)
> +|| (outer_code == AND
> +&& rs6000_is_valid_2insn_and (x, mode)))
>   {
> *total = COSTS_N_INSNS (1);
> return true;
> @@ -21334,26 +21333,6 @@ rs6000_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
> *total += COSTS_N_INSNS (1);
> return true;
>   }
> -
> -   /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
> -   HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
> -   if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
> -   || (val & 0x) == val
> -   || (val & 0x) == val
> -   || ((val & 0x) == 0 && mode == SImode))
> - {
> -   *total = rtx_cost (left, mode, AND, 0, speed);
> -   *total += COSTS_N_INSNS (1);
> -   return true;
> - }
> -
> -   /* 2 insns.  */
> -   if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
> - {
> -   *total = rtx_cost (left, mode, AND, 0, speed);
> -   *total += COSTS_N_INSNS (2);
> -   return true;
> - }
>   }

It's not exactly 1x1..   I tentatively conclude that the /* rotate-and-
mask */  lump of code here does go dead with the "case AND" changes
above.

thanks
-Will

> 
>*total = COSTS_N_INSNS (1);



Re: [RS6000] rtx_costs

2020-09-15 Thread will schmidt via Gcc-patches
On Tue, 2020-09-15 at 10:49 +0930, Alan Modra via Gcc-patches wrote:
> This patch series fixes a number of issues in rs6000_rtx_costs, the
> aim being to provide costing somewhat closer to reality.  Probably
> the
> most important patch of the series is patch 4, which just adds a
> comment.  Without the analysis that went into that comment, I found
> myself making what seemed to be good changes but which introduced
> regressions.
> 
> So far these changes have not introduced any testsuite regressions
> on --with-cpu=power8 and --with-cpu=power9 all lang bootstraps on
> powerpc64le-linux.  Pat spec tested on power9 against a baseline
> master from a few months ago, seeing a few small improvements and no
> degradations above the noise.

I've read through all the patches in this series, (including the tests
that were sent a bit later).  Your use of comments does a good job
helping describe whats going on.

One comment/question/point of clarity for the AND patch that I'll send
separately.  

That said, the series lgtm.  :-) 

thanks, 
-Will


> 
> Some notes:
> 
> Examination of varasm.o shows quite a number of cases where
> if-conversion succeeds due to different seq_cost.  One example:
> 
> extern int foo ();
> int
> default_assemble_integer (unsigned size)
> {
>   extern unsigned long rs6000_isa_flags;
> 
>   if (size > (!((rs6000_isa_flags & (1UL << 35)) != 0) ? 4 : 8))
> return 0;
>   return foo ();
> }
> 
> This rather horrible code turns the rs6000_isa_flags value into
> either
> 4 or 8:
>   rldicr 9,9,28,0
>   srdi 9,9,28
>   addic 9,9,-1
>   subfe 9,9,9
>   rldicr 9,9,0,61
>   addi 9,9,8
> Better would be
>   rldicl 9,9,29,63
>   sldi 9,9,2
>   addi 9,9,4
> 
> There is also a "rlwinm ra,rb,3,0,26" instead of "rldicr ra,rb,3,60",
> and "li r31,0x4000; rotldi r31,r31,17" vs.
> "lis r31,0x8000; clrldi r31,r31,32".
> Neither of these is a real change.  I saw one occurrence of a 5 insn
> sequence being replaced with a load from memory in
> default_function_rodata_section, for ".rodata", and others elsewhere.
> 
> Sometimes correct insn cost leads to unexpected results.  For
> example:
> 
> extern unsigned bar (void);
> unsigned
> f1 (unsigned a)
> {
>   if ((a & 0x01000200) == 0x01000200)
> return bar ();
>   return 0;
> }
> 
> emits for a & 0x01000200
>  (set (reg) (and (reg) (const_int 0x01000200)))
> at expand time (two rlwinm insns) rather than the older
>  (set (reg) (const_int 0x01000200))
>  (set (reg) (and (reg) (reg)))
> which is three insns.  However, since 0x01000200 is needed later the
> older code after optimisation is smaller.



Re: [PATCH 2/4, revised patch applied] PowerPC: Rename functions for min, max, cmove

2020-09-15 Thread Alexandre Oliva
Hello, Mike,

On Sep 11, 2020, Michael Meissner via Gcc-patches  
wrote:

> +case SFmode:
> +case DFmode:

gcc110 (ppc64) in the build farm didn't like this.  The bootstrap
compiler barfs on these expressions, because of some constexpr issue I
haven't really looked into.

I'm testing this patch.  I'll check it in when I'm done.


use E_*mode instead of just *mode

From: Alexandre Oliva 

g++ 4.8.5 rejected cases with SFmode and DFmode, presumably due to
some bug in the constexpr implementation.

for  gcc/ChangeLog

* config/rs6000/rs6000.c (have_compare_and_set_mask): Use
E_*mode in cases.
---
 gcc/config/rs6000/rs6000.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6d0c550..b32fe91 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15190,8 +15190,8 @@ have_compare_and_set_mask (machine_mode mode)
 {
   switch (mode)
 {
-case SFmode:
-case DFmode:
+case E_SFmode:
+case E_DFmode:
   return TARGET_P9_MINMAX;
 
 default:



-- 
Alexandre Oliva, happy hacker
https://FSFLA.org/blogs/lxo/
Free Software Activist
GNU Toolchain Engineer


[PATCH] debug: Pass --gdwarf-N to assembler if fixed gas is detected during configure

2020-09-15 Thread Jakub Jelinek via Gcc-patches
On Thu, Sep 10, 2020 at 01:45:50PM +0200, Jakub Jelinek wrote:
> On Thu, Sep 10, 2020 at 01:16:57PM +0200, Jakub Jelinek via Gcc-patches wrote:
> > As for the test assembly, I'd say we should take
> > #define F void foo (void) {}
> > F
> > compile it with
> > gcc -S -O2 -g1 -dA -gno-as-loc-support -fno-merge-debug-strings
> > remove .cfi_* directives, remove the ret instruction, change @function
> > and @progbits to %function and %progbits, change .uleb128 to just .byte,
> > I think all the values should be small enough, maybe change .value to
> > .2byte and .long to .4byte (whatever is most portable across different
> > arches and gas versions), simplify (shorten) strings and adjust
> > sizes, and do something with the .quad directives, that is dependent on
> > the address size, perhaps just take those attributes out and adjust
> > .debug_abbrev?  Finally, remove all comments (emit them in the first case
> > just to better understand the debug info).
> 
> I'm afraid it is hard to avoid the .quad or .8byte.
> Here is a 64-bit address version that assembles fine by both x86_64 and
> aarch64 as.
> Unfortunately doesn't fail with broken gas versions with -gdwarf-2 without
> the nop, so we'll need at least a nop in there.
> Fortunately gcc/configure.ac already determines the right nop insn for the
> target, in $insn.
> So I guess what we want next is have the 32-bit version of this with .4byte
> instead of .8byte and just let's try to assemble both versions, first
> without -gdwarf-2 and the one that succeeds assemble again with -gdwarf-2
> and check for the duplicate .debug_line sections error.
> What do you think?

Ok, here it is in patch form.
I've briefly tested it, with the older binutils I have around (no --gdwarf-N
support), with latest gas (--gdwarf-N that can be passed to as even when
compiling C/C++ etc. code and emitting .debug_line) and latest gas with Mark's 
fix
reverted (--gdwarf-N support, but can only pass it to as when assembling
user .s/.S files, not when compiling C/C++ etc.).
Will bootstrap/regtest (with the older binutils) later tonight.

2020-09-15  Jakub Jelinek  

* configure.ac (HAVE_AS_GDWARF_5_DEBUG_FLAG,
HAVE_AS_WORKING_DWARF_4_FLAG): New tests.
* gcc.c (ASM_DEBUG_DWARF_OPTION): Define.
(ASM_DEBUG_SPEC): Use ASM_DEBUG_DWARF_OPTION instead of
"--gdwarf2".  Use %{cond:opt1;:opt2} style.
(ASM_DEBUG_OPTION_DWARF_OPT): Define.
(ASM_DEBUG_OPTION_SPEC): Define.
(asm_debug_option): New variable.
(asm_options): Add "%(asm_debug_option)".
(static_specs): Add asm_debug_option entry.
(static_spec_functions): Add dwarf-version-gt.
(debug_level_greater_than_spec_func): New function.
* config/darwin.h (ASM_DEBUG_OPTION_SPEC): Define.
* config/darwin9.h (ASM_DEBUG_OPTION_SPEC): Redefine.
* config.in: Regenerated.
* configure: Regenerated.

--- gcc/configure.ac.jj 2020-09-08 16:48:32.377900856 +0200
+++ gcc/configure.ac2020-09-15 18:07:23.292249972 +0200
@@ -5192,6 +5192,194 @@ if test x"$insn" != x; then
   [AC_DEFINE(HAVE_AS_GDWARF2_DEBUG_FLAG, 1,
 [Define if your assembler supports the --gdwarf2 option.])])
 
+ gcc_GAS_CHECK_FEATURE([--gdwarf-5 option],
+  gcc_cv_as_gdwarf_5_flag,
+  [elf,2,36,0], [--gdwarf-5], [$insn],,
+  [AC_DEFINE(HAVE_AS_GDWARF_5_DEBUG_FLAG, 1,
+[Define if your assembler supports the --gdwarf-5 option.])])
+
+ dwarf4_debug_info_size=0x46
+ dwarf4_high_pc_form=7
+ dwarf4_debug_aranges_size=0x2c
+ dwarf4_line_sz=9
+ for dwarf4_addr_size in 8 4; do
+   conftest_s="\
+   .file   \"a.c\"
+   .text
+.Ltext0:
+   .p2align 4
+   .globl  foo
+   .type   foo, %function
+foo:
+.LFB0:
+.LM1:
+   $insn
+.LM2:
+.LFE0:
+   .size   foo, .-foo
+.Letext0:
+   .section.debug_info,\"\",%progbits
+.Ldebug_info0:
+   .4byte  $dwarf4_debug_info_size
+   .2byte  0x4
+   .4byte  .Ldebug_abbrev0
+   .byte   0x$dwarf4_addr_size
+   .byte   0x1
+   .ascii \"GNU C17\\0\"
+   .byte   0xc
+   .ascii \"a.c\\0\"
+   .ascii \"/\\0\"
+   .${dwarf4_addr_size}byte.Ltext0
+   .${dwarf4_addr_size}byte.Letext0-.Ltext0
+   .4byte  .Ldebug_line0
+   .byte   0x2
+   .ascii \"foo\\0\"
+   .byte   0x1
+   .byte   0x2
+   .byte   0x1
+   .${dwarf4_addr_size}byte.LFB0
+   .${dwarf4_addr_size}byte.LFE0-.LFB0
+   .byte   0x1
+   .byte   0x9c
+   .byte   0
+   .section.debug_abbrev,\"\",%progbits
+.Ldebug_abbrev0:
+   .byte   0x1
+   .byte   0x11
+   .byte   0x1
+   .byte   0x25
+   .byte   0x8
+   .byte   0x13
+   .byte   0xb
+   .byte   0x3
+   .byte   0x8
+   .byte   0x1b
+   .byte   0x8
+   .byte   0x11
+   .byte   0x1
+   .byte   0x12
+   .byte   0x$dwarf4_high_pc_form
+   .byte   0x10
+   .byte   0x17
+   .byte   0
+   .byte   0
+   .byte  

Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Segher Boessenkool
On Mon, Sep 14, 2020 at 10:07:31PM -0500, Qing Zhao wrote:
> > On Sep 14, 2020, at 6:09 PM, Segher Boessenkool 
> >  wrote:
> >> Gadget 1:
> >> 
> >> mov  rax,  value
> >> syscall
> >> ret
> > 
> > No, just
> > 
> > mov rax,59
> > syscall
> > 
> > (no ret necessary!)
> 
> But for ROP, a typical gadget should end with a “ret” (or an indirect 
> branch), right?

Not the last one :-)  (Especially if it is exec!)


Segher


[PATCH] work harder to avoid -Wuninitialized for empty structs (PR 96295)

2020-09-15 Thread Martin Sebor via Gcc-patches

The -Wuninitialized/-Wmaybe-uninitialized enhancement to warn when
a pointer or reference to an uninitialized object is passed to
a const-qualified function argument tries to avoid triggering for
objects of empty types.  However, the suppression is incomplete
and lets the warning trigger in some corner cases.  The attached
patch extends the suppression to those as well.

Tested on x86_64-linux.  I will plan to commit it later this week
if there are no objections.

Martin
Work harder to avoid -Wuninitialized for objects of empty structs (PR middle-end/96295).

Resolves:
PR middle-end/96295 - -Wmaybe-uninitialized warning for range operator with
reference to an empty struct

gcc/ChangeLog:

	PR middle-end/96295
	* tree-ssa-uninit.c (maybe_warn_operand): Work harder to avoid
	warning for objects of empty structs

gcc/testsuite/ChangeLog:

	PR middle-end/96295
	* g++.dg/warn/Wuninitialized-11.C: New test.

diff --git a/gcc/testsuite/g++.dg/warn/Wuninitialized-11.C b/gcc/testsuite/g++.dg/warn/Wuninitialized-11.C
new file mode 100644
index 000..a6e8beb5740
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wuninitialized-11.C
@@ -0,0 +1,26 @@
+/* PR middle-end/96295 - -Wmaybe-uninitialized warning for range operator
+   with reference to an empty struct
+   { dg-do compile }
+   { dg-options "-Wall" }
+   { dg-require-effective-target c++11 } */
+
+struct I
+{
+  bool operator!= (const I&) const;
+  void* operator* () const;
+  I& operator++ ();
+};
+
+struct A
+{
+  I begin () const { return I (); }
+  I end () const { return I (); }
+};
+
+void f (void)
+{
+  for (void *p : A ())   // { dg-bogus "\\\[-Wmaybe-uninitialized" }
+{
+  (void)p;
+}
+}
diff --git a/gcc/tree-ssa-uninit.c b/gcc/tree-ssa-uninit.c
index 7eddca397b3..c7f994b0587 100644
--- a/gcc/tree-ssa-uninit.c
+++ b/gcc/tree-ssa-uninit.c
@@ -401,6 +401,8 @@ maybe_warn_operand (ao_ref &ref, gimple *stmt, tree lhs, tree rhs,
  The first_field() test is important for C++ where the predicate
  alone isn't always sufficient.  */
   tree rhstype = TREE_TYPE (rhs);
+  if (POINTER_TYPE_P (rhstype))
+rhstype = TREE_TYPE (rhstype);
   if (TYPE_EMPTY_P (rhstype)
   || (RECORD_OR_UNION_TYPE_P (rhstype)
 	  && (!first_field (rhstype)


[r11-3207 Regression] FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " & 3" 1 on Linux/x86_64 (-m64 -march=cascadelake)

2020-09-15 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

8f0d743c2dee6afae5c6f861b0642b7b112a4a70 is the first bad commit
commit 8f0d743c2dee6afae5c6f861b0642b7b112a4a70
Author: Feng Xue 
Date:   Mon Aug 17 23:00:35 2020 +0800

tree-optimization/94234 - add plusminus-with-convert pattern

caused

FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " >> 2" 1
FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " & 3" 1

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3207/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/20030807-10.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


[r11-3207 Regression] FAIL: gcc.dg/ifcvt-3.c scan-rtl-dump ce1 "3 true changes made" on Linux/x86_64 (-m64 -march=cascadelake)

2020-09-15 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

8f0d743c2dee6afae5c6f861b0642b7b112a4a70 is the first bad commit
commit 8f0d743c2dee6afae5c6f861b0642b7b112a4a70
Author: Feng Xue 
Date:   Mon Aug 17 23:00:35 2020 +0800

tree-optimization/94234 - add plusminus-with-convert pattern

caused

FAIL: gcc.dg/ifcvt-3.c scan-rtl-dump ce1 "3 true changes made"

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3207/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/ifcvt-3.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


Aw: Re: [PATCH] PR/fortran 96983 - ICE compiling gfortran.dg/pr96711.f90

2020-09-15 Thread Harald Anlauf
Dear Tobias,

I can see that you want a proper fix.

However, after having looked at all these comments about the powerpc
situation, I do not really think I'd want to ever touch that stuff.
It's clearly beyond my capabilities and resources.

I do feel responsible for the regression introduced by my naive patch
for PR96711, which is a breakage for sparc and powerpc.

The patch was meant as a minimal solution, or rather a bandaid to fix
sparc, and skip the test for powerpc, and leave the powerpc situation
to someone with more knowledge of that target.

If this patch is rejected, but the regression is to be fixed before
gcc-11 release, I can therefore only offer to revert the patch for
PR96711, to reopen it and unassign.

Thanks,
Harald



Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 12:46:00PM +0100, Richard Sandiford wrote:
> Segher Boessenkool  writes:
> > On Mon, Sep 14, 2020 at 05:33:33PM +0100, Richard Sandiford wrote:
> >> > However, for the cases on Power as Segher mentioned, there are also some 
> >> > scratch registers used for
> >> > Other purpose, not sure whether we can correctly generate zeroing in 
> >> > middle-end for Power?
> >> 
> >> Segher would be better placed to answer that, but I think the process
> >> above has to give a conservatively-accurate list of live registers.
> >> If it misses a register, the other late rtl passes could clobber
> >> that same register.
> >
> > It will zero a whole bunch of registers that are overwritten later, that
> > are not parameter passing registers either.
> 
> This thread has covered two main issues: correctness and cost.
> The question above was about correctness, but your reply seems to be
> about cost.

The issues are very heavily intertwined.  A much too high execution
cost is unacceptable, just like machine code that does not implement the
source code faithfully.

> On the cost issue: when you say some registers are “overwritten later”:
> which registers do you mean, and who would be doing the overwriting?

(Glue) code that is generated by the linker.

> We were talking about inserting zeroing instructions immediately before
> returns that already exist.  It looks like the main Power return
> pattern is:

It is.

> (define_insn "return"
>   [(any_return)]
>   ""
>   "blr"
>   [(set_attr "type" "jmpreg")])
> 
> Does this overwrite anything other than the PC?  If not, it doesn't

(We do not have a "PC" register, but :-) )

Nope.  The blr instruction does not write any register.  (The base
"bclr[l]" insn can write to CTR and LR).

> look like anything in the function itself would clobber other registers
> later (i.e. later than the inserted zeroing instructions).  And of course,
> if an attacker is performing a ROP attack, the attacker controls which
> address the BLR returns to.

That does not matter for the *normal* case.  Making the normal case even
more expensive than this scheme already is is no good.


Anyway, I was concerned about other architectures, too (that may not
even *have* a GCC port (yet)).  The point is that this should follow all
the rules we have for RTL.  Now that it will use DF (thanks!), most of
that will follow automatically (or easily, anyway).


Segher


Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 10:11:41AM +0100, Richard Sandiford wrote:
> Qing Zhao  writes:
> >> On Sep 14, 2020, at 2:20 PM, Richard Sandiford  
> >> wrote:
(Putting correct info in DF, inserting the new insns in pro_and_epi).

But, scheduling runs *after* that, and then you need to prevent the
inserted (zeroing) insns from moving -- if you don't, the code after
some zeroing can be used as gadget!  You want to always have all
zeroing insns after *any* computational insn, or it becomes a gadget.


Segher


[PATCH] warn for integer overflow in allocation calls (PR 96838)

2020-09-15 Thread Martin Sebor via Gcc-patches

Overflowing the size of a dynamic allocation (e.g., malloc or VLA)
can lead to a subsequent buffer overflow corrupting the heap or
stack.  The attached patch diagnoses a subset of these cases where
the overflow/wraparound is still detectable.

Besides regtesting GCC on x86_64-linux I also verified the warning
doesn't introduce any false positives into Glibc or Binutils/GDB
builds on the same target.

Martin
PR middle-end/96838 - missing warning on integer overflow in calls to allocation functions

gcc/ChangeLog:

	PR middle-end/96838
	* calls.c (eval_size_vflow): New function.
	(get_size_range): Call it.  Add argument.
	(maybe_warn_alloc_args_overflow): Diagnose overflow/wraparound.
	* calls.h (get_size_range): Add argument.

gcc/testsuite/ChangeLog:

	PR middle-end/96838
	* gcc.dg/Walloc-size-larger-than-19.c: New test.
	* gcc.dg/Walloc-size-larger-than-20.c: New test.

diff --git a/gcc/calls.c b/gcc/calls.c
index 8ac94db6817..a5acff301e0 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1237,6 +1237,139 @@ alloc_max_size (void)
   return alloc_object_size_limit;
 }
 
+/* Try to evaluate the arithmetic EXPression representing the size of
+   an object in the widest possible type and set RANGE[] to the result.
+   Return the overflow status for the type of the expression (which may
+   be OVF_UNKNOWN if it cannot be determined from the ranges of its
+   operands).  Used to detect calls to allocation functions with
+   an argument that either overflows or wraps around zero.  */
+
+static wi::overflow_type
+eval_size_vflow (tree exp, wide_int range[2])
+{
+  const int prec = WIDE_INT_MAX_PRECISION;
+
+  if (TREE_CODE (exp) == INTEGER_CST)
+{
+  range[0] = range[1] = wi::to_wide (exp, prec);
+  return wi::OVF_NONE;
+}
+
+  if (TREE_CODE (exp) != SSA_NAME)
+return wi::OVF_UNKNOWN;
+
+  gimple *def = SSA_NAME_DEF_STMT (exp);
+  if (!is_gimple_assign (def))
+return wi::OVF_UNKNOWN;
+
+  tree_code code = gimple_assign_rhs_code (def);
+  tree optype = NULL_TREE;
+  wide_int op1r[2], op2r[2];
+  if (code == MULT_EXPR
+  || code == MINUS_EXPR
+  || code == PLUS_EXPR)
+{
+  /* Ignore the overflow on the operands.  */
+  tree rhs1 = gimple_assign_rhs1 (def);
+  wi::overflow_type ovf = eval_size_vflow (rhs1, op1r);
+  if (ovf == wi::OVF_UNKNOWN)
+	return ovf;
+
+  optype = TREE_TYPE (rhs1);
+  tree rhs2 = gimple_assign_rhs2 (def);
+  ovf = eval_size_vflow (rhs2, op2r);
+  if (ovf == wi::OVF_UNKNOWN)
+	return ovf;
+
+  if (code == PLUS_EXPR
+	  && TYPE_UNSIGNED (optype)
+	  && TREE_CODE (rhs2) == INTEGER_CST)
+	{
+	  /* A - CST is transformed into A + (-CST).  Undo that to avoid
+	 false reports of overflow (this means overflow due to very
+	 large constants in the source code isn't detected.)  */
+	  tree sgn_type = signed_type_for (optype);
+	  tree max = TYPE_MAX_VALUE (sgn_type);
+	  wide_int smax = wi::to_wide (max, prec);
+	  if (wi::ltu_p (smax, op2r[0]))
+	{
+	  op2r[0] = wi::neg (wi::sub (op2r[0], smax, UNSIGNED, &ovf));
+	  op2r[1] = op2r[0];
+	}
+	}
+}
+  else if (code == NOP_EXPR)
+{
+  /* Strip (implicit) conversions.  Explicit conversions are stripped
+	 as well which may result in reporting overflow despite a cast.
+	 Those cases should be rare.  */
+  tree rhs1 = gimple_assign_rhs1 (def);
+  wi::overflow_type ovf = eval_size_vflow (rhs1, op1r);
+  if (ovf == wi::OVF_UNKNOWN)
+	return ovf;
+  optype = TREE_TYPE (rhs1);
+}
+  else
+{
+  wide_int min, max;
+  if (determine_value_range (exp, &min, &max) != VR_RANGE)
+	return wi::OVF_UNKNOWN;
+  optype = TREE_TYPE (exp);
+  op1r[0] = wide_int::from (min, prec, TYPE_SIGN (optype));
+  op1r[1] = wide_int::from (max, prec, TYPE_SIGN (optype));
+}
+
+  wide_int umax = wi::to_wide (TYPE_MAX_VALUE (optype), prec);
+  tree sgn_type = signed_type_for (optype);
+  wide_int smax = wi::to_wide (TYPE_MAX_VALUE (sgn_type), prec);
+
+  wi::overflow_type ovf = wi::OVF_NONE;
+  if (code == MULT_EXPR)
+{
+  /* Compute the upper bound of the result first, discarding any
+	 overflow.  Only the overflow in the lower bound matters.  */
+  range[1] = wi::mul (op1r[1], op2r[1], UNSIGNED, &ovf);
+  range[0] = wi::mul (op1r[0], op2r[0], UNSIGNED, &ovf);
+}
+  else if (code == MINUS_EXPR)
+{
+  range[1] = wi::sub (op1r[1], op2r[1], UNSIGNED, &ovf);
+  range[0] = wi::sub (op1r[0], op2r[0], UNSIGNED, &ovf);
+}
+  else if (code == PLUS_EXPR)
+{
+  signop sgn = UNSIGNED;
+  if (op2r[0] == op2r[1] && wi::ltu_p (smax, op2r[0]))
+	sgn = SIGNED;
+
+  range[1] = wi::add (op1r[1], op2r[1], sgn, &ovf);
+  range[0] = wi::add (op1r[0], op2r[0], sgn, &ovf);
+}
+  else
+{
+  range[0] = op1r[0];
+  range[1] = op1r[1];
+}
+
+  if (ovf != wi::OVF_NONE)
+return ovf;
+
+  /* Nothing can be determined from a range that spans zero.
+ TO DO: Assume a range with a negative

Re: [Patch] OpenMP/Fortran: Fix (re)mapping of allocatable/pointer arrays [PR96668]

2020-09-15 Thread Tobias Burnus


On 9/15/20 7:03 PM, Jakub Jelinek wrote:

On Tue, Sep 15, 2020 at 12:48:59AM +0200, Tobias Burnus wrote:

+  bool has_nullptr;
+  size_t j;
+  for (j = 0; j < n->tgt->list_count; j++)
+if (n->tgt->list[j].key == n)
+  {
+has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
+break;
+  }
+  if (n->tgt->list_count == 0)
+  has_nullptr = true;
+  else
+assert (j < n->tgt->list_count);

David Edelsohn just reported this (rightly so) results in -Wuninitialized
warnings, I think you meant bool has_nullptr = false;
in the definition (in both places that it is defined at).


No, I meant that it should be always set.

I think the uninitialized warning is a false positive:
list_count and j are unsigned. For list_count == 0 (and, hence, j = 0),
the value is set (special case). Otherwise, if j < list_count, has_nullptr
has been set via the 'for' loop. If it is not set in the loop, j == list_count
and that's (plus j > list_count) caught by the assert.

I admit that with the assert being disabled (NDEBUG is set), that's not
visible to the compiler but otherwise, it should be able to find out.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit 1b9bdd52037061d7a5bd77d177b060c93c528a5d
Author: Tobias Burnus 
Date:   Tue Sep 15 21:28:40 2020 +0200

libgomp/target.c: Silence -Wuninitialized warning

libgomp/ChangeLog:

PR fortran/96668
* target.c (gomp_map_vars_internal): Initialize has_nullptr.

diff --git a/libgomp/target.c b/libgomp/target.c
index 69cdd9f14a9..ab7ac9ba8d2 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -854,7 +854,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  int always_to_cnt = 0;
 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
 	{
-	  bool has_nullptr;
+	  bool has_nullptr = false;
 	  size_t j;
 	  for (j = 0; j < n->tgt->list_count; j++)
 		if (n->tgt->list[j].key == n)
@@ -1017,7 +1017,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	   == GOMP_MAP_TO_PSET)
 	  {
 	splay_tree_key k = tgt->list[i].key;
-	bool has_nullptr;
+	bool has_nullptr = false;
 	size_t j;
 	for (j = 0; j < k->tgt->list_count; j++)
 	  if (k->tgt->list[j].key == k)


Re: [PATCH 2/4, revised patch applied] PowerPC: Rename functions for min, max, cmove

2020-09-15 Thread Peter Bergner via Gcc-patches
On 9/15/20 1:38 PM, Alexandre Oliva wrote:
>> +case SFmode:
>> +case DFmode:
> 
> gcc110 (ppc64) in the build farm didn't like this.  The bootstrap
> compiler barfs on these expressions, because of some constexpr issue I
> haven't really looked into.
> 
> I'm testing this patch.  I'll check it in when I'm done.
> 
> 
> use E_*mode instead of just *mode

Bill's nightly testing on one of our old systems just hit this too.
Thanks for fixing and testing the fix!

Peter




Re: [PATCH] rs6000: inefficient 64-bit constant generation for consecutive 1-bits

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 10:48:37AM -0500, Peter Bergner wrote:
> > rs6000_is_valid_shift_mask handles this already (but it requires you to
> > pass in the shift needed).  rs6000_is_valid_mask will handle it.
> > rs6000_is_valid_and_mask does not get a shift count parameter, so cannot
> > use rldic currently.
> 
> After talking with you off line, I changed to using rs6000_is_valid_mask.
> The did mean I had to change num_insns_constant_gpr to take a mode param
> so it could be passed down to rs6000_is_valid_mask. 

All its callers have one readily available, so that will work fine.

> >> +(define_insn "rldic"
> >> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> >> +  (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
> >> +  (match_operand:DI 2 "u6bit_cint_operand" "n")
> >> +  (match_operand:DI 3 "u6bit_cint_operand" "n")]
> >> + UNSPEC_RLDIC))]
> >> +  "TARGET_POWERPC64"
> >> +  "rldic %0,%1,%2,%3")
> 
> ...and this is gone too.  I've replaced it with a generic splitter
> that matches an already existing define_insn (rotl3_mask).

That define_insn always does a *single* machine instruction (just like
most of our define_insns).


> >> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */
> >> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */
> >> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */
> >> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */
> >> +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */
> > 
> > Please use {} quotes, and \m\M.  \d can be helpful, too.
> 
> That was how I wrote it initially, but for some reason, it wouldn't match
> at all.  Do I need extra \'s for my regexs when using {}?

No, you don't need any here.  You only need to use \[ inside double
quotes because [ has a special meaning in double quotes!  (command
substitution.)

> \d is any digit?  Yeah, that would be better.  Gotta find a manpage or ???
> that describes what regex patterns are allowed.

"man re_syntax", and "man tcl" for the one-page tcl intro (it describes
the whole language: the substitutions, the quotes, etc.)

https://www.tcl.tk/man/tcl8.6/TclCmd/re_syntax.htm
https://www.tcl.tk/man/tcl8.6/TclCmd/Tcl.htm

> This all said, Alan's rtx_costs patch touches this same area and he talked
> about removing a similar splitter, so I think I will wait for his code to
> be committed and then rework this on top of his changes.

Yes, good plan.  Thanks!


Segher


Re: [PATCH] Allow copying of symbolic ranges to an irange.

2020-09-15 Thread Andrew MacLeod via Gcc-patches

On 9/15/20 11:57 AM, Aldy Hernandez wrote:
This fixes an ICE when trying to copy a legacy value_range containing 
a symbolic to a multi-range:


min = make_ssa_name (type);
max = build_int_cst (type, 55);
value_range vv (min, max);
int_range<2> vr = vv;

This doesn't affect anything currently, as we don't have a lot of 
interactions between value_range's and multi_range's in trunk right, 
but it will become a problem as soon as someone tries to get a range 
from evrp and copy it over to a multi-range.


OK pending tests?

gcc/ChangeLog:

* range-op.cc (multi_precision_range_tests): Normalize symbolics 
when copying to a multi-range.

* value-range.cc (irange::copy_legacy_range): Add test.
---
 gcc/range-op.cc    |  9 +
 gcc/value-range.cc | 12 +++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index c5f511422f4..8e52d5318e9 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -3463,6 +3463,15 @@ multi_precision_range_tests ()
   small = big;
   ASSERT_TRUE (small == int_range<1> (INT (21), INT (21), 
VR_ANTI_RANGE));


+  // Copying a legacy symbolic to an int_range should normalize the
+  // symbolic at copy time.
+  {
+    value_range legacy_range (make_ssa_name (integer_type_node), INT 
(25));

+    int_range<2> copy = legacy_range;
+    ASSERT_TRUE (copy == int_range<2>  (vrp_val_min (integer_type_node),
+    INT (25)));
+  }
+
   range3_tests ();
 }

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 20aa4f114c9..26ccd143e5c 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -101,7 +101,17 @@ irange::copy_legacy_range (const irange &src)
    VR_ANTI_RANGE);
 }
   else
-    set (src.min (), src.max (), VR_RANGE);
+    {
+  // If copying legacy to int_range, normalize any symbolics.
+  if (src.legacy_mode_p () && !range_has_numeric_bounds_p (&src))
+    {
+  value_range cst (src);
+  cst.normalize_symbolics ();
+  set (cst.min (), cst.max ());
+  return;
+    }
+  set (src.min (), src.max ());
+    }
 }

 // Swap min/max if they are out of order.  Return TRUE if further
these seems OK, but can't there be anti-ranges with symbolics  too? ie  
~[a_12, a_12]

The code for that just does:

 else if (src.kind () == VR_ANTI_RANGE)
    set (src.min (), src.max (), VR_ANTI_RANGE);

That should just go to varying I guess?

The conversion to legacy anti-range code also seems a little suspect in 
some cases...


Finally, we theoretically shouldn't be accessing 'min()' and 'max()' 
fields in a multirange, which also looks like might happen in the final 
else clause.


I wonder if it might be less complex to simply have 2 routines, like 
copy_to_legacy()  and copy_from_legacy()  instead of trying to handle 
then together?  I do find it seems to require more thinking than it 
should to follow the cases :-)


Andrew



Re: [PATCH 2/4, revised patch applied] PowerPC: Rename functions for min, max, cmove

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 03:38:05PM -0300, Alexandre Oliva wrote:
> On Sep 11, 2020, Michael Meissner via Gcc-patches  
> wrote:
> 
> > +case SFmode:
> > +case DFmode:
> 
> gcc110 (ppc64) in the build farm didn't like this.  The bootstrap
> compiler barfs on these expressions, because of some constexpr issue I
> haven't really looked into.

Yeah, the system compiler is 4.8.5 (this is centos7).

> I'm testing this patch.  I'll check it in when I'm done.

It is pre-approved, just check it in already please!


Segher


> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15190,8 +15190,8 @@ have_compare_and_set_mask (machine_mode mode)
>  {
>switch (mode)
>  {
> -case SFmode:
> -case DFmode:
> +case E_SFmode:
> +case E_DFmode:
>return TARGET_P9_MINMAX;
>  
>  default:


Re: [PATCH v3] c, c++: Implement -Wsizeof-array-div [PR91741]

2020-09-15 Thread Marek Polacek via Gcc-patches
On Tue, Sep 15, 2020 at 09:04:41AM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Mon, Sep 14, 2020 at 09:30:44PM -0400, Marek Polacek via Gcc-patches wrote:
> > --- a/gcc/c/c-tree.h
> > +++ b/gcc/c/c-tree.h
> > @@ -147,6 +147,11 @@ struct c_expr
> >   etc), so we stash a copy here.  */
> >source_range src_range;
> >  
> > +  /* True iff the sizeof expression was enclosed in parentheses.
> > + NB: This member is currently only initialized when .original_code
> > + is a SIZEOF_EXPR.  ??? Add a default constructor to this class.  */
> > +  bool parenthesized_p;
> > +
> >/* Access to the first and last locations within the source spelling
> >   of this expression.  */
> >location_t get_start () const { return src_range.m_start; }
> 
> I think a magic tree code would be better, c_expr is used in too many places
> and returned by many functions, so it is copied over and over.
> Even if you must add it, it would be better to change the struct layout,
> because right now there are fields: tree, location_t, tree, 2xlocation_t,
> which means 32-bit gap on 64-bit hosts before the second tree, so the new
> field would fit in there.  But, if it is mostly uninitialized, it is kind of
> unclean.

Ok, here's a version with PAREN_SIZEOF_EXPR.  It doesn't require changes to
c_expr, but adding a new tree code is always a pain...

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This patch implements a new warning, -Wsizeof-array-div.  It warns about
code like

  int arr[10];
  sizeof (arr) / sizeof (short);

where we have a division of two sizeof expressions, where the first
argument is an array, and the second sizeof does not equal the size
of the array element.  See e.g. .

Clang makes it possible to suppress the warning by parenthesizing the
second sizeof like this:

  sizeof (arr) / (sizeof (short));

so I followed suit.  In the C++ FE this was rather easy, because
finish_parenthesized_expr already set TREE_NO_WARNING.  In the C FE
I've added a new tree code, PAREN_SIZEOF_EXPR, to discern between the
non-() and () versions.

This warning is enabled by -Wall.  An example of the output:

x.c:5:23: warning: expression does not compute the number of elements in this 
array; element type is ‘int’, not ‘short int’ [-Wsizeof-array-div]
5 |   return sizeof (arr) / sizeof (short);
  |  ~^~~~
x.c:5:25: note: add parentheses around ‘sizeof (short int)’ to silence this 
warning
5 |   return sizeof (arr) / sizeof (short);
  | ^~
  | ( )
x.c:4:7: note: array ‘arr’ declared here
4 |   int arr[10];
  |   ^~~

gcc/c-family/ChangeLog:

PR c++/91741
* c-common.c (verify_tree): Handle PAREN_SIZEOF_EXPR.
(c_common_init_ts): Likewise.
* c-common.def (PAREN_SIZEOF_EXPR): New tree code.
* c-common.h (maybe_warn_sizeof_array_div): Declare.
* c-warn.c (sizeof_pointer_memaccess_warning): Unwrap NOP_EXPRs.
(maybe_warn_sizeof_array_div): New function.
* c.opt (Wsizeof-array-div): New option.

gcc/c/ChangeLog:

PR c++/91741
* c-parser.c (c_parser_binary_expression): Implement -Wsizeof-array-div.
(c_parser_postfix_expression): Set PAREN_SIZEOF_EXPR.
(c_parser_expr_list): Handle PAREN_SIZEOF_EXPR like SIZEOF_EXPR.
* c-tree.h (char_type_p): Declare.
* c-typeck.c (char_type_p): No longer static.

gcc/cp/ChangeLog:

PR c++/91741
* typeck.c (cp_build_binary_op): Implement -Wsizeof-array-div.

gcc/ChangeLog:

PR c++/91741
* doc/invoke.texi: Document -Wsizeof-array-div.

gcc/testsuite/ChangeLog:

PR c++/91741
* c-c++-common/Wsizeof-pointer-div.c: Add dg-warning.
* c-c++-common/Wsizeof-array-div1.c: New test.
* g++.dg/warn/Wsizeof-array-div1.C: New test.
* g++.dg/warn/Wsizeof-array-div2.C: New test.
---
 gcc/c-family/c-common.c   |  2 +
 gcc/c-family/c-common.def |  3 +
 gcc/c-family/c-common.h   |  1 +
 gcc/c-family/c-warn.c | 47 
 gcc/c-family/c.opt|  5 ++
 gcc/c/c-parser.c  | 48 ++--
 gcc/c/c-tree.h|  1 +
 gcc/c/c-typeck.c  |  2 +-
 gcc/cp/typeck.c   | 10 +++-
 gcc/doc/invoke.texi   | 19 +++
 .../c-c++-common/Wsizeof-array-div1.c | 56 +++
 .../c-c++-common/Wsizeof-pointer-div.c|  2 +-
 .../g++.dg/warn/Wsizeof-array-div1.C  | 37 
 .../g++.dg/warn/Wsizeof-array-div2.C  | 15 +
 14 files changed, 226 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wsizeof-array-div1.c
 create mode 1

[PATCH] PR fortran/97036 - [F2018] Allow ELEMENTAL RECURSIVE procedure prefix

2020-09-15 Thread Harald Anlauf
As stated in the PR, the Fortran 2018 standard removed the restriction
prohibiting ELEMENTAL RECURSIVE procedures.  Adjust the relevant check.

Regtested on x86_64-pc-linux-gnu.

OK for master?

Thanks,
Harald


PR fortran/97036 - [F2018] Allow ELEMENTAL RECURSIVE procedure prefix

gcc/fortran/ChangeLog:

* symbol.c (gfc_check_conflict): Allow ELEMENTAL RECURSIVE
procedure prefix for -std=f2018.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr97036.f90: New test.

diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index abd3b5ccfd0..df1e8965daa 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -569,7 +569,7 @@ gfc_check_conflict (symbol_attribute *attr, const char *name, locus *where)
   conf_std (allocatable, dummy, GFC_STD_F2003);
   conf_std (allocatable, function, GFC_STD_F2003);
   conf_std (allocatable, result, GFC_STD_F2003);
-  conf (elemental, recursive);
+  conf_std (elemental, recursive, GFC_STD_F2018);

   conf (in_common, dummy);
   conf (in_common, allocatable);
diff --git a/gcc/testsuite/gfortran.dg/pr97036.f90 b/gcc/testsuite/gfortran.dg/pr97036.f90
new file mode 100644
index 000..cfe51debce1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr97036.f90
@@ -0,0 +1,27 @@
+! { dg-do compile }
+! { dg-options "-std=f2018" }
+! PR fortran/97036 - [F2018] Allow ELEMENTAL RECURSIVE procedure prefix
+
+module m97036
+  implicit none
+contains
+  impure elemental recursive subroutine foo (n)
+integer, intent(in) :: n
+integer :: k(n), f(n), i
+k = [ (i-1, i=1,n) ]
+f = fac (k)
+print *, f
+  end subroutine foo
+  elemental recursive subroutine bla ()
+  end subroutine bla
+  elemental recursive function fac (k) result (f)
+integer, intent(in) :: k
+integer :: f
+f = 1
+if (k > 1) f = k*fac (k-1)
+  end function fac
+end module
+  use m97036
+  implicit none
+  call foo ([4,5])
+end


Re: [PATCH 4/4] PowerPC: Add power10 xscmp{eq,gt,ge}qp support

2020-09-15 Thread Segher Boessenkool
On Wed, Aug 26, 2020 at 10:46:37PM -0400, Michael Meissner wrote:
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 2709e46f7e5..60b45601e9b 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -1225,6 +1225,11 @@ (define_predicate "fpmask_comparison_operator"
>  (define_predicate "invert_fpmask_comparison_operator"
>(match_code "ne,unlt,unle"))
>  
> +;; Return 1 if OP is either a fpmask_comparison_operator or
> +;; invert_fpmask_comparison_operator.
> +(define_predicate "fpmask_normal_or_invert_operator"
> +  (match_code "eq,gt,ge,ne,unlt,unle"))

Keep "comparison" in the name?  Maybe "any_fpmask_comparison_operator",
we have other things named in that scheme already.

> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15177,6 +15177,10 @@ have_compare_and_set_mask (machine_mode mode)
>  case DFmode:
>return TARGET_P9_MINMAX;
>  
> +case KFmode:
> +case TFmode:
> +  return FLOAT128_IEEE_MINMAX_P (mode);

That needs the E_ stuff as well.

> +;; Secondary iterator for scalar binary floating point operations.  This is
> +;; used for the conditional moves when we have a compare and set mask
> +;; instruction.  Using this instruction allows us to do a conditional move
> +;; where the comparison type might be different from the values being moved.
> +(define_mode_iterator FSCALAR2 [SF
> + DF
> + (KF "FLOAT128_IEEE_MINMAX_P (KFmode)")
> + (TF "FLOAT128_IEEE_MINMAX_P (TFmode)")])

Needs a name change just like FSCALAR.  Maybe BFP?  Or better, just FP,
and rename the current FP to something else (it is only used for cstore
and cbranch, it should use a much less generic name there).


Please cut down the comment.  See GPR/GPR2 for example:

; This mode iterator allows :GPR to be used to indicate the allowable size
; of whole values in GPRs.
(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")])

; And again, for patterns that need two (potentially) different integer modes.
(define_mode_iterator GPR2 [SI (DI "TARGET_POWERPC64")])

It should not talk about an example where it is used: it can much more
easily say something more generic!


(And then send a patch first doing FP just as SFDF and replacing it
where we want it; and then a later patch adding KF.  That way, your
patch might be readable!)

Thanks,


Segher


[committed] analyzer: fix ICE when merging constraints w/o transitivity [PR96650]

2020-09-15 Thread David Malcolm via Gcc-patches
PR analyzer/96650 reports an assertion failure when merging the
intersection of two sets of constraints, due to the resulting
constraints being infeasible.

It turns out that the two input sets were each infeasible if
transitivity were considered, but -fanalyzer-transitivity was off.
However for this case, the merging code was "discovering" the
transitive infeasibility of the intersection of the constraints even
when -fanalyzer-transitivity is off, triggering an assertion failure.

I attempted various fixes for this, but each of them would have
introduced O(N^2) logic into the constraint-handling code into the
-fno-analyzer-transitivity case (with N == the number of constraints).

This patch fixes the ICE by tweaking the assertion, so that we
silently drop such constraints if -fanalyzer-transitivity is off.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-3212-g50ddbd0282e06614b29f0d3f3be5fbe70085a8bd.

gcc/analyzer/ChangeLog:
PR analyzer/96650
* constraint-manager.cc (merger_fact_visitor::on_fact): Replace
assertion that add_constraint succeeded with an assertion that
if it fails, -fanalyzer-transitivity is off.

gcc/testsuite/ChangeLog:
PR analyzer/96650
* gcc.dg/analyzer/pr96650-1-notrans.c: New test.
* gcc.dg/analyzer/pr96650-1-trans.c: New test.
* gcc.dg/analyzer/pr96650-2-notrans.c: New test.
* gcc.dg/analyzer/pr96650-2-trans.c: New test.
---
 gcc/analyzer/constraint-manager.cc| 10 ++-
 .../gcc.dg/analyzer/pr96650-1-notrans.c   | 30 +++
 .../gcc.dg/analyzer/pr96650-1-trans.c | 30 +++
 .../gcc.dg/analyzer/pr96650-2-notrans.c   | 30 +++
 .../gcc.dg/analyzer/pr96650-2-trans.c | 30 +++
 5 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96650-1-notrans.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96650-1-trans.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96650-2-notrans.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96650-2-trans.c

diff --git a/gcc/analyzer/constraint-manager.cc 
b/gcc/analyzer/constraint-manager.cc
index e578e0502f2..521501fd4f4 100644
--- a/gcc/analyzer/constraint-manager.cc
+++ b/gcc/analyzer/constraint-manager.cc
@@ -1752,7 +1752,15 @@ public:
 if (m_cm_b->eval_condition (lhs, code, rhs).is_true ())
   {
bool sat = m_out->add_constraint (lhs, code, rhs);
-   gcc_assert (sat);
+   if (!sat)
+ {
+   /* If -fanalyzer-transitivity is off, we can encounter cases
+  where at least one of the two constraint_managers being merged
+  is infeasible, but we only discover that infeasibility
+  during merging (PR analyzer/96650).
+  Silently drop such constraints.  */
+   gcc_assert (!flag_analyzer_transitivity);
+ }
   }
   }
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96650-1-notrans.c 
b/gcc/testsuite/gcc.dg/analyzer/pr96650-1-notrans.c
new file mode 100644
index 000..94c755540b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr96650-1-notrans.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-O2 -fno-analyzer-transitivity" } */
+
+int *wf;
+
+void
+yd (void);
+
+int
+cy (void);
+
+int *
+ee (int hp)
+{
+  if (hp != 0)
+yd ();
+
+  return 0;
+}
+
+void
+z0 (int co)
+{
+  int l4 = sizeof (int);
+
+ aq:
+  wf = ee (l4);
+  if (l4 < co)
+l4 = cy () + sizeof (int);
+  goto aq;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96650-1-trans.c 
b/gcc/testsuite/gcc.dg/analyzer/pr96650-1-trans.c
new file mode 100644
index 000..b20630bb806
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr96650-1-trans.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-O2 -fanalyzer-transitivity" } */
+
+int *wf;
+
+void
+yd (void);
+
+int
+cy (void);
+
+int *
+ee (int hp)
+{
+  if (hp != 0)
+yd ();
+
+  return 0;
+}
+
+void
+z0 (int co)
+{
+  int l4 = sizeof (int);
+
+ aq:
+  wf = ee (l4);
+  if (l4 < co)
+l4 = cy () + sizeof (int);
+  goto aq;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96650-2-notrans.c 
b/gcc/testsuite/gcc.dg/analyzer/pr96650-2-notrans.c
new file mode 100644
index 000..fc7c045a32f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr96650-2-notrans.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fno-analyzer-transitivity" } */
+
+#include "analyzer-decls.h"
+
+int foo (void);
+
+/* Infeasible path, requiring transitivity to find.  */
+
+void test_1 (int co, int y)
+{
+  if (4 < co)
+if (co < y)
+  if (y == 0)
+   __analyzer_dump_path (); /* { dg-message "path" } */
+}
+
+/* Infeasible path, requiring transitivity to find, with a merger.  */
+
+void test_2 (int co, int y, int z)
+{
+  if (4 < co)
+if (co < y)
+  if (y == 0)
+   {
+ while (foo ())
+   {
+   }
+ __analyzer_dump_path (); /* { dg-message "path"

Re: [RS6000] Count rldimi constant insns

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 10:49:39AM +0930, Alan Modra wrote:
> rldimi is generated by rs6000_emit_set_long_const when the high and
> low 32 bits of a 64-bit constant are equal.
> 
>   * config/rs6000/rs6000.c (num_insns_constant_gpr): Count rldimi
>   constants correctly.

Wow, did I miss that?  Whoops.  (That was PR93012, 72b2f3317b44.)

Okay for trunk.  Thanks!


Segher


Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Qing Zhao via Gcc-patches



> On Sep 15, 2020, at 2:41 PM, Segher Boessenkool  
> wrote:
> 
> On Tue, Sep 15, 2020 at 10:11:41AM +0100, Richard Sandiford wrote:
>> Qing Zhao  writes:
 On Sep 14, 2020, at 2:20 PM, Richard Sandiford  
 wrote:
> (Putting correct info in DF, inserting the new insns in pro_and_epi).
> 
> But, scheduling runs *after* that, and then you need to prevent the
> inserted (zeroing) insns from moving -- if you don't, the code after
> some zeroing can be used as gadget!  You want to always have all
> zeroing insns after *any* computational insn, or it becomes a gadget.

Please see the previous discussion, we have agreed to put the new pass   
(pass_zero_call_used_regs) 
in the beginning of the pass_late_compilation as following:

 PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
  NEXT_PASS (pass_zero_call_used_regs);
 NEXT_PASS (pass_compute_alignments);
 NEXT_PASS (pass_variable_tracking);
 NEXT_PASS (pass_free_cfg);
 NEXT_PASS (pass_machine_reorg);
 NEXT_PASS (pass_cleanup_barriers);
 NEXT_PASS (pass_delay_slots);

Scheduling has been done already. 

Qing


> 
> 
> Segher



Re: [RS6000] rs6000_rtx_costs for PLUS/MINUS constant

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 10:49:40AM +0930, Alan Modra wrote:
> These functions do behave a little differently for SImode, so the
> mode should be passed.
> 
>   * config/rs6000/rs6000.c (rs6000_rtx_costs): Pass mode to
>   reg_or_add_cint_operand and reg_or_sub_cint_operand.

Okay for trunk.  Thanks!

(Btw, please use [patch 2/6] etc. markers?  It helps refer to them :-) )


Segher


Re: [PING 2][PATCH 2/5] C front end support to detect out-of-bounds accesses to array parameters

2020-09-15 Thread Joseph Myers
On Wed, 9 Sep 2020, Martin Sebor via Gcc-patches wrote:

> Joseph, do you have any concerns with or comments on the most
> recent patch or is it okay as is?
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552266.html

I'm not yet convinced by the logic for extracting an array bound from a 
parameter declared using a typedef for an array type.

Say you have

typedef int A[3];

void f (A *x[*]);

so an argument that is an array, using [*], of pointers to arrays, where 
those latter arrays are specified using the typedef.  As I read the logic, 
first the pointer declarator is handled (ignored), then the array 
declarator results in [*] being stored in spec, then the "if (pd->kind == 
cdk_id)" handling comes into play - and because spec is "*" and vbchain is 
NULL_TREE, the upper bound of A gets extracted, but the upper bound of A 
should be irrelevant here because it's a type that's the target of a 
pointer.  The information from parm->specs->type logically comes before, 
not after, the information from the declarator.

As far as I can see, if one declaration gets part of the parameter type 
(involving VLAs) from a typedef and another declaration gets that part of 
the type directly in the declaration, the two spec strings constructed 
might differ in the number of VLA bounds mentioned in the spec strings.  
Is the code using those strings robust to handling the case where some of 
the VLA bounds are missing because they came from a typedef?

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Segher Boessenkool
On Tue, Sep 15, 2020 at 05:31:48PM -0500, Qing Zhao wrote:
> > But, scheduling runs *after* that, and then you need to prevent the
> > inserted (zeroing) insns from moving -- if you don't, the code after
> > some zeroing can be used as gadget!  You want to always have all
> > zeroing insns after *any* computational insn, or it becomes a gadget.
> 
> Please see the previous discussion, we have agreed to put the new pass   
> (pass_zero_call_used_regs) 
> in the beginning of the pass_late_compilation as following:

Yes, I know that at some point it was said that seemed like a good place
for it.

>  PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
>   NEXT_PASS (pass_zero_call_used_regs);
>  NEXT_PASS (pass_compute_alignments);
>  NEXT_PASS (pass_variable_tracking);
>  NEXT_PASS (pass_free_cfg);
>  NEXT_PASS (pass_machine_reorg);
>  NEXT_PASS (pass_cleanup_barriers);
>  NEXT_PASS (pass_delay_slots);
> 
> Scheduling has been done already. 

But there are many more passes that can reorder things.  Like
machine_reorg (which is a big deal).  I don't think other passes here
are harmful (maybe the shorten stuff)?  But.  Targets can also insert
more passes here.

If you want the zeroing insns to stay with the return, you have to
express that in RTL.  Anything else is extremely fragile.


Segher


Re: [PING][PATCH] improve validation of attribute arguments (PR c/78666)

2020-09-15 Thread Joseph Myers
On Wed, 9 Sep 2020, Martin Sebor via Gcc-patches wrote:

> Ping: https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552500.html
> 
> Aldy provided a bunch of comments on this patch but I'm still looking
> for a formal approval.

This patch is OK.

> > Some testing revealed that the code has different semantics for
> > strings: it compares them including all nuls embedded in them,
> > while I think the right semantics for attributes is to only consider
> > strings up and including the first nul (i.e., apply the usual string
> > semantics).  A test case for this corner case is as follows:
> > 
> >    __attribute__ ((section ("foo\0bar"))) void f (void);
> >    __attribute__ ((section ("foo"))) void f (void) { }
> > 
> > Without operand_equal_p() GCC accepts this and puts f in section
> > foo.  With the operand_equal_p() change above, it complains:
> > 
> > ignoring attribute ‘section ("foo\x00bar")’ because it conflicts with
> > previous ‘section ("foo")’ [-Wattributes]
> > 
> > I would rather not change the behavior in this corner case just to
> > save a few lines of code.  If it's thought that string arguments
> > to attributes (some or all) should be interpreted differently than
> > in other contexts it seems that such a change should be made
> > separately and documented in the manual.

I think that for at least the section attribute, embedded NULs are 
suspect.  In that case, so are wide strings, but for some attributes wide 
strings should be accepted and handled sensibly (but aren't, see bug 
91182).

-- 
Joseph S. Myers
jos...@codesourcery.com


[r11-3207 Regression] FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " & 3" 1 on Linux/x86_64 (-m64)

2020-09-15 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

8f0d743c2dee6afae5c6f861b0642b7b112a4a70 is the first bad commit
commit 8f0d743c2dee6afae5c6f861b0642b7b112a4a70
Author: Feng Xue 
Date:   Mon Aug 17 23:00:35 2020 +0800

tree-optimization/94234 - add plusminus-with-convert pattern

caused

FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " >> 2" 1
FAIL: gcc.dg/tree-ssa/20030807-10.c scan-tree-dump-times vrp1 " & 3" 1

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3207/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/20030807-10.c 
--target_board='unix{-m64}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


[r11-3207 Regression] FAIL: gcc.dg/ifcvt-3.c scan-rtl-dump ce1 "3 true changes made" on Linux/x86_64 (-m64)

2020-09-15 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

8f0d743c2dee6afae5c6f861b0642b7b112a4a70 is the first bad commit
commit 8f0d743c2dee6afae5c6f861b0642b7b112a4a70
Author: Feng Xue 
Date:   Mon Aug 17 23:00:35 2020 +0800

tree-optimization/94234 - add plusminus-with-convert pattern

caused

FAIL: gcc.dg/ifcvt-3.c scan-rtl-dump ce1 "3 true changes made"

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3207/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/ifcvt-3.c 
--target_board='unix{-m64}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


libgo: add additional references in sysinfo.c

2020-09-15 Thread Ian Lance Taylor via Gcc-patches
This libgo patch by Than McIntosh adds a few more explicit references
to enumeration constants (RUSAGE_SELF, DT_UNKNOWN) in sysinfo.c to
insure that their hosting enums are emitted into DWARF, when using a
clang host compiler during the gollvm build.  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
d04a5acb18867932d937351ab9804d6cfc83c5f0
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index dc63f4a696a..df9d2118dfd 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-b75a139fcc7c56988ce2d5b3a2b9e274eb521b0d
+a47485cd0e9ce6a8b3e88e53ccc0a440f0bd4351
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/sysinfo.c b/libgo/sysinfo.c
index 0692fd41eb7..76405597128 100644
--- a/libgo/sysinfo.c
+++ b/libgo/sysinfo.c
@@ -316,6 +316,7 @@ enum {
 SREF(dirent);
 SREF(dirent64);
 OTREF(DIR);
+EREF(DT_UNKNOWN);
 
 // From fcntl.h
 SREF(flock);
@@ -437,6 +438,7 @@ SREF(rusage);
 SREF(rlimit64);
 EREF(RLIMIT_NOFILE);
 EREF(PRIO_USER);
+EREF(RUSAGE_SELF);
 
 // From sys/select.h
 TREF(fd_set);


Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-09-15 Thread Qing Zhao via Gcc-patches



> On Sep 15, 2020, at 6:09 PM, Segher Boessenkool  
> wrote:
> 
> On Tue, Sep 15, 2020 at 05:31:48PM -0500, Qing Zhao wrote:
>>> But, scheduling runs *after* that, and then you need to prevent the
>>> inserted (zeroing) insns from moving -- if you don't, the code after
>>> some zeroing can be used as gadget!  You want to always have all
>>> zeroing insns after *any* computational insn, or it becomes a gadget.
>> 
>> Please see the previous discussion, we have agreed to put the new pass   
>> (pass_zero_call_used_regs) 
>> in the beginning of the pass_late_compilation as following:
> 
> Yes, I know that at some point it was said that seemed like a good place
> for it.
> 
>> PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
>>   NEXT_PASS (pass_zero_call_used_regs);
>> NEXT_PASS (pass_compute_alignments);
>> NEXT_PASS (pass_variable_tracking);
>> NEXT_PASS (pass_free_cfg);
>> NEXT_PASS (pass_machine_reorg);
>> NEXT_PASS (pass_cleanup_barriers);
>> NEXT_PASS (pass_delay_slots);
>> 
>> Scheduling has been done already. 
> 
> But there are many more passes that can reorder things.  Like
> machine_reorg (which is a big deal).  I don't think other passes here
> are harmful (maybe the shorten stuff)?  But.  Targets can also insert
> more passes here.
> 
> If you want the zeroing insns to stay with the return, you have to
> express that in RTL.  

What do you mean by “express that in RTL”?
Could you please explain this in more details?

Do you mean to implement this in “targetm.gen_return” and 
“targetm.gen_simple_return”?

Qing

> Anything else is extremely fragile.
> 
> 
> Segher



[PATCH] rs6000: Fix misnamed built-in

2020-09-15 Thread Bill Schmidt via Gcc-patches

The description in rs6000-builtin.def provides for a builtin named
__builtin_altivec_xst_len_r.  However, it is hand-defined in
altivec_init_builtins as __builtin_xst_len_r, against the usual naming
practice.  Fix that.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions; committed as obvious.


2020-09-15  Bill Schmidt  

gcc/
* config/rs6000/rs6000-call.c (altivec_init_builtins): Fix name
of __builtin_altivec_xst_len_r.


diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 77c7a1149fb..a8b520834c7 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -13490,7 +13490,7 @@ altivec_init_builtins (void)
 {
   def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
   P9V_BUILTIN_STXVL);
-  def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
+  def_builtin ("__builtin_altivec_xst_len_r", void_ftype_v16qi_pvoid_long,
   P9V_BUILTIN_XST_LEN_R);
 }
 



Re: [PATCH v3] C-SKY: Support -mfloat-abi=hard.

2020-09-15 Thread Xianmiao Qu

It looks good to me, pushed it to trunck.


Thanks,

Cooper

On 9/15/20 4:08 PM, Jojo R wrote:

gcc/ChangeLog:

* config/csky/csky.md (CSKY_NPARM_FREGS): New.
(call_value_internal_vs/d): New.
(untyped_call): New.
* config/csky/csky.h (TARGET_SINGLE_FPU): New.
(TARGET_DOUBLE_FPU): New.
(FUNCTION_VARG_REGNO_P): New.
(CSKY_VREG_MODE_P): New.
(FUNCTION_VARG_MODE_P): New.
(CUMULATIVE_ARGS): Add extra regs info.
(INIT_CUMULATIVE_ARGS): Use csky_init_cumulative_args.
(FUNCTION_ARG_REGNO_P): Use FUNCTION_VARG_REGNO_P.
* config/csky/csky-protos.h (csky_init_cumulative_args): Extern.
* config/csky/csky.c (csky_cpu_cpp_builtins): Support 
TARGET_HARD_FLOAT_ABI.
(csky_function_arg): Likewise.
(csky_num_arg_regs): Likewise.
(csky_function_arg_advance): Likewise.
(csky_function_value): Likewise.
(csky_libcall_value): Likewise.
(csky_function_value_regno_p): Likewise.
(csky_arg_partial_bytes): Likewise.
(csky_setup_incoming_varargs): Likewise.
(csky_init_cumulative_args): New.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-apply2.c : Skip if CSKY.
* gcc.dg/torture/stackalign/builtin-apply-2.c : Likewise.

---
  gcc/config/csky/csky-protos.h  |  2 +
  gcc/config/csky/csky.c | 96 +++---
  gcc/config/csky/csky.h | 34 ++--
  gcc/config/csky/csky.md| 84 +++
  gcc/testsuite/gcc.dg/builtin-apply2.c  |  2 +-
  .../gcc.dg/torture/stackalign/builtin-apply-2.c|  2 +-
  6 files changed, 200 insertions(+), 20 deletions(-)

diff --git a/gcc/config/csky/csky-protos.h b/gcc/config/csky/csky-protos.h
index cc1a033..2c02399 100644
--- a/gcc/config/csky/csky-protos.h
+++ b/gcc/config/csky/csky-protos.h
@@ -68,4 +68,6 @@ extern int csky_compute_pushpop_length (rtx *);
  
  extern int csky_default_branch_cost (bool, bool);

  extern bool csky_default_logical_op_non_short_circuit (void);
+
+extern void csky_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
  #endif /* GCC_CSKY_PROTOS_H */
diff --git a/gcc/config/csky/csky.c b/gcc/config/csky/csky.c
index 7ba3ed3..8463d8f 100644
--- a/gcc/config/csky/csky.c
+++ b/gcc/config/csky/csky.c
@@ -328,6 +328,16 @@ csky_cpu_cpp_builtins (cpp_reader *pfile)
  {
builtin_define ("__csky_hard_float__");
builtin_define ("__CSKY_HARD_FLOAT__");
+  if (TARGET_HARD_FLOAT_ABI)
+   {
+ builtin_define ("__csky_hard_float_abi__");
+ builtin_define ("__CSKY_HARD_FLOAT_ABI__");
+   }
+  if (TARGET_SINGLE_FPU)
+   {
+ builtin_define ("__csky_hard_float_fpu_sf__");
+ builtin_define ("__CSKY_HARD_FLOAT_FPU_SF__");
+   }
  }
else
  {
@@ -1790,9 +1800,22 @@ static rtx
  csky_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
  {
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  int reg = pcum->reg;
+  machine_mode mode = arg.mode;
  
-  if (*pcum < CSKY_NPARM_REGS)

-return gen_rtx_REG (arg.mode, CSKY_FIRST_PARM_REGNUM + *pcum);
+  if (FUNCTION_VARG_MODE_P(mode)
+  && !pcum->is_stdarg)
+{
+  reg = pcum->freg;
+
+  if (reg < CSKY_NPARM_FREGS)
+   return gen_rtx_REG (mode, CSKY_FIRST_VFP_REGNUM + reg);
+  else
+   return NULL_RTX;
+}
+
+  if (reg < CSKY_NPARM_REGS)
+return gen_rtx_REG (mode, CSKY_FIRST_PARM_REGNUM + reg);
  
return NULL_RTX;

  }
@@ -1802,7 +1825,7 @@ csky_function_arg (cumulative_args_t pcum_v, const 
function_arg_info &arg)
 MODE and TYPE.  */
  
  static int

-csky_num_arg_regs (machine_mode mode, const_tree type)
+csky_num_arg_regs (machine_mode mode, const_tree type, bool is_stdarg)
  {
int size;
  
@@ -1811,6 +1834,14 @@ csky_num_arg_regs (machine_mode mode, const_tree type)

else
  size = GET_MODE_SIZE (mode);
  
+  if (TARGET_HARD_FLOAT_ABI

+  && !is_stdarg)
+{
+  if (CSKY_VREG_MODE_P(mode)
+ && !TARGET_SINGLE_FPU)
+   return ((CSKY_NUM_WORDS (size) + 1) / 2);
+}
+
return CSKY_NUM_WORDS (size);
  }
  
@@ -1822,12 +1853,23 @@ csky_function_arg_advance (cumulative_args_t pcum_v,

   const function_arg_info &arg)
  {
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
-  int param_size = csky_num_arg_regs (arg.mode, arg.type);
+  int *reg = &pcum->reg;
+  machine_mode mode = arg.mode;
  
-  if (*pcum + param_size > CSKY_NPARM_REGS)

-*pcum = CSKY_NPARM_REGS;
+  int param_size = csky_num_arg_regs (mode, arg.type, pcum->is_stdarg);
+  int param_regs_nums = CSKY_NPARM_REGS;
+
+  if (FUNCTION_VARG_MODE_P(mode)
+  && !pcum->is_stdarg)
+{
+  reg = &pcum->freg;
+  param_regs_nums = CSKY_NPARM_FREGS;
+}
+
+  if (*reg + param_size > param_regs_nums)
+*reg = param_regs_nums;
else
-*pcum += p

[PATCH] Increase rtx_cost of sse_to_integer in skylake_cost.

2020-09-15 Thread Hongtao Liu via Gcc-patches
Hi:
  Rtx cost of sse_to_integer would be used by pass_stv as a
measurement for the scalar-to-vector transformation. As
https://gcc.gnu.org/pipermail/gcc-patches/2019-August/528839.html
indicates, movement between sse regs and gprs should be much expensive
than movement inside gprs(which is 2 as default). This patch would
also fix "pr96861".

  Bootstrap is ok, regression test is ok for both "i386.exp=*
--target_board='unix{-m32,}'" and "i386.exp=*
--target_board='unix{-m32\ -march=cascadelake,-m64\
-march=cascadelake}"".
  No big impact on SPEC2017.
  Ok for trunk?

gcc/ChangeLog

PR target/96861
* config/i386/x86-tune-costs.h (skylake_cost): increase rtx
cost of sse_to_integer from 2 to 6.

gcc/testsuite

* gcc.target/i386/pr95021-3.c: Add -mtune=generic.


--
BR,
Hongtao
From 2a9a943ec56ca3ea1ba1a2447a32b103c2a1c790 Mon Sep 17 00:00:00 2001
From: liuhongt 
Date: Wed, 16 Sep 2020 10:53:52 +0800
Subject: [PATCH] Increase rtx cost of sse_to_integer in skylake_cost.

As https://gcc.gnu.org/pipermail/gcc-patches/2019-August/528839.html
indicates, movement between SSE and gpr should be much expensive than
movement inside gpr(which is 2 as default).

gcc/ChangeLog

	PR target/96861
	* config/i386/x86-tune-costs.h (skylake_cost): increase rtx
	cost of sse_to_integer from 2 to 6.

gcc/testsuite

	* gcc.target/i386/pr95021-3.c: Add -mtune=generic.
---
 gcc/config/i386/x86-tune-costs.h  | 2 +-
 gcc/testsuite/gcc.target/i386/pr95021-3.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index a782a9dd9e3..c289b6ba454 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1769,7 +1769,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
   2, 2, 4,/* cost of moving XMM,YMM,ZMM register */
-  2,	/* cost of moving SSE register to integer.  */
+  6,	/* cost of moving SSE register to integer.  */
   20, 8,/* Gather load static, per_elt.  */
   22, 10,/* Gather store static, per_elt.  */
   64,	/* size of l1 cache.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c b/gcc/testsuite/gcc.target/i386/pr95021-3.c
index 1748161a77c..52f9e4569b3 100644
--- a/gcc/testsuite/gcc.target/i386/pr95021-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
+/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W -mtune=generic" } */
 /* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
 
 #include "pr95021-1.c"
-- 
2.18.1



Re: [PATCH] Check calls before loop unrolling

2020-09-15 Thread Jiufu Guo via Gcc-patches
Hi all,

This patch sets the default value to 16 for parameter
max_unrolled_average_calls which could be used to restict calls in loop
when unrolling.  This default value(16) is a big number which keeps
current behavior for almost all cases.

Bootstrap and regtest pass on powerpc64le.  Is this ok for trunk?

Thanks for comments!

Jiufu Guo

gcc/ChangeLog
2020-09-16  Jiufu Guo   

* params.opt (param_max_unrolled_average_calls_x1): New param.
* cfgloop.h (average_num_loop_calls): New declare.
* cfgloopanal.c (average_num_loop_calls): New function.
* loop-unroll.c (decide_unroll_constant_iteration,
decide_unroll_runtime_iterations,
decide_unroll_stupid): Check average_num_loop_calls and
param_max_unrolled_average_calls_x1.
---
 gcc/cfgloop.h |  2 ++
 gcc/cfgloopanal.c | 25 +
 gcc/loop-unroll.c | 10 ++
 gcc/params.opt|  4 
 4 files changed, 41 insertions(+)

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 18b404e292f..dab933da150 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_CFGLOOP_H
 
 #include "cfgloopmanip.h"
+#include "sreal.h"
 
 /* Structure to hold decision about unrolling/peeling.  */
 enum lpt_dec
@@ -387,6 +388,7 @@ extern vec get_loop_exit_edges (const class loop *, 
basic_block * = NULL);
 extern edge single_exit (const class loop *);
 extern edge single_likely_exit (class loop *loop, vec);
 extern unsigned num_loop_branches (const class loop *);
+extern sreal average_num_loop_calls (const class loop *);
 
 extern edge loop_preheader_edge (const class loop *);
 extern edge loop_latch_edge (const class loop *);
diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c
index 0b33e8272a7..a314db4e0c0 100644
--- a/gcc/cfgloopanal.c
+++ b/gcc/cfgloopanal.c
@@ -233,6 +233,31 @@ average_num_loop_insns (const class loop *loop)
   return ret;
 }
 
+/* Count the number of call insns in LOOP.  */
+sreal
+average_num_loop_calls (const class loop *loop)
+{
+  basic_block *bbs;
+  rtx_insn *insn;
+  unsigned int i, bncalls;
+  sreal ncalls = 0;
+
+  bbs = get_loop_body (loop);
+  for (i = 0; i < loop->num_nodes; i++)
+{
+  bncalls = 0;
+  FOR_BB_INSNS (bbs[i], insn)
+   if (CALL_P (insn))
+ bncalls++;
+
+  ncalls += (sreal) bncalls
+   * bbs[i]->count.to_sreal_scale (loop->header->count);
+}
+  free (bbs);
+
+  return ncalls;
+}
+
 /* Returns expected number of iterations of LOOP, according to
measured or guessed profile.
 
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 693c7768868..56b8fb37d2a 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -370,6 +370,10 @@ decide_unroll_constant_iterations (class loop *loop, int 
flags)
 nunroll = nunroll_by_av;
   if (nunroll > (unsigned) param_max_unroll_times)
 nunroll = param_max_unroll_times;
+  if (!loop->unroll
+  && (average_num_loop_calls (loop) * (sreal) 1).to_int ()
+  > (unsigned) param_max_unrolled_average_calls_x1)
+nunroll = 0;
 
   if (targetm.loop_unroll_adjust)
 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
@@ -689,6 +693,9 @@ decide_unroll_runtime_iterations (class loop *loop, int 
flags)
 nunroll = nunroll_by_av;
   if (nunroll > (unsigned) param_max_unroll_times)
 nunroll = param_max_unroll_times;
+  if ((average_num_loop_calls (loop) * (sreal) 1).to_int ()
+  > (unsigned) param_max_unrolled_average_calls_x1)
+nunroll = 0;
 
   if (targetm.loop_unroll_adjust)
 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
@@ -1173,6 +1180,9 @@ decide_unroll_stupid (class loop *loop, int flags)
 nunroll = nunroll_by_av;
   if (nunroll > (unsigned) param_max_unroll_times)
 nunroll = param_max_unroll_times;
+  if ((average_num_loop_calls (loop) * (sreal) 1).to_int ()
+  > (unsigned) param_max_unrolled_average_calls_x1)
+nunroll = 0;
 
   if (targetm.loop_unroll_adjust)
 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
diff --git a/gcc/params.opt b/gcc/params.opt
index f39e5d1a012..80605861223 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -634,6 +634,10 @@ The maximum number of unrollings of a single loop.
 Common Joined UInteger Var(param_max_unrolled_insns) Init(200) Param 
Optimization
 The maximum number of instructions to consider to unroll in a loop.
 
+-param=max-unrolled-average-calls-x1=
+Common Joined UInteger Var(param_max_unrolled_average_calls_x1) 
Init(16) Param Optimization
+The maximum number of calls to consider to unroll in a loop on average and 
multiply 1.
+
 -param=max-unswitch-insns=
 Common Joined UInteger Var(param_max_unswitch_insns) Init(50) Param 
Optimization
 The maximum number of insns of an unswitched loop.
-- 
2.25.1



Jan Hubicka  writes:

>> On Thu, Aug 20, 2020 at 6:35 AM guojiufu via Gcc-patches
>>  wrote:
>> >
>> > Hi,
>> >
>> > When unroll loops, if there are cal

[PATCH] C-SKY: Fix wrong ld name with option -mfloat-abi=hard.

2020-09-15 Thread Jojo R
gcc/ChangeLog:

* config/csky/csky-linux-elf.h (GLIBC_DYNAMIC_LINKER): Use mfloat-abi.

---
 gcc/config/csky/csky-linux-elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/csky/csky-linux-elf.h b/gcc/config/csky/csky-linux-elf.h
index 9a57dd04..cf587ae 100644
--- a/gcc/config/csky/csky-linux-elf.h
+++ b/gcc/config/csky/csky-linux-elf.h
@@ -63,7 +63,7 @@
   %{mvdsp:-mvdsp}  \
   "
 
-#define GLIBC_DYNAMIC_LINKER 
"/lib/ld-linux-cskyv2%{mhard-float:-hf}%{mbig-endian:-be}.so.1"
+#define GLIBC_DYNAMIC_LINKER 
"/lib/ld-linux-cskyv2%{mfloat-abi=hard:-hf}%{mbig-endian:-be}.so.1"
 
 #define LINUX_TARGET_LINK_SPEC "%{h*} %{version:-v}\
%{b}\
-- 
1.9.1



Re: [aarch64] Backport missing NEON intrinsics to GCC8

2020-09-15 Thread Pop, Sebastian via Gcc-patches
Thanks Kyrill for your review.

I committed the patches to the gcc-8 branch:
https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=2c55e6caa9432b2c1f081cb3aeddd36abec03233
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a4004f62d60ada3a20dbf30146ca461047a575cc

and to the gcc-9 branch:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c5aca0333b723d5e20366593cd01047d105f54e4

Sebastian

On 9/15/20, 7:46 AM, "Kyrylo Tkachov"  wrote:

Hi Sebastian,

This patch implements missing intrinsics.
I'm okay with this being applied to the GCC 8 branch as these intrinsics 
have been defined in ACLE for a long time.
It is arguably a bug that they've been missing from GCC8.
Their implementation is fairly self-contained we haven't had any bugs 
reported against these in my recollection.

So ok on the grounds that it's a bug-fix.
Thanks,
Kyrill

From: Pop, Sebastian 
Sent: 11 September 2020 20:54
To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov 
Subject: [aarch64] Backport missing NEON intrinsics to GCC8

Hi,

gcc-8 branch is missing NEON intrinsics for loads and stores.
Attached patches pass bootstrap and regression testing on Graviton2 
aarch64-linux.

Ok to commit to gcc-8 branch?

Thanks,
Sebastian



Re: [PATCH v2] rs6000: Remove useless insns fed into lvx/stvx [PR97019]

2020-09-15 Thread Kewen.Lin via Gcc-patches
Hi Segher,

Thanks for your suggestions!

>> +  for (unsigned i = 0; i < and_insns.length (); ++i)
> 
> "i++" is used more often, is more traditional.
> 

Updated.

>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
>> @@ -0,0 +1,82 @@
>> +/* This issue can only exist on little-endian P8 targets, since
>> +   the built-in functions vec_ld/vec_st will use lxvd2x/stxvd2x
>> +   (P8 big-endian) or lxv/stxv (P9 and later).  */
>> +/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> 
> Do you need to test for LE?  If not, just always run it?  If it works,
> it works, it doesn't matter that you do not expect it to ever fail (we
> do not really expect *any* test we have to ever fail *anywhere*, heh).
> 

Yes, I did test it on P8 BE, it can generate lxvd2x/stxvd2x on some paths
fed by rldicr ...0,59 which are necessary, but they will make the
not-rldicr testing fail unexpectedly.

>> +/* { dg-final { scan-assembler-not "rldicr\[ \t\]+\[0-9\]+,\[0-9\]+,0,59" } 
>> } */
> 
> Please use {} quotes, and \s and \d.
> 
> You can also use  {(?n)rldicr.*,0,59}  since (?n) makes . not match
> newlines anymore.

Updated with /* { dg-final { scan-assembler-not {(?n)rldicr.*,0,59} } } */

Re-tested and committed it in r11-3217.  Thanks!

BR,
Kewen


Re: [PATCH v2] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-09-15 Thread luoxhu via Gcc-patches



On 2020/9/15 14:51, Richard Biener wrote:


>> I only see VAR_DECL and PARM_DECL, is there any function to check the tree
>> variable is global? I added DECL_REGISTER, but the RTL still expands to 
>> stack:
> 
> is_global_var () or alternatively !auto_var_in_fn_p (), I think doing
> IFN_SET only
> makes sense if there's the chance we can promote the variable to a
> register.  But it
> would be an incorrect transform (it stores the whole vector) if the
> vector storage
> could "escape" to another thread - which means you probably have to check
> !TREE_ADDRESSABLE as well.
> 

The tree of param "u" will be marked ADDRESSABLE when generating 
"VIEW_CONVERT_EXPR(D.3190)[_1] = i;", if check !TREE_ADDRESSABLE, no 
IFN_SET
will be produced in gimple-isel.


#1  0x1066c700 in convert_vector_to_array_for_subscript (loc=5307072, 
vecp=0x7fffc5d0,
index=) at ../../gcc/gcc/c-family/c-common.c:8169
#2  0x10553b54 in build_array_ref (loc=5307072, array=, index=<
trunc_mod_expr 0x759c73a0>) at ../../gcc/gcc/c/c-typeck.c:2668
#3  0x105c8824 in c_parser_postfix_expression_after_primary 
(parser=0x77f703f0, expr_lo
c=5307040, expr=...) at ../../gcc/gcc/c/c-parser.c:10494
#4  0x105c7570 in c_parser_postfix_expression (parser=0x77f703f0) 
at ../../gcc/gcc/c/c-
parser.c:10216

>>
>> My current implementation does:
>>
>> 1)  v = vec_insert (i, u, n);
>>
>> =>gimple:
>> {
>>register __vector signed int D.3190;
>>D.3190 = u;// *new decl and copy u first.*
>>_1 = n & 3;
>>VIEW_CONVERT_EXPR<int[4]>(D.3190)[_1] = i;   // *update op0 of 
>> VIEW_CONVERT_EXPR*
>>_2 = D.3190;
>>...
>> }
>>
>> =>isel:
>> {
>>register __vector signed int D.3190;
>>D.3190 = u_4(D);
>>_1 = n_6(D) & 3;
>>.VEC_SET (&D.3190, i_7(D), _1);
> 
> why are you passing the address of D.3190 to .VEC_SET?  That will not
> make D.3190
> be expanded to a pseudo.   You really need to have GIMPLE registers
> here (SSA name)
> and thus a return value, leaving the argument unmodified
> 
>D.3190_3 = .VEC_SET (D.3190_4, i_7(D), _1);
> 
> note this is why I asked about the actual CPU instruction - as I read
> Seghers mail
> the instruction modifies a vector register, not memory.
> 

Updated the code and got expected gimple-isel output and ASM for both 2 cases:

pr79251.c.236t.isel:

__attribute__((noinline))
test (__vector signed int u, int i, size_t n)
{
  long unsigned int _1;
  __vector signed int _6;
  vector(4) int _7;
  vector(4) int vec_set_dst_8;

   [local count: 1073741824]:
  _1 = n_2(D) & 3;
  _7 = u;
  vec_set_dst_8 = .VEC_SET (_7, i_4(D), _1);
  u = vec_set_dst_8;
  _6 = u;
  return _6;

}

But tree variable "u" need to be set to "TREE_ADDRESSABLE (view_op0) = 0;"
(Maybe check IFN VEC_SET and set to 0 in discover_nonconstant_array_refs
is better later.) after generating the IFN VEC_SET, otherwise, "u" will still
be expanded to stack since expander:
 "Replacing Expressions:  _7 replace with --> _7 = u;".

Setting "u" to non-addressable also seems really unreasonable as for below
case, as u[n+1] need be ADDRESSABLE:

__attribute__ ((noinline)) vector TYPE
test (vector TYPE u, TYPE i, size_t n)
{
 u[n % 4] = i;
 u[n+1] = i+1;
 return u;
}

=> gimple-isel with both VEC_SET and VIEW_CONVERT_EXPR:

test (__vector signed int u, int i, size_t n)
{
  long unsigned int _1;
  long unsigned int _2;
  int _3;
  __vector signed int _9;
  vector(4) int _10;
  vector(4) int vec_set_dst_11;

   [local count: 1073741824]:
  _1 = n_4(D) & 3;
  _10 = u;
  vec_set_dst_11 = .VEC_SET (_10, i_6(D), _1);
  u = vec_set_dst_11;
  _2 = n_4(D) + 1;
  _3 = i_6(D) + 1;
  VIEW_CONVERT_EXPR<int[4]>(u)[_2] = _3;
  _9 = u;
  return _9;

}


Below code are generating the IFN call, create_tmp_reg or 
create_tmp_reg_ssa_name
seems not create a variable that will be allocated on register?


diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index b330cf4c20e..a699022cd09 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
...
+  if (!is_global_var (view_op0)
+ && TREE_CODE (TREE_TYPE (view_op0)) == VECTOR_TYPE
+ && tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (view_op0)))
+ && tree_to_uhwi (TYPE_SIZE (TREE_TYPE (view_op0))) == 128
+ && determine_value_range (pos, &minv, &maxv) == VR_RANGE
+ && wi::geu_p (minv, 0)
+ && wi::leu_p (maxv, (128 / GET_MODE_BITSIZE (innermode
+   {
+ location_t loc = gimple_location (stmt);
+ tree var_src = create_tmp_reg (TREE_TYPE (view_op0));
+ tree var_dst
+   = make_temp_ssa_name (TREE_TYPE (view_op0), NULL, "vec_set_dst");
+ TREE_ADDRESSABLE (view_op0) = 0;
+
+ ass_stmt = gimple_build_assign (var_src, view_op0);
+ gimple_set_location (ass_stmt, loc);
+ gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT);
+
+ new_stmt
+   = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos);
+
+ gimple_call_set_lhs (new_stmt, var_dst);
+
+  

[PATCH] -mno-xsave should imply -mno-avx since -mavx implies -mxsave

2020-09-15 Thread Hongtao Liu via Gcc-patches
Hi:
If -mavx implies -mxsave, then -mno-xsave should imply -mno-avx.
Current status is -mno-avx implies -mno-xsave which should be wrong.

Bootstrap is ok, Regression test is ok for i386/x86 backend.
Ok for trunk?

gcc/ChangeLog

* common/config/i386/i386-common.c
(OPTION_MASK_ISA_AVX_UNSET): Remove OPTION_MASK_ISA_XSAVE_UNSET.
(OPTION_MASK_ISA_XSAVE_UNSET): Add OPTION_MASK_ISA_AVX_UNSET.

gcc/testsuite/ChangeLog

* gcc.target/i386/xsave-avx-1.c: New test.



-- 
BR,
Hongtao
From c76f6fee90d95a6f84cb0710d8700279a276cdfb Mon Sep 17 00:00:00 2001
From: liuhongt 
Date: Wed, 16 Sep 2020 13:56:30 +0800
Subject: [PATCH] If -mavx implies -mxsave, then -mno-xsave should imply
 -mno-avx.

Current status is -mno-avx implies -mno-xsave which should be wrong.

gcc/ChangeLog

	* common/config/i386/i386-common.c
	(OPTION_MASK_ISA_AVX_UNSET): Remove OPTION_MASK_ISA_XSAVE_UNSET.
	(OPTION_MASK_ISA_XSAVE_UNSET): Add OPTION_MASK_ISA_AVX_UNSET.

gcc/testsuite/ChangeLog

	* gcc.target/i386/xsave-avx-1.c: New test.
---
 gcc/common/config/i386/i386-common.c|  5 +++--
 gcc/testsuite/gcc.target/i386/xsave-avx-1.c | 12 
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/xsave-avx-1.c

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 5305145a8c9..6e3409556fb 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -187,12 +187,13 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_AVX_UNSET \
   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
| OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
-   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_XSAVE_UNSET)
+   | OPTION_MASK_ISA_AVX2_UNSET )
 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
 #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
 #define OPTION_MASK_ISA_XSAVE_UNSET \
   (OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \
-   | OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET)
+   | OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \
+   | OPTION_MASK_ISA_AVX_UNSET)
 #define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
 #define OPTION_MASK_ISA_AVX2_UNSET \
   (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
diff --git a/gcc/testsuite/gcc.target/i386/xsave-avx-1.c b/gcc/testsuite/gcc.target/i386/xsave-avx-1.c
new file mode 100644
index 000..ca87a791446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/xsave-avx-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mxsave -mno-avx" } */
+
+#include <x86intrin.h>
+
+extern int m;
+
+void
+avx_imply_save (void)
+{
+  _xgetbv (m);
+}
-- 
2.18.1



[PATCH] store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053]

2020-09-15 Thread Jakub Jelinek via Gcc-patches
Hi!

As the testcases show, if we have something like:
  MEM <char[12]> [&b + 8B] = {};
  MEM[(short *) &b] = 5;
  _5 = *x_4(D);
  MEM <long long unsigned int> [&b + 2B] = _5;
  MEM[(char *)&b + 16B] = 88;
  MEM[(int *)&b + 20B] = 1;
then in sort_by_bitpos the stores are almost like in the given order,
except the first store is after the = _5; store.
We can't coalesce the = 5; store with = _5;, because the latter is MEM_REF,
while the former INTEGER_CST, and we can't coalesce the = _5 store with
the = {} store because the former is MEM_REF, the latter INTEGER_CST.
But we happily coalesce the remaining 3 stores, which is wrong, because the
= _5; store overlaps those and is in between them in the program order.
We already have code to deal with similar cases in check_no_overlap, but we
deal only with the following stores in sort_by_bitpos order, not the earlier
ones.

The following patch checks also the earlier ones.  In coalesce_immediate_stores
it computes the first one that needs to be checked (all the ones whose
bitpos + bitsize is smaller or equal to merged_store->start don't need to be
checked and don't need to be checked even for any following attempts because
of the sort_by_bitpos sorting) and the end of that (that is the first store
in the merged_store).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-09-16  Jakub Jelinek  

PR tree-optimization/97053
* gimple-ssa-store-merging.c (check_no_overlap): Add FIRST_ORDER,
START, FIRST_EARLIER and LAST_EARLIER arguments.  Return false if
any stores between FIRST_EARLIER inclusive and LAST_EARLIER exclusive
has order in between FIRST_ORDER and LAST_ORDER and overlaps the to
be merged store.
(imm_store_chain_info::try_coalesce_bswap): Add FIRST_EARLIER argument.
Adjust check_no_overlap caller.
(imm_store_chain_info::coalesce_immediate_stores): Add first_earlier
and last_earlier variables, adjust them during iterations.  Adjust
check_no_overlap callers, call check_no_overlap even when extending
overlapping stores by extra INTEGER_CST stores.

* gcc.dg/store_merging_31.c: New test.
* gcc.dg/store_merging_32.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2020-08-12 12:45:46.0 +0200
+++ gcc/gimple-ssa-store-merging.c  2020-09-15 16:51:11.393453396 +0200
@@ -2116,7 +2116,8 @@ public:
   }
   }
   bool terminate_and_process_chain ();
-  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
+  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
+  unsigned int);
   bool coalesce_immediate_stores ();
   bool output_merged_store (merged_store_group *);
   bool output_merged_stores ();
@@ -2443,14 +2444,39 @@ gather_bswap_load_refs (vec<tree> *refs,
into the group.  That way it will be its own store group and will
not be touched.  If ALL_INTEGER_CST_P and there are overlapping
INTEGER_CST stores, those are mergeable using merge_overlapping,
-   so don't return false for those.  */
+   so don't return false for those.
+
+   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
+   (exclusive), whether they don't overlap the bitrange START to END
+   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
+   prevent merging in cases like:
+ MEM <char[12]> [&b + 8B] = {};
+ MEM[(short *) &b] = 5;
+ _5 = *x_4(D);
+ MEM <long long unsigned int> [&b + 2B] = _5;
+ MEM[(char *)&b + 16B] = 88;
+ MEM[(int *)&b + 20B] = 1;
+   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
+   be merged with it, because the = _5 store overlaps these and is in between
+   them in sort_by_order ordering.  If it was merged, the merged store would
+   go after the = _5 store and thus change behavior.  */
 
 static bool
 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
- bool all_integer_cst_p, unsigned int last_order,
- unsigned HOST_WIDE_INT end)
+ bool all_integer_cst_p, unsigned int first_order,
+ unsigned int last_order, unsigned HOST_WIDE_INT start,
+ unsigned HOST_WIDE_INT end, unsigned int first_earlier,
+ unsigned end_earlier)
 {
   unsigned int len = m_store_info.length ();
+  for (unsigned int j = first_earlier; j < end_earlier; j++)
+{
+  store_immediate_info *info = m_store_info[j];
+  if (info->order > first_order
+ && info->order < last_order
+ && info->bitpos + info->bitsize > start)
+   return false;
+}
   for (++i; i < len; ++i)
 {
   store_immediate_info *info = m_store_info[i];
@@ -2471,7 +2497,8 @@ check_no_overlap (vecbitsize;
@@ -2611,7 +2638,8 @@ imm_store_chain_info::try_coalesce_bswap
   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
 return false;
 
-  if (!check_no_overlap (m_store_info, last, false, last_order, end))
+  if (!check_no_overlap (m_store_info, last, false, first_or

Re: [PATCH] store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053]

2020-09-15 Thread Richard Biener
On Wed, 16 Sep 2020, Jakub Jelinek wrote:

> Hi!
> 
> As the testcases show, if we have something like:
>   MEM <char[12]> [&b + 8B] = {};
>   MEM[(short *) &b] = 5;
>   _5 = *x_4(D);
>   MEM <long long unsigned int> [&b + 2B] = _5;
>   MEM[(char *)&b + 16B] = 88;
>   MEM[(int *)&b + 20B] = 1;
> then in sort_by_bitpos the stores are almost like in the given order,
> except the first store is after the = _5; store.
> We can't coalesce the = 5; store with = _5;, because the latter is MEM_REF,
> while the former INTEGER_CST, and we can't coalesce the = _5 store with
> the = {} store because the former is MEM_REF, the latter INTEGER_CST.
> But we happily coalesce the remaining 3 stores, which is wrong, because the
> = _5; store overlaps those and is in between them in the program order.
> We already have code to deal with similar cases in check_no_overlap, but we
> deal only with the following stores in sort_by_bitpos order, not the earlier
> ones.
> 
> The following patch checks also the earlier ones.  In 
> coalesce_immediate_stores
> it computes the first one that needs to be checked (all the ones whose
> bitpos + bitsize is smaller or equal to merged_store->start don't need to be
> checked and don't need to be checked even for any following attempts because
> of the sort_by_bitpos sorting) and the end of that (that is the first store
> in the merged_store).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2020-09-16  Jakub Jelinek  
> 
>   PR tree-optimization/97053
>   * gimple-ssa-store-merging.c (check_no_overlap): Add FIRST_ORDER,
>   START, FIRST_EARLIER and LAST_EARLIER arguments.  Return false if
>   any stores between FIRST_EARLIER inclusive and LAST_EARLIER exclusive
>   has order in between FIRST_ORDER and LAST_ORDER and overlaps the to
>   be merged store.
>   (imm_store_chain_info::try_coalesce_bswap): Add FIRST_EARLIER argument.
>   Adjust check_no_overlap caller.
>   (imm_store_chain_info::coalesce_immediate_stores): Add first_earlier
>   and last_earlier variables, adjust them during iterations.  Adjust
>   check_no_overlap callers, call check_no_overlap even when extending
>   overlapping stores by extra INTEGER_CST stores.
> 
>   * gcc.dg/store_merging_31.c: New test.
>   * gcc.dg/store_merging_32.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2020-08-12 12:45:46.0 +0200
> +++ gcc/gimple-ssa-store-merging.c2020-09-15 16:51:11.393453396 +0200
> @@ -2116,7 +2116,8 @@ public:
>}
>}
>bool terminate_and_process_chain ();
> -  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
> +  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
> +unsigned int);
>bool coalesce_immediate_stores ();
>bool output_merged_store (merged_store_group *);
>bool output_merged_stores ();
> @@ -2443,14 +2444,39 @@ gather_bswap_load_refs (vec<tree> *refs,
> into the group.  That way it will be its own store group and will
> not be touched.  If ALL_INTEGER_CST_P and there are overlapping
> INTEGER_CST stores, those are mergeable using merge_overlapping,
> -   so don't return false for those.  */
> +   so don't return false for those.
> +
> +   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
> +   (exclusive), whether they don't overlap the bitrange START to END
> +   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
> +   prevent merging in cases like:
> + MEM <char[12]> [&b + 8B] = {};
> + MEM[(short *) &b] = 5;
> + _5 = *x_4(D);
> + MEM <long long unsigned int> [&b + 2B] = _5;
> + MEM[(char *)&b + 16B] = 88;
> + MEM[(int *)&b + 20B] = 1;
> +   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
> +   be merged with it, because the = _5 store overlaps these and is in between
> +   them in sort_by_order ordering.  If it was merged, the merged store would
> +   go after the = _5 store and thus change behavior.  */
>  
>  static bool
>  check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
> -   bool all_integer_cst_p, unsigned int last_order,
> -   unsigned HOST_WIDE_INT end)
> +   bool all_integer_cst_p, unsigned int first_order,
> +   unsigned int last_order, unsigned HOST_WIDE_INT start,
> +   unsigned HOST_WIDE_INT end, unsigned int first_earlier,
> +   unsigned end_earlier)
>  {
>unsigned int len = m_store_info.length ();
> +  for (unsigned int j = first_earlier; j < end_earlier; j++)
> +{
> +  store_immediate_info *info = m_store_info[j];
> +  if (info->order > first_order
> +   && info->order < last_order
> +   && info->bitpos + info->bitsize > start)
> + return false;
> +}
>for (++i; i < len; ++i)
>  {
>store_immediate_info *info = m_store_info[i];
> @@ -2471,7 +2497,8 @@ check_no_overlap (vec  bool
>  imm_store_chain_info::try_coalesce_bswap (merged_stor