Re: [PATCH] Fix ms-sysv generator with gcc-4.8 host-g++

2018-09-17 Thread Richard Biener
On Sun, 16 Sep 2018, Bernd Edlinger wrote:

> Hi,
> 
> 
> this prevents test failures when gcc-4.8 is used as host-g++ tool, since this 
> version does
> understand -std=c++11 but not -fno-diagnostics-show-line-numbers and 
> -fdiagnostics-color=never.
> With slightly newer gcc tools like gcc-5 everything works fine.
> 
> Apparently the following commit added those options to TEST_ALWAYS_FLAGS.
> 
> 2018-08-09  David Malcolm  
> 
>  PR other/84889
>  * gcc.dg/plugin/diagnostic-test-show-locus-bw-line-numbers.c: New
>  test.
>  * gcc.dg/plugin/diagnostic-test-show-locus-color-line-numbers.c:
>  New test.
>  * gcc.dg/plugin/plugin.exp (plugin_test_list): Add the new tests.
>  * lib/prune.exp: Add -fno-diagnostics-show-line-numbers to
>  TEST_ALWAYS_FLAGS.
> 
> Work around is not using TEST_ALWAYS_FLAGS in the host tool invocation.
> 
> 
> Bootstrapped and reg-tested with gcc-4.8 and gcc-5 host tools.
> Is it OK for trunk?

OK.
 
> 
> Thanks
> Bernd.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[patch] Fix PR tree-optimization/86990

2018-09-17 Thread Eric Botcazou
Hi,

this is a regression present on the mainline only: now that the GIMPLE store 
merging pass is able to mix constants and SSA_NAMEs on the RHS of stores to 
bit-fields, we need to check that the entire merged store group is made of 
constants only when encountering overlapping stores.

Tested on x86_64-suse-linux, OK for the mainline?


2018-09-17  Eric Botcazou  

PR tree-optimization/86990
* gimple-ssa-store-merging.c (imm_store_chain_info::coalesce_immediate):
Check that the entire merged store group is made of constants only for
overlapping stores.


2018-09-17  Eric Botcazou  

* gcc.c-torture/execute/20180917-1.c: New test.

-- 
Eric BotcazouIndex: gimple-ssa-store-merging.c
===
--- gimple-ssa-store-merging.c	(revision 264342)
+++ gimple-ssa-store-merging.c	(working copy)
@@ -2701,16 +2701,25 @@ imm_store_chain_info::coalesce_immediate
 		merged_store->start + merged_store->width - 1))
 	{
 	  /* Only allow overlapping stores of constants.  */
-	  if (info->rhs_code == INTEGER_CST
-	  && merged_store->stores[0]->rhs_code == INTEGER_CST)
+	  if (info->rhs_code == INTEGER_CST)
 	{
+	  bool only_constants = true;
+	  store_immediate_info *infoj;
+	  unsigned int j;
+	  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
+		if (infoj->rhs_code != INTEGER_CST)
+		  {
+		only_constants = false;
+		break;
+		  }
 	  unsigned int last_order
 		= MAX (merged_store->last_order, info->order);
 	  unsigned HOST_WIDE_INT end
 		= MAX (merged_store->start + merged_store->width,
 		   info->bitpos + info->bitsize);
-	  if (check_no_overlap (m_store_info, i, INTEGER_CST,
-last_order, end))
+	  if (only_constants
+		  && check_no_overlap (m_store_info, i, INTEGER_CST,
+   last_order, end))
 		{
 		  /* check_no_overlap call above made sure there are no
 		 overlapping stores with non-INTEGER_CST rhs_code
/* PR tree-optimization/86990 */
/* Testcase by Zhendong Su  */

const char *ss;

int __attribute__((noipa)) dummy (const char *s, ...)
{
  ss = s;
}

int i[6];
static int j, v, e, f, h = 5, k, l, n, o, p, q, r, s, u, w, x, y, z, aa, ab, ac,
   ad, ae, af, ag = 8, ah, ai, aj, ak, al;
char c;
struct a {
  unsigned b;
  int c : 9;
  int d;
} static g = {9, 5};
static short m[1], t = 95, am;
int an, ao, ap;
void aq(int ar) {
  j = j & 5 ^ i[j ^ v & 5];
  j = j & 5 ^ i[(j ^ v) & 5];
  j = j & 4095 ^ (j ^ v) & 5;
}
void as(int ar) {
  if (n)
s = 0;
}
static unsigned at() {
  int au[] = {2080555007, 0};
  for (; al; al--) {
if (r)
  --x;
if (g.d)
  l++;
dummy("", j);
if (u)
  ae = n = au[al];
  }
  r = 0;
  return 0;
}
int aw(int ar) {
  int ax[] = {9, 5, 5, 9, 5}, ay = 3;
  struct a az = {1, 3};
av:
  an = (as((at(), ax)[2]), ax[4]);
  {
int ba[] = {5, 5, 9, 8, 1, 0, 5, 5, 9, 8, 1, 0,
5, 5, 9, 8, 1, 0, 5, 5, 9, 8, 1};
int a[] = {8, 2, 8, 2, 8, 2, 8};
int b[] = {1027239, 8, 1, 7, 9, 2, 9, 4, 4, 2, 8, 1, 0, 4, 4, 2,
   4,   4, 2, 9, 2, 9, 8, 1, 7, 9, 2, 9, 4, 4, 2};
if (z) {
  struct a bc;
bb:
  for (; e; e++)
for (; q;)
  return ax[e];
  if (bc.c < g.d <= a[7])
aa--;
}
{
  struct a bd = {5};
  int d[20] = {1, 9, 7, 7, 8, 4, 4, 4, 4, 8, 1, 9, 7, 7, 8, 4, 4, 4, 4};
  c = h | r % g.c ^ x;
  dummy("", g);
  am -= t | x;
  if (h)
while (1) {
  if (a[o]) {
struct a be;
if (ar) {
  struct a bf = {908, 5, 3};
  int bg[3], bh = k, bj = ag | ae, bk = aj + 3, bl = u << e;
  if (f)
if (ac)
  ak = w;
  ag = -(ag & t);
  af = ag ^ af;
  if (8 < af)
break;
  if (bj)
goto bi;
  if (s)
dummy("", 6);
  be.d = k;
  w = f - bh;
  dummy("", be);
  if (w)
goto bb;
  ao = r - aa && g.b;
  if (y)
k++;
  goto av;
bi:
  if (aa)
continue;
  if (f)
if (k)
  dummy("", g);
  aj = ac + k ^ g.c;
  g.c = bk;
  ah = 0;
  for (; ah < 3; ah++)
if (s)
  bg[ah] = 8;
  if (!ay)
dummy("", ai);
  u = bl;
  g = bf;
} else
  for (;; o += a[ap])
;
int bm[] = {0};
for (; p; p++)
  c = ad;
ad = l;
if (bd.c) {
 

Re: [PATCH 07/25] [pr82089] Don't sign-extend SFV 1 in BImode

2018-09-17 Thread Richard Sandiford
 writes:
> This is an update of the patch posted to PR82089 long ago.  We ran into the
> same bug on GCN, so we need this fixed as part of this series.
>
> 2018-09-05  Andrew Stubbs  
> Tom de Vries  
>
>   PR82089
>
>   gcc/
>   * expmed.c (emit_cstore): Fix handling of result_mode == BImode and
>   STORE_FLAG_VALUE == 1.
> ---
>  gcc/expmed.c | 15 +++
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/expmed.c b/gcc/expmed.c
> index 29ce10b..0b87fdc 100644
> --- a/gcc/expmed.c
> +++ b/gcc/expmed.c
> @@ -5464,11 +5464,18 @@ emit_cstore (rtx target, enum insn_code icode, enum 
> rtx_code code,
>   If STORE_FLAG_VALUE does not have the sign bit set when
>   interpreted in MODE, we can do this conversion as unsigned, which
>   is usually more efficient.  */
> -  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
> +  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode)
> +  || (result_mode == BImode && int_target_mode != BImode))

Would be better to test GET_MODE_PRECISION instead of GET_MODE_SIZE,
if that works, instead of treating BImode as a special case.

>  {
> -  convert_move (target, subtarget,
> - val_signbit_known_clear_p (result_mode,
> -STORE_FLAG_VALUE));
> +  gcc_assert (GET_MODE_SIZE (result_mode) != 1
> +   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
> +  bool unsignedp
> + = (GET_MODE_SIZE (result_mode) == 1
> +? STORE_FLAG_VALUE == 1
> +: val_signbit_known_clear_p (result_mode, STORE_FLAG_VALUE));
> +
> +  convert_move (target, subtarget, unsignedp);
> +

GET_MODE_SIZE == 1 would also trigger for QImode, which shouldn't be treated
differently from HImode etc.

The original val_signbit_known_clear_p test seems like it might be an
abstraction too far.  In practice STORE_FLAG_VALUE has to fit within
the mode of a natural (unextended) condition result, so I think we can
simply test STORE_FLAG_VALUE >= 0 for all modes to see whether the target
wants the result to be treated as signed or unsigned.

Thanks,
Richard


Re: [PATCH 10/25] Convert BImode vectors.

2018-09-17 Thread Richard Sandiford
 writes:
> GCN uses V64BImode to represent vector masks in the middle-end, and DImode
> bit-masks to represent them in the back-end.  These must be converted at 
> expand
> time and the most convenient way is to simply use a SUBREG.
>
> This works fine except that simplify_subreg needs to be able to convert
> immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know 
> how
> to convert vectors to integers where there is more than one element per byte.
>
> This patch implements such conversions for the cases that we need.
>
> I don't know why this is not a problem for other targets that use BImode
> vectors, such as ARM SVE, so it's possible I missed some magic somewhere?

FWIW, SVE never converts predicates to integers: they stay as V..BImode.

Richard


Re: [PATCH v3] combine: perform jump threading at the end

2018-09-17 Thread Ilya Leoshkevich
> Am 14.09.2018 um 23:35 schrieb Segher Boessenkool 
> :
> 
> Hi!
> 
> On Mon, Sep 10, 2018 at 12:59:19PM +0200, Ilya Leoshkevich wrote:
>> Consider the following RTL:
>> 
>> (code_label 11 10 26 4 2 (nil) [1 uses])
>> (note 26 11 12 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 12 26 15 4 (set (reg:SI 65)
>>(if_then_else:SI (eq (reg:CCZ 33 %cc)
>>(const_int 0 [0]))
>>(const_int 1 [0x1])
>>(const_int 0 [0]))) "pr80080-4.c":9 1674 {*movsicc})
>> (insn 15 12 16 4 (parallel [
>>(set (reg:CCZ 33 %cc)
>>(compare:CCZ (reg:SI 65)
>>(const_int 0 [0])))
>>(clobber (scratch:SI))
>>]) "pr80080-4.c":9 1216 {*tstsi_cconly_extimm})
>> (jump_insn 16 15 17 4 (set (pc)
>>(if_then_else (ne (reg:CCZ 33 %cc)
>>(const_int 0 [0]))
>>(label_ref:DI 23)
>>(pc))) "pr80080-4.c":9 1897 {*cjump_64})
>> 
>> Combine simplifies this into:
>> 
>> (code_label 11 10 26 4 2 (nil) [1 uses])
>> (note 26 11 12 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (note 12 26 15 4 NOTE_INSN_DELETED)
>> (note 15 12 16 4 NOTE_INSN_DELETED)
>> (jump_insn 16 15 17 4 (set (pc)
>>(if_then_else (eq (reg:CCZ 33 %cc)
>>(const_int 0 [0]))
>>(label_ref:DI 23)
>>(pc))) "pr80080-4.c":9 1897 {*cjump_64})
>> 
>> opening up the possibility to perform jump threading.  Since this
>> happens infrequently, perform jump threading only when there is a
>> changed basic block, whose sole side effect is a trailing jump.
> 
> So this happens because now there is *only* a conditional jump in this BB?

Yes.

> Could you please show generated code before and after this patch?
> I mean generated assembler code.  What -S gives you.

Before:

foo4:
.LFB0:
.cfi_startproc
lt  %r1,0(%r2)
jne .L2
lhi %r3,1
cs  %r1,%r3,0(%r2)
.L2:
jne .L5
br  %r14
.L5:
jg  bar

After:

foo4:
.LFB0:
.cfi_startproc
lt  %r1,0(%r2)
jne .L4
lhi %r3,1
cs  %r1,%r3,0(%r2)
jne .L4
br  %r14
.L4:
jg  bar

>> +/* Return true iff the only side effect of BB is its trailing jump_insn.  */
>> +
>> +static bool
>> +is_single_jump_bb (basic_block bb)
>> +{
>> +  rtx_insn *end = BB_END (bb);
>> +  rtx_insn *insn;
>> +
>> +  if (!JUMP_P (end))
>> +return false;
>> +
>> +  for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn))
>> +if (INSN_P (insn) && side_effects_p (PATTERN (insn)))
>> +  return false;
>> +  return true;
>> +}
> 
> Hrm, so it is more than that.

The intention of this check is to match the „Short circuit cases where
block B contains some side effects, as we can't safely bypass it“ one in
thread_jump ().  I've just noticed that I got it wrong - I should also
check whether JUMP_INSN itself has side-effects, like it's done there.

> Why does the existing jump threading not work for you; should it happen
> at another time?

We call cleanup_cfg (CLEANUP_THREADING) only once - during the „jump“
pass, which happens before combine.  There is also „jump2“ pass, which
happens afterwards, and after discussion with Ulrich Weigand I tried to
move jump threading there.  While this change had the desired effect on
the testcase, the code got worse in another places. Example from
GemsFDTD:

Before:

103e96c:   b9 04 00 31 lgr %r3,%r1
103e970:   a7 18 00 00 lhi %r1,0
103e974:   e3 20 f0 d0 00 0d   dsg %r2,208(%r15)
103e97a:   b9 20 00 c3 cgr %r12,%r3
103e97e:   a7 29 ff ff lghi%r2,-1
103e982:   ec 12 00 01 00 42   lochih  %r1,1
103e988:   e3 20 f0 f8 00 82   xg  %r2,248(%r15)
103e98e:   1a 15   ar  %r1,%r5
103e990:   b9 e9 c0 72 sgrk%r7,%r2,%r12
103e994:   b3 c1 00 e2 ldgr%f14,%r2
103e998:   b9 f8 a0 21 ark %r2,%r1,%r10
103e99c:   12 99   ltr %r9,%r9
103e99e:   a7 18 00 00 lhi %r1,0
103e9a2:   ec 1c 00 01 00 42   lochile %r1,1
103e9a8:   50 10 f1 28 st  %r1,296(%r15)
103e9ac:   c2 9d 00 00 00 00   cfi %r9,0
103e9b2:   c0 c4 00 01 28 4e   jgle1063a4e
103e9b8:   e5 4c f1 08 00 00   mvhi264(%r15),0
103e9be:   eb 14 00 03 00 0d   sllg%r1,%r4,3

After:

103e96c:   b9 04 00 31 lgr %r3,%r1   
103e970:   e5 4c f1 18 00 01   mvhi280(%r15),1
103e976:   a7 18 00 00 lhi %r1,0 
103e97a:   e3 20 f0 d0 00 0d   dsg %r2,208(%r15) 
103e980:   b9 20 00 c3 cgr %r12,%r3  
103e984:   a7 29 ff ff lghi%r2,-1
103e988:   ec 12 00 01 00 42   lochih  %r1,1 
103e98e:   e3 20 f0 f8 00 82   xg  %r2,248

RE: [PATCH] [ARC]: core3 features are default for core4

2018-09-17 Thread Claudiu Zissulescu
Committed with the suggested mods.

Thank you Andrew for your review,
Claudiu

From: Andrew Burgess [andrew.burg...@embecosm.com]
Sent: Tuesday, September 11, 2018 5:23 PM
To: Vineet Gupta
Cc: claudiu.zissule...@synopsys.com; gcc-patches@gcc.gnu.org; 
linux-snps-...@lists.infradead.org
Subject: Re: [PATCH] [ARC]: core3 features are default for core4

* Vineet Gupta  [2018-09-10 11:26:35 -0700]:

>* config/arc/arc.c: object attributes for core4 not reflected correctly
>* config/arc/arc.h: Don't restrict DBNZ to core3 (core4 includes core3)
>
> Signed-off-by: Vineet Gupta 
> ---
>  gcc/ChangeLog| 7 +++
>  gcc/config/arc/arc.c | 2 +-
>  gcc/config/arc/arc.h | 2 +-
>  3 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 6dbe8147b3ec..3a022d156445 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,10 @@
> +2018-09-10  Vineet Gupta 
> +
> + * config/arc/arc.c: object attributes for core4 not reflected
> + correctly
> + * config/arc/arc.h: Don't restrict DBNZ to core3 (core4 includes
> + core3)

These entries should be formatted as sentences with capital letter and
full stop.

> +
>  2018-09-09  Uros Bizjak  
>
>   * config/i386/i386.md (float partial SSE register stall splitter): Move
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index c186e02e0f18..0171e8a7c615 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -5181,7 +5181,7 @@ static void arc_file_start (void)
>  TARGET_OPTFPE ? 1 : 0);
>if (TARGET_V2)
>  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_CPU_variation, 
> %d\n",
> -  arc_tune == ARC_TUNE_CORE_3 ? 3 : 2);
> +  arc_tune < ARC_TUNE_CORE_3 ? 2 : (arc_tune == ARC_TUNE_CORE_3 
> ? 3 : 4) );
>  }
>
>  /* Implement `TARGET_ASM_FILE_END'.  */
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index de09b6b2f09e..4d38f9ec174f 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -1636,6 +1636,6 @@ enum
>  #define TARGET_FPX_QUARK(TARGET_EM && TARGET_SPFP\
>&& (arc_fpu_build == FPX_QK))
>  /* DBNZ support is available for ARCv2 core3 cpus.  */
> -#define TARGET_DBNZ (TARGET_V2 && (arc_tune == ARC_TUNE_CORE_3))
> +#define TARGET_DBNZ (TARGET_V2 && (arc_tune >= ARC_TUNE_CORE_3))

Could you update the comment too please.

Otherwise, looks fine.

Thanks,
Andrew

>
>  #endif /* GCC_ARC_H */
> --
> 2.7.4
>


Re: [Patch, Fortran, OOP] PR 46313: OOP-ABI issue, ALLOCATE issue, CLASS renaming issue

2018-09-17 Thread Bernhard Reutner-Fischer
On Tue, 9 Nov 2010 at 11:41, Janus Weil  wrote:
>
> >> Ok, so it seems to me that using two leading underscores is really the
> >> best option, since it's safe against collisions with Fortran and C
> >> user code, and also safe to use with -fdollar-ok.
> >>
> >> The attached patch adds double underscores for the vtabs, vtypes,
> >> class containers and temporaries.
> >
> > OK. Thanks for the patch!
>
> Committed as r166480.
>
> Thanks for all the helpful comments, everyone!

Index: gcc/fortran/module.c
===
--- gcc/fortran/module.c (revision 166419)
+++ gcc/fortran/module.c (working copy)
@@ -4372,8 +4372,8 @@ read_module (void)
 p = name;

   /* Exception: Always import vtabs & vtypes.  */
-  if (p == NULL && (strncmp (name, "vtab$", 5) == 0
-|| strncmp (name, "vtype$", 6) == 0))
+  if (p == NULL && (strncmp (name, "__vtab_", 5) == 0
+|| strncmp (name, "__vtype_", 6) == 0))
 p = name;

   /* Skip symtree nodes not in an ONLY clause, unless there

---8<---

Sorry for the late follow-up but current trunk still has the code
quoted above where we forgot to add 2 to the length parameter of both
strncmp calls.

I think it would be nice to teach the C and C++ frontends to warn
about this even though it might trigger in quite some code in the
wild.

Looking at gcc/fortran alone there are
gcc/fortran/interface.c:  if (strncmp (mode, "unformatted", 9) == 0) // 11 !
gcc/fortran/module.c: && (strncmp (name, "__vtab_", 5) == 0 // 7 !
gcc/fortran/module.c: || strncmp (name, "__vtype_", 6) == 0)) // 8!
gcc/fortran/module.c: || (strncmp (name, "__vtab_", 5) != 0 // 7!
gcc/fortran/module.c: && strncmp (name, "__vtype_", 6)
!= 0)) // 8!

so warning by default with -Wall or at least per default in -Wextra
would make sense IMO.

cheers,


Re: [PATCH 11/25] Simplify vec_merge according to the mask.

2018-09-17 Thread Richard Sandiford
 writes:
> This patch was part of the original patch we acquired from Honza and Martin.
>
> It simplifies vector elements that are inactive, according to the mask.
>
> 2018-09-05  Jan Hubicka  
>   Martin Jambor  
>
>   * simplify-rtx.c (simplify_merge_mask): New function.
>   (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
>   same masks are used in op1 or op2.

Would be good to have self-tests for the new transforms.

> +/* X is an operand number OP of VEC_MERGE operation with MASK.

"of a".  Might also be worth mentioning that X can be a nested
operation of a VEC_MERGE with a different mode, although it always
has the same number of elements as MASK.

> +   Try to simplify using knowledge that values outside of MASK

"simplify X"

> +   will not be used.  */
> +
> +rtx
> +simplify_merge_mask (rtx x, rtx mask, int op)
> +{
> +  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
> +  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
> +  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
> +{
> +  if (!side_effects_p (XEXP (x, 1 - op)))
> + return XEXP (x, op);
> +}
> +  if (side_effects_p (x))
> +return NULL_RTX;
> +  if (UNARY_P (x)
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))

known_eq, since we require equality for correctness.  Same for the
other tests.

> +{
> +  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> +  if (top0)
> + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
> +GET_MODE (XEXP (x, 0)));
> +}
> +  if (BINARY_P (x)
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
> +{
> +  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> +  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
> +  if (top0 || top1)
> + return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
> + top0 ? top0 : XEXP (x, 0),
> + top1 ? top1 : XEXP (x, 1));
> +}
> +  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
> +  && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
> +  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
> +{
> +  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> +  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
> +  rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
> +  if (top0 || top1)
> + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
> +  GET_MODE (XEXP (x, 0)),
> +  top0 ? top0 : XEXP (x, 0),
> +  top1 ? top1 : XEXP (x, 1),
> +  top2 ? top2 : XEXP (x, 2));
> +}
> +  return NULL_RTX;
> +}
> +
>  
>  /* Simplify CODE, an operation with result mode MODE and three operands,
> OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
> @@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, 
> machine_mode mode,
> && !side_effects_p (op2) && !side_effects_p (op1))
>   return op0;
>  
> +  if (!side_effects_p (op2))
> + {
> +   rtx top0 = simplify_merge_mask (op0, op2, 0);
> +   rtx top1 = simplify_merge_mask (op1, op2, 1);
> +   if (top0 || top1)
> + return simplify_gen_ternary (code, mode, mode,
> +  top0 ? top0 : op0,
> +  top1 ? top1 : op1, op2);
> + }
> +
> +  if (GET_CODE (op0) == VEC_MERGE
> +   && rtx_equal_p (op2, XEXP (op0, 2))
> +   && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
> + return simplify_gen_ternary (code, mode, mode,
> +  XEXP (op0, 0), op1, op2);
> +
> +  if (GET_CODE (op1) == VEC_MERGE
> +   && rtx_equal_p (op2, XEXP (op1, 2))
> +   && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2))
> + return simplify_gen_ternary (code, mode, mode,
> +  XEXP (op0, 1), op1, op2);

Doesn't simplify_merge_mask make the second two redundant?  I couldn't
see the difference between them and the first condition tested by
simplify_merge_mask.

Thanks,
Richard


Re: [PATCH]: Allow TARGET_SCHED_ADJUST_PRIORITY hook to reduce priority

2018-09-17 Thread Andreas Schwab
PR rtl-optimization/85458
* sel-sched.c (sel_target_adjust_priority): Remove wrong
assertion.

diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index 824f1ec340..1be977d70b 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -3330,8 +3330,6 @@ sel_target_adjust_priority (expr_t expr)
   else
 new_priority = priority;
 
-  gcc_assert (new_priority >= 0);
-
   /* If the priority has changed, adjust EXPR_PRIORITY_ADJ accordingly.  */
   EXPR_PRIORITY_ADJ (expr) = new_priority - EXPR_PRIORITY (expr);
 
-- 
2.19.0


Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


[rs6000] Do not generate sibling call to nested function

2018-09-17 Thread Eric Botcazou
Hi,

more precisely, to a nested function that requires a static chain.  The reason 
is that the sibling call epilogue may clobber the static chain register r11.

Tested on PowerPC/Linux, OK for the mainline?


2018-09-17  Eric Botcazou  

* config/rs6000/rs6000.c (rs6000_function_ok_for_sibcall): Return false
if the call takes a static chain.


2018-09-17  Eric Botcazou  

* gcc.dg/nested-func-11.c: New test.

-- 
Eric Botcazou/* { dg-do run } */
/* { dg-options "-O2 -fno-omit-frame-pointer" } */

int __attribute__((noipa)) foo (int i)
{
  int a;

  void __attribute__((noipa)) nested2 (int i)
  {
a = i;
  }

  void  __attribute__((noipa)) nested1 (int i)
  {
int b[32];

for (int j = 0; j < 32; j++)
  b[j] = i + j;

nested2 (b[i]);
  }

  nested1 (i);

  return a;
}

int main (void)
{
  if (foo (4) != 8)
__builtin_abort ();

  return 0;
}
Index: config/rs6000/rs6000.c
===
--- config/rs6000/rs6000.c	(revision 264342)
+++ config/rs6000/rs6000.c	(working copy)
@@ -24332,6 +24332,10 @@ rs6000_function_ok_for_sibcall (tree dec
 {
   tree fntype;
 
+  /* The sibcall epilogue may clobber the static chain register.  */
+  if (CALL_EXPR_STATIC_CHAIN (exp))
+return false;
+
   if (decl)
 fntype = TREE_TYPE (decl);
   else


Re: [PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-17 Thread Richard Sandiford
 writes:
> If the autovectorizer tries to load a GCN 64-lane vector elementwise then it
> blows away the register file and produces horrible code.

Do all the registers really need to be live at once, or is it "just" bad
scheduling?  I'd have expected the initial rtl to load each element and
then insert it immediately, so that the number of insertions doesn't
directly affect register pressure.

> This patch simply disallows elementwise loads for such large vectors.  Is 
> there
> a better way to disable this in the middle-end?

Do you ever want elementwise accesses for GCN?  If not, it might be
better to disable them in the target's cost model.

Thanks,
Richard

>
> 2018-09-05  Julian Brown  
>
>   gcc/
>   * tree-vect-stmts.c (get_load_store_type): Don't use VMAT_ELEMENTWISE
>   loads/stores with many-element (>=64) vectors.
> ---
>  gcc/tree-vect-stmts.c | 20 
>  1 file changed, 20 insertions(+)
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 8875201..a333991 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -2452,6 +2452,26 @@ get_load_store_type (stmt_vec_info stmt_info, tree 
> vectype, bool slp,
>   *memory_access_type = VMAT_CONTIGUOUS;
>  }
>  
> +  /* FIXME: Element-wise accesses can be extremely expensive if we have a
> + large number of elements to deal with (e.g. 64 for AMD GCN) using the
> + current generic code expansion.  Until an efficient code sequence is
> + supported for affected targets instead, don't attempt vectorization for
> + VMAT_ELEMENTWISE at all.  */
> +  if (*memory_access_type == VMAT_ELEMENTWISE)
> +{
> +  poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
> +
> +  if (maybe_ge (nelements, 64))
> + {
> +   if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +   "too many elements (%u) for elementwise accesses\n",
> +   (unsigned) nelements.to_constant ());
> +
> +   return false;
> + }
> +}
> +
>if ((*memory_access_type == VMAT_ELEMENTWISE
> || *memory_access_type == VMAT_STRIDED_SLP)
>&& !nunits.is_constant ())


Re: VRP: convert pointers of known quantity better

2018-09-17 Thread Aldy Hernandez

On 09/14/2018 05:25 PM, Jeff Law wrote:

On 9/14/18 4:31 AM, Aldy Hernandez wrote:

Apparently, my work on VRP will never finish. There's an infinity of
things that can be tweaked ;-).

First, we shouldn't drop to null/non-null when we know what the actual
pointer value is.  For example, [1, 3] which we get when we store magic
numbers in a pointer (p == (char *)1).

BTW, for this bit, I would much rather change range_int_cst_p to allow
VR_ANTI_RANGE, instead of inlining as I've done.  Is there a reason
range_int_cst_p doesn't handle anti ranges?

Also, [&foo, &foo] is known to be non-null.  Don't drop to varying.

OK?

curr.patch

commit 7f42e101d5d26ea866b739692858289a8dff4396
Author: Aldy Hernandez 
Date:   Fri Sep 14 00:11:34 2018 +0200

 * tree-vrp.c (extract_range_from_unary_expr): Handle pointers of
 known quantity.

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 622ccbc2df7..22e5ee3c729 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1842,9 +1842,14 @@ extract_range_from_unary_expr (value_range *vr,
tree inner_type = op0_type;
tree outer_type = type;
  
-  /* If the expression evaluates to a pointer, we are only interested in

-determining if it evaluates to NULL [0, 0] or non-NULL (~[0, 0]).  */
-  if (POINTER_TYPE_P (type))
+  /* If the expression evaluates to a pointer of unknown quantity,
+we are only interested in determining if it evaluates to NULL
+[0, 0] or non-NULL (~[0, 0]).  */
+  if (POINTER_TYPE_P (type)
+ && !((vr0.type == VR_RANGE
+   || vr0.type == VR_ANTI_RANGE)
+  && TREE_CODE (vr0.min) == INTEGER_CST
+  && TREE_CODE (vr0.max) == INTEGER_CST))
{
  if (!range_includes_zero_p (&vr0))
set_value_range_to_nonnull (vr, type);

I think this part is fine.  Though I question how much time we want to
spend dealing some clowns that do things like store integers into
pointer objects :-)


Hmmm.  You're right.  I take it back.  I don't want to further 
complicate things.  I never quite liked clowns as a kid.


I have another approach to cleaning up the pointer conversion code that 
I will post separately.





@@ -1855,6 +1860,16 @@ extract_range_from_unary_expr (value_range *vr,
  return;
}
  
+  /* If we have a non-constant range that we know is non-zero (for

+example [&foo, &foo] or [&foo, +MAX]), make it known, so we
+don't drop to VR_VARYING later.  */
+  if (POINTER_TYPE_P (op0_type)
+ && vr0.type == VR_RANGE
+ && (TREE_CODE (vr0.min) != INTEGER_CST
+ || TREE_CODE (vr0.max) != INTEGER_CST)
+ && !range_includes_zero_p (&vr0))
+   set_value_range_to_nonnull (&vr0, op0_type);
+

I don't think this is correct in the presence of weak symbols.  I think
you can query maybe_nonzero_address on the min/max and if both return >
0, then you know the result is non-null.


I'm taking it all back.  Perhaps I'll revisit this and test with weak 
symbols later.


Thanks, and sorry for the noise.

Aldy


Re: [PATCH 15/25] Don't double-count early-clobber matches.

2018-09-17 Thread Richard Sandiford
 writes:
> Given a pattern with a number of operands:
>
> (match_operand 0 "" "=&v")
> (match_operand 1 "" " v0")
> (match_operand 2 "" " v0")
> (match_operand 3 "" " v0")
>
> GCC will currently increment "reject" once, for operand 0, and then decrement
> it once for each of the other operands, ending with reject == -2 and an
> assertion failure.  If there's a conflict then it might try to decrement 
> reject
> yet again.
>
> Incidentally, what these patterns are trying to achieve is an allocation in
> which operand 0 may match one of the other operands, but may not partially
> overlap any of them.  Ideally there'd be a better way to do this.
>
> In any case, it will affect any pattern in which multiple operands may (or
> must) match an early-clobber operand.
>
> The patch only allows a reject-- when one has not already occurred, for that
> operand.
>
> 2018-09-05  Andrew Stubbs  
>
>   gcc/
>   * lra-constraints.c (process_alt_operands): Check
>   matching_early_clobber before decrementing reject, and set
>   matching_early_clobber after.
>   * lra-int.h (struct lra_operand_data): Add matching_early_clobber.
>   * lra.c (setup_operand_alternative): Initialize matching_early_clobber.
> ---
>  gcc/lra-constraints.c | 22 ++
>  gcc/lra-int.h |  3 +++
>  gcc/lra.c |  1 +
>  3 files changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
> index 8be4d46..55163f1 100644
> --- a/gcc/lra-constraints.c
> +++ b/gcc/lra-constraints.c
> @@ -2202,7 +2202,13 @@ process_alt_operands (int only_alternative)
>"%d Matching earlyclobber alt:"
>" reject--\n",
>nop);
> - reject--;
> + if (!curr_static_id->operand[m]
> +  .matching_early_clobber)
> +   {
> + reject--;
> + curr_static_id->operand[m]
> + .matching_early_clobber = 1;
> +   }
> }
>   /* Otherwise we prefer no matching
>  alternatives because it gives more freedom
> @@ -2948,15 +2954,11 @@ process_alt_operands (int only_alternative)
> curr_alt_dont_inherit_ops[curr_alt_dont_inherit_ops_num++]
>   = last_conflict_j;
> losers++;
> -   /* Early clobber was already reflected in REJECT. */
> -   lra_assert (reject > 0);
> if (lra_dump_file != NULL)
>   fprintf
> (lra_dump_file,
>  "%d Conflict early clobber reload: reject--\n",
>  i);
> -   reject--;
> -   overall += LRA_LOSER_COST_FACTOR - 1;
>   }
> else
>   {
> @@ -2980,17 +2982,21 @@ process_alt_operands (int only_alternative)
>   }
> curr_alt_win[i] = curr_alt_match_win[i] = false;
> losers++;
> -   /* Early clobber was already reflected in REJECT. */
> -   lra_assert (reject > 0);
> if (lra_dump_file != NULL)
>   fprintf
> (lra_dump_file,
>  "%d Matched conflict early clobber reloads: "
>  "reject--\n",
>  i);
> + }
> +   /* Early clobber was already reflected in REJECT. */
> +   if (!curr_static_id->operand[i].matching_early_clobber)
> + {
> +   lra_assert (reject > 0);
> reject--;
> -   overall += LRA_LOSER_COST_FACTOR - 1;
> +   curr_static_id->operand[i].matching_early_clobber = 1;
>   }
> +   overall += LRA_LOSER_COST_FACTOR - 1;
>   }
>if (lra_dump_file != NULL)
>   fprintf (lra_dump_file, "  
> alt=%d,overall=%d,losers=%d,rld_nregs=%d\n",

The idea looks good to me FWIW, but you can't use curr_static_id for
the state, since that's a static description of the .md pattern rather
than data about this particular instance.

Thanks,
Richard


Re: [PATCH 16/25] Fix IRA ICE.

2018-09-17 Thread Richard Sandiford
 writes:
> The IRA pass makes an assumption that any pseudos created after the pass 
> begins
> were created explicitly by the pass itself and therefore will have
> corresponding entries in its other tables.
>
> The GCN back-end, however, often creates additional pseudos, in expand
> patterns, to represent the necessary EXEC value, and these break IRA's
> assumption and cause ICEs.
>
> This patch simply has IRA skip unknown pseudos, and the problem goes away.
>
> Presumably, it's not ideal that these registers have not been processed by 
> IRA,
> but it does not appear to do any real harm.

Could you go into more detail about how this happens?  Other targets
also create pseudos in their move patterns.

Richard


Re: [PATCH 22/25] Add dg-require-effective-target exceptions

2018-09-17 Thread Richard Sandiford
 writes:
> There are a number of tests that fail because they assume that exceptions are
> available, but GCN does not support them, yet.
>
> This patch adds "dg-require-effective-target exceptions" in all the affected
> tests.  There's probably an automatic way to test for exceptions, but the
> current implementation simply says that AMD GCN does not support them.  This
> should ensure that no other targets are affected by the change.

Manual markup seems fine as long as it's agreed that maintainers of
affected targets are the ones responsible for keeping the markup up
to date (under the obvious rule of course).  There are so many target
selectors that it's hard to remember which options require explicit
tests and which don't, so I don't think the onus should be on every
developer adding a new exception-related test.

The new selector needs an entry in doc/sourcebuild.texi.
OK with that change, thanks.

Richard

>
> 2018-09-05  Andrew Stubbs  
>   Kwok Cheung Yeung  
>   Julian Brown  
>   Tom de Vries  
>
>   gcc/testsuite/
>   * c-c++-common/ubsan/pr71512-1.c: Require exceptions.
>   * c-c++-common/ubsan/pr71512-2.c: Require exceptions.
>   * gcc.c-torture/compile/pr34648.c: Require exceptions.
>   * gcc.c-torture/compile/pr41469.c: Require exceptions.
>   * gcc.dg/20111216-1.c: Require exceptions.
>   * gcc.dg/cleanup-10.c: Require exceptions.
>   * gcc.dg/cleanup-11.c: Require exceptions.
>   * gcc.dg/cleanup-12.c: Require exceptions.
>   * gcc.dg/cleanup-13.c: Require exceptions.
>   * gcc.dg/cleanup-5.c: Require exceptions.
>   * gcc.dg/cleanup-8.c: Require exceptions.
>   * gcc.dg/cleanup-9.c: Require exceptions.
>   * gcc.dg/gomp/pr29955.c: Require exceptions.
>   * gcc.dg/lto/pr52097_0.c: Require exceptions.
>   * gcc.dg/nested-func-5.c: Require exceptions.
>   * gcc.dg/pch/except-1.c: Require exceptions.
>   * gcc.dg/pch/valid-2.c: Require exceptions.
>   * gcc.dg/pr41470.c: Require exceptions.
>   * gcc.dg/pr42427.c: Require exceptions.
>   * gcc.dg/pr44545.c: Require exceptions.
>   * gcc.dg/pr47086.c: Require exceptions.
>   * gcc.dg/pr51481.c: Require exceptions.
>   * gcc.dg/pr51644.c: Require exceptions.
>   * gcc.dg/pr52046.c: Require exceptions.
>   * gcc.dg/pr54669.c: Require exceptions.
>   * gcc.dg/pr56424.c: Require exceptions.
>   * gcc.dg/pr64465.c: Require exceptions.
>   * gcc.dg/pr65802.c: Require exceptions.
>   * gcc.dg/pr67563.c: Require exceptions.
>   * gcc.dg/tree-ssa/pr41469-1.c: Require exceptions.
>   * gcc.dg/tree-ssa/ssa-dse-28.c: Require exceptions.
>   * gcc.dg/vect/pr46663.c: Require exceptions.
>   * lib/target-supports.exp (check_effective_target_exceptions): New.
> ---
>  gcc/testsuite/c-c++-common/ubsan/pr71512-1.c  |  1 +
>  gcc/testsuite/c-c++-common/ubsan/pr71512-2.c  |  1 +
>  gcc/testsuite/gcc.c-torture/compile/pr34648.c |  1 +
>  gcc/testsuite/gcc.c-torture/compile/pr41469.c |  1 +
>  gcc/testsuite/gcc.dg/20111216-1.c |  1 +
>  gcc/testsuite/gcc.dg/cleanup-10.c |  1 +
>  gcc/testsuite/gcc.dg/cleanup-11.c |  1 +
>  gcc/testsuite/gcc.dg/cleanup-12.c |  1 +
>  gcc/testsuite/gcc.dg/cleanup-13.c |  1 +
>  gcc/testsuite/gcc.dg/cleanup-5.c  |  1 +
>  gcc/testsuite/gcc.dg/cleanup-8.c  |  1 +
>  gcc/testsuite/gcc.dg/cleanup-9.c  |  1 +
>  gcc/testsuite/gcc.dg/gomp/pr29955.c   |  1 +
>  gcc/testsuite/gcc.dg/lto/pr52097_0.c  |  1 +
>  gcc/testsuite/gcc.dg/nested-func-5.c  |  1 +
>  gcc/testsuite/gcc.dg/pch/except-1.c   |  1 +
>  gcc/testsuite/gcc.dg/pch/valid-2.c|  2 +-
>  gcc/testsuite/gcc.dg/pr41470.c|  1 +
>  gcc/testsuite/gcc.dg/pr42427.c|  1 +
>  gcc/testsuite/gcc.dg/pr44545.c|  1 +
>  gcc/testsuite/gcc.dg/pr47086.c|  1 +
>  gcc/testsuite/gcc.dg/pr51481.c|  1 +
>  gcc/testsuite/gcc.dg/pr51644.c|  1 +
>  gcc/testsuite/gcc.dg/pr52046.c|  1 +
>  gcc/testsuite/gcc.dg/pr54669.c|  1 +
>  gcc/testsuite/gcc.dg/pr56424.c|  1 +
>  gcc/testsuite/gcc.dg/pr64465.c|  1 +
>  gcc/testsuite/gcc.dg/pr65802.c|  1 +
>  gcc/testsuite/gcc.dg/pr67563.c|  1 +
>  gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c |  1 +
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c|  1 +
>  gcc/testsuite/gcc.dg/vect/pr46663.c   |  1 +
>  gcc/testsuite/lib/target-supports.exp | 10 ++
>  33 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c 
> b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
> index 2a90ab1..8af9365 100644
> --- a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
> +++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
> @@ -1,5 +1,6 @@

Re: [PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-17 Thread Andrew Stubbs

On 17/09/18 10:14, Richard Sandiford wrote:

 writes:

If the autovectorizer tries to load a GCN 64-lane vector elementwise then it
blows away the register file and produces horrible code.


Do all the registers really need to be live at once, or is it "just" bad
scheduling?  I'd have expected the initial rtl to load each element and
then insert it immediately, so that the number of insertions doesn't
directly affect register pressure.


They don't need to be live at once, architecturally speaking, but that's 
the way it happened.  No doubt there is another solution to fix it, but 
it's not a use case I believe we want to spend time optimizing.


Actually, I've not tested what happens without this in GCC 9, so that's 
probably worth checking, but I'd still be concerned about it blowing up 
on real code somewhere.



This patch simply disallows elementwise loads for such large vectors.  Is there
a better way to disable this in the middle-end?


Do you ever want elementwise accesses for GCN?  If not, it might be
better to disable them in the target's cost model.


The hardware is perfectly capable of extracting or setting vector 
elements, but given that it can do full gather/scatter from arbitrary 
addresses it's not something we want to do in general.


A normal scalar load will use a vector register (lane 0). The value then 
has to be moved to a scalar register, and only then can v_writelane 
insert it into the final destination.


Alternatively you could use a mask_load to load the value directly to 
the correct lane, but I don't believe that's something GCC does.


Andrew


Re: VRP: allow unsigned truncating conversions that will fit

2018-09-17 Thread Aldy Hernandez




On 09/14/2018 08:33 AM, Michael Matz wrote:

Hi,

On Fri, 14 Sep 2018, Aldy Hernandez wrote:


Is there a subtle reason why we're avoiding unsigned truncating conversions of
the form:

[X, +INF]

If the X fits in the new type, why can't we just build [X, +INF] in the new
type?


But (uin8_t)((unsigned)[255,+INF]) aka ([255,+INF] & 255) isn't [255,+INF]
but rather [0,+INF].  What am I missing?  Even considering that the caller
must canonicalize, I don't see how that would transform [255,+INF] (which
is a normal range) into [0,+INF].


Ughh, yes.  You're absolutely right.  I was getting confused with how we 
special handled anti-ranges of pointers which make no sense when you 
split them up into it's pieces and try to do the math.  And it made no 
sense because I missed that pointers are actually special and we only 
care about null / non-nullness.


Anyways...ignore the noise.  I have a much cleaner patch to the 
tree-vrp.c conversion code that I'm testing now.


Thanks for your input.
Aldy


Re: bootstrap -O3 failure libgo on x64_86

2018-09-17 Thread graham stott via gcc-patches
Ian
I've bootstrapped gcc 9 with gcc 8 again and I'm unable to recreate failure 
boostrapping with gcc 9
so ignore I'll report if happens again
Graham

 Original message 
From: Ian Lance Taylor via gcc-patches  
Date: 15/09/2018  16:01  (GMT+00:00) 
To: graham stott  
Cc: gcc-patches  
Subject: Re: bootstrap -O3 failure libgo on x64_86 

On Sat, Sep 15, 2018 at 7:12 AM, graham stott via gcc-patches
 wrote:
> Hi
> Fails due to undefind symbols runtime.*
> O2 works

I tried my best guess at recreating the problem, and everything worked fine.

Please tell us exactly what you did and exactly what happened.

Thanks.

Ian


Re: [PATCH] PR86957

2018-09-17 Thread Martin Liška
On 9/16/18 12:58 AM, Indu Bhagat wrote:
> Thanks for the reviews. I have incorporated them in this patch except the one 
> (changes in common.opt) below.
> 
> In this patch,
> 
> 1. -Wmissing-profile is a warning by default and is ON by default with 
>-fprofile-use
> 2. Attached pr86957-missing-profile-diagnostic-2 shows the warning messages
> 3. Added a testcase for warning in the case of missing profile feedback data
>file for a compilation unit
> 
> Thanks

Hi.

The patch looks fine for me now. Honza can you approve it?

Martin

> 
> gcc/ChangeLog:
> 
> 2018-09-14  "Indu Bhagat"  <"indu.bha...@oracle.com">
> 
> * common.opt: New warning option -Wmissing-profile.
> * coverage.c (get_coverage_counts): Add warning for missing .gcda 
> file.
> * doc/invoke.texi: Document -Wmissing-profile.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-09-14  "Indu Bhagat"  <"indu.bha...@oracle.com">
> 
> * gcc.dg/Wmissing-profile.c: New test.
> 
> 
> On 09/11/2018 02:21 AM, Martin Liška wrote:
>>> --- a/gcc/common.opt
>>> +++ b/gcc/common.opt
>>> @@ -811,6 +811,10 @@ Wcoverage-mismatch
>>>  Common Var(warn_coverage_mismatch) Init(1) Warning
>>>  Warn in case profiles in -fprofile-use do not match.
>>>  
>>> +Wmissing-profile
>>> +Common Var(warn_missing_profile) Init(1) Warning
>>> +Warn in case profiles in -fprofile-use do not exist.
>> Maybe 'Want about missing profile for a function in -fprofile-use build.' ?
>>
> Since, it also warns when feedback file is missing for a compilation unit, the
> suggested text above will be more restrictive. So I did not change.
> 
> 



[Patch, fortran] PR64120 - [F03] Wrong handling of allocatable character string

2018-09-17 Thread Paul Richard Thomas
This patch is relatively trivial. This initialization of the string
length was not being done.

Bootstraps and regtests on FC28/x86_64. OK for trunk?

Paul


2018-09-17  Paul Thomas  

PR fortran/64120
* trans-decl.c (gfc_get_symbol_decl): Flag allocatable, scalar
characters with a variable length expression for deferred init.
(gfc_trans_deferred_vars): Perform the assignment for these
symbols by calling gfc_conv_string_length.

2018-09-17  Paul Thomas  

PR fortran/64120
* gfortran.dg/allocatable_scalar_14.f90 : New test.
Index: gcc/fortran/trans-decl.c
===
*** gcc/fortran/trans-decl.c	(revision 264358)
--- gcc/fortran/trans-decl.c	(working copy)
*** gfc_get_symbol_decl (gfc_symbol * sym)
*** 1745,1750 
--- 1745,1757 
  	  && !(sym->attr.use_assoc && !intrinsic_array_parameter)))
  gfc_defer_symbol_init (sym);
  
+   if (sym->ts.type == BT_CHARACTER
+   && sym->attr.allocatable
+   && !sym->attr.dimension
+   && sym->ts.u.cl && sym->ts.u.cl->length
+   && sym->ts.u.cl->length->expr_type == EXPR_VARIABLE)
+ gfc_defer_symbol_init (sym);
+ 
/* Associate names can use the hidden string length variable
   of their associated target.  */
if (sym->ts.type == BT_CHARACTER
*** gfc_trans_deferred_vars (gfc_symbol * pr
*** 4603,4608 
--- 4610,4622 
  	  gfc_set_backend_locus (&sym->declared_at);
  	  gfc_start_block (&init);
  
+ 	  if (sym->ts.type == BT_CHARACTER
+ 		  && sym->attr.allocatable
+ 		  && !sym->attr.dimension
+ 		  && sym->ts.u.cl && sym->ts.u.cl->length
+ 		  && sym->ts.u.cl->length->expr_type == EXPR_VARIABLE)
+ 		gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
+ 
  	  if (!sym->attr.pointer)
  		{
  		  /* Nullify and automatic deallocation of allocatable
Index: gcc/testsuite/gfortran.dg/allocatable_scalar_14.f90
===
*** gcc/testsuite/gfortran.dg/allocatable_scalar_14.f90	(nonexistent)
--- gcc/testsuite/gfortran.dg/allocatable_scalar_14.f90	(working copy)
***
*** 0 
--- 1,17 
+ ! { dg-do run }
+ !
+ ! Test the fix for PR64120 in which the initialisation of the
+ ! string length of 's' was not being done.
+ !
+ ! Contributed by Francois-Xavier Coudert  
+ !
+call g(1)
+call g(2)
+ contains
+   subroutine g(x)
+   integer :: x
+   character(len=x), allocatable :: s
+   allocate(s)
+   if (len(s) .ne. x) stop x
+   end subroutine
+ end


VRP: special case all pointer conversion code

2018-09-17 Thread Aldy Hernandez
It seems most of the remaining anti range code in 
extract_range_from_unary_expr for CONVERT_EXPR_P is actually dealing 
with non-nullness in practice.


Anti-range handling is mostly handled by canonicalizing anti-ranges into 
its two set constituents (~[10,20] => [MIN,9] U [21,MAX]) and dealing 
with them piece-meal.  For that matter, the only way we can reach the 
conversion code in extract_range_from_unary_expr with an anti-range is 
either with a pointer (because pointers are ignored from 
ranges_from_anti_range on purpose), or when converting integers of the 
form ~[SSA, SSA].  I verified this with a bootstrap + tests with some 
specially placed asserts, BTW.


So... if we special handle pointer conversions (both to and fro, as 
opposed to just to), we get rid of any anti-ranges with the exception of 
~[SSA, SSA] between integers.  And anti-ranges of unknown quantities 
(SSAs) will be handled automatically already (courtesy of 
extract_range_into_wide_ints).


I propose we handle pointers at the beginning, and everything else just 
falls into place, with no special code.


As commented in the code, this will pessimize conversions from (char 
*)~[0, 2] to int, because we will forget that the range can also not be 
1 or 2.  But as Jeff commented, we really only care about null or 
non-nullness.  Special handling magic pointers with constants IMO is a 
wasted effort.  For that matter, I think it was me that added this 
spaghetti a few weeks ago to make sure we handled ~[0,2].  We weren't 
even handling it a short while back :-).  Furthermore, in a bootstrap, I 
think we only triggered this twice.  And I'm not even sure we make 
further use of anything null/not-null for pointers later on.


This patch simplifies the code, and removes more special handling and 
cryptic comments related to anti-ranges.


Tested with all languages including Ada and Go.

OK for trunk?

commit 10406735080ebba81f31b9e7b36247446e07fb69
Author: Aldy Hernandez 
Date:   Mon Sep 17 11:05:57 2018 +0200

* tree-vrp.c (extract_range_from_unary_expr): Special case all
pointer conversions.
Do not do anything special for anti-ranges.

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 622ccbc2df7..3e12d1d9bda 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1842,9 +1842,14 @@ extract_range_from_unary_expr (value_range *vr,
   tree inner_type = op0_type;
   tree outer_type = type;
 
-  /* If the expression evaluates to a pointer, we are only interested in
-	 determining if it evaluates to NULL [0, 0] or non-NULL (~[0, 0]).  */
-  if (POINTER_TYPE_P (type))
+  /* If the expression involves a pointer, we are only interested in
+	 determining if it evaluates to NULL [0, 0] or non-NULL (~[0, 0]).
+
+	 This may lose precision when converting (char *)~[0,2] to
+	 int, because we'll forget that the pointer can also not be 1
+	 or 2.  In practice we don't care, as this is some idiot
+	 storing a magic constant to a pointer.  */
+  if (POINTER_TYPE_P (type) || POINTER_TYPE_P (op0_type))
 	{
 	  if (!range_includes_zero_p (&vr0))
 	set_value_range_to_nonnull (vr, type);
@@ -1855,15 +1860,12 @@ extract_range_from_unary_expr (value_range *vr,
 	  return;
 	}
 
-  /* We normalize everything to a VR_RANGE, but for constant
-	 anti-ranges we must handle them by leaving the final result
-	 as an anti range.  This allows us to convert things like
-	 ~[0,5] seamlessly.  */
-  value_range_type vr_type = VR_RANGE;
-  if (vr0.type == VR_ANTI_RANGE
-	  && TREE_CODE (vr0.min) == INTEGER_CST
-	  && TREE_CODE (vr0.max) == INTEGER_CST)
-	vr_type = VR_ANTI_RANGE;
+  /* The POINTER_TYPE_P code above will have dealt with all
+	 pointer anti-ranges.  Any remaining anti-ranges at this point
+	 will be integer conversions from SSA names that will be
+	 normalized into VARYING.  For instance: ~[x_55, x_55].  */
+  gcc_assert (vr0.type != VR_ANTI_RANGE
+		  || TREE_CODE (vr0.min) != INTEGER_CST);
 
   /* NOTES: Previously we were returning VARYING for all symbolics, but
 	 we can do better by treating them as [-MIN, +MAX].  For
@@ -1886,7 +1888,7 @@ extract_range_from_unary_expr (value_range *vr,
 	{
 	  tree min = wide_int_to_tree (outer_type, wmin);
 	  tree max = wide_int_to_tree (outer_type, wmax);
-	  set_and_canonicalize_value_range (vr, vr_type, min, max, NULL);
+	  set_and_canonicalize_value_range (vr, VR_RANGE, min, max, NULL);
 	}
   else
 	set_value_range_to_varying (vr);



[PATCH] Fix out-of-bounds in gcov.c (PR gcov-profile/85871).

2018-09-17 Thread Martin Liška
Hi.

One obvious patch where we access src->lines one element after the end.

Survives gcov.exp tests, I'm going to install the patch.

Martin

gcc/ChangeLog:

2018-09-17  Martin Liska  

PR gcov-profile/85871
* gcov.c (output_intermediate_file): Fix out of bounds
access.
---
 gcc/gcov.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/gcc/gcov.c b/gcc/gcov.c
index 6a24a320046..c6cf79b0f53 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -1085,7 +1085,8 @@ output_intermediate_file (FILE *gcov_file, source_info *src)
 	}
 
   /* Follow with lines associated with the source file.  */
-  output_intermediate_line (gcov_file, &src->lines[line_num], line_num);
+  if (line_num < src->lines.size ())
+	output_intermediate_line (gcov_file, &src->lines[line_num], line_num);
 }
 }
 



[PATCH] Fix PR87301

2018-09-17 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2018-09-17  Richard Biener  

PR tree-optimization/87301
* tree-ssa-sccvn.c (eliminate_dom_walker::eliminate_cleanup): Properly
clean EH info from leftover copy assignments.

* gcc.dg/torture/pr87301.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264357)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -5461,7 +5461,10 @@ eliminate_dom_walker::eliminate_cleanup
if (is_gimple_assign (stmt))
  {
gimple_assign_set_rhs_from_tree (&gsi, sprime);
-   update_stmt (gsi_stmt (gsi));
+   stmt = gsi_stmt (gsi);
+   update_stmt (stmt);
+   if (maybe_clean_or_replace_eh_stmt (stmt, stmt))
+ bitmap_set_bit (need_eh_cleanup, gimple_bb (stmt)->index);
continue;
  }
else
Index: gcc/testsuite/gcc.dg/torture/pr87301.c
===
--- gcc/testsuite/gcc.dg/torture/pr87301.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr87301.c  (working copy)
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-div-by-zero" } */
+
+void
+bl (int *be)
+{
+  int lo;
+{
+  int **ny;
+
+  if (*be == 0)
+   {
+ int ***k8 = &ny;
+ int uj = (__INTPTR_TYPE__)&lo;
+
+ for (;;)
+   if (***k8 == 0)
+ {
+   uj = !!(1 / 0) ? !(lo = 0) : 0;
+   (void) uj;
+
+   if (*ny == 0)
+ for (;;)
+   if (***k8 == 0)
+ {
+ }
+
+   for (lo = 0; lo < 2; ++lo)
+ {
+ }
+ }
+   }
+}
+}


[PATCH] Middle-end changes for fixing S/390 conditional returns

2018-09-17 Thread Ilya Leoshkevich
Hi all,

Could anyone please take a look at 
https://gcc.gnu.org/ml/gcc-patches/2018-09/msg00237.html ?

Best regards,
Ilya


Re: [Patch, fortran] PR64120 - [F03] Wrong handling of allocatable character string

2018-09-17 Thread Janne Blomqvist
On Mon, Sep 17, 2018 at 1:10 PM Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> This patch is relatively trivial. This initialization of the string
> length was not being done.
>
> Bootstraps and regtests on FC28/x86_64. OK for trunk?
>

Ok.

-- 
Janne Blomqvist


Re: [PATCH 00/25] AMD GCN Port

2018-09-17 Thread Bernd Edlinger
 writes:
> Given a pattern with a number of operands:
>
> (match_operand 0 "" "=&v")
> (match_operand 1 "" " v0")
> (match_operand 2 "" " v0")
> (match_operand 3 "" " v0")
>
> GCC will currently increment "reject" once, for operand 0, and then decrement
> it once for each of the other operands, ending with reject == -2 and an
> assertion failure.  If there's a conflict then it might try to decrement 
> reject
> yet again.
>
> Incidentally, what these patterns are trying to achieve is an allocation in
> which operand 0 may match one of the other operands, but may not partially
> overlap any of them.  Ideally there'd be a better way to do this.

I have seen something similar in the arm.md:
where this allocation problem is solved differently:

(define_insn_and_split "*arm_subdi3"
  [(set (match_operand:DI   0 "arm_general_register_operand" 
"=&r,&r,&r")
(minus:DI (match_operand:DI 1 "arm_general_register_operand" "0,r,0")
  (match_operand:DI 2 "arm_general_register_operand" "r,0,0")))


DI registers are always consecutive register pairs,
but they should not partially overlap.
They solve that by using alternatives.


Bernd.

Re: [PATCH] PR86957

2018-09-17 Thread Jan Hubicka
> On 9/16/18 12:58 AM, Indu Bhagat wrote:
> > Thanks for the reviews. I have incorporated them in this patch except the 
> > one 
> > (changes in common.opt) below.
> > 
> > In this patch,
> > 
> > 1. -Wmissing-profile is a warning by default and is ON by default with 
> >-fprofile-use
> > 2. Attached pr86957-missing-profile-diagnostic-2 shows the warning messages
> > 3. Added a testcase for warning in the case of missing profile feedback data
> >file for a compilation unit
> > 
> > Thanks
> 
> Hi.
> 
> The patch looks fine for me now. Honza can you approve it?
Patch looks OK.  We used to have this warning but it was removed by google floks
because if you build a static library it may happen that object file was never 
linked
into your test application, so its use is a bit limited. Having an option for 
this makes
sense to me though.

Honza
> 
> Martin
> 
> > 
> > gcc/ChangeLog:
> > 
> > 2018-09-14  "Indu Bhagat"  <"indu.bha...@oracle.com">
> > 
> > * common.opt: New warning option -Wmissing-profile.
> > * coverage.c (get_coverage_counts): Add warning for missing .gcda 
> > file.
> > * doc/invoke.texi: Document -Wmissing-profile.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 2018-09-14  "Indu Bhagat"  <"indu.bha...@oracle.com">
> > 
> > * gcc.dg/Wmissing-profile.c: New test.
> > 
> > 
> > On 09/11/2018 02:21 AM, Martin Liška wrote:
> >>> --- a/gcc/common.opt
> >>> +++ b/gcc/common.opt
> >>> @@ -811,6 +811,10 @@ Wcoverage-mismatch
> >>>  Common Var(warn_coverage_mismatch) Init(1) Warning
> >>>  Warn in case profiles in -fprofile-use do not match.
> >>>  
> >>> +Wmissing-profile
> >>> +Common Var(warn_missing_profile) Init(1) Warning
> >>> +Warn in case profiles in -fprofile-use do not exist.
> >> Maybe 'Want about missing profile for a function in -fprofile-use build.' ?
> >>
> > Since, it also warns when feedback file is missing for a compilation unit, 
> > the
> > suggested text above will be more restrictive. So I did not change.

Perhaps we want also to have reference from -fprofile-use documentation so 
users notice
this option.

Honza
> > 
> > 
> 


[ARM] Fix ICE during thunk generation with -mlong-calls

2018-09-17 Thread Eric Botcazou
Hi,

this is a regression present on mainline, 8 and 7 branches.  The new, RTL 
implementation of arm32_output_mi_thunk breaks during the libstdc++ build if 
you configure the compiler with -mlong-calls by default:

0xdb57eb gen_reg_rtx(machine_mode)
/home/eric/svn/gcc/gcc/emit-rtl.c:1155
0xde9ae7 force_reg(machine_mode, rtx_def*)
/home/eric/svn/gcc/gcc/explow.c:654
0x1bf73bf gen_sibcall(rtx_def*, rtx_def*, rtx_def*)
/home/eric/svn/gcc/gcc/config/arm/arm.md:8272
0x187d3b1 arm32_output_mi_thunk
/home/eric/svn/gcc/gcc/config/arm/arm.c:26762
0x187d4af arm_output_mi_thunk
/home/eric/svn/gcc/gcc/config/arm/arm.c:26783
0xcb9c94 cgraph_node::expand_thunk(bool, bool)
/home/eric/svn/gcc/gcc/cgraphunit.c:1783

because the code is wired for a short call.  Moreover, in PIC mode you need to 
work harder and fix up the minipool too with -mlong-calls.

Tested on ARM/Linux, OK for mainline, 8 and 7 branches?


2018-09-17  Eric Botcazou  

* config/arm/arm.c (arm_reorg): Skip Thumb reorg pass for thunks.
(arm32_output_mi_thunk): Deal with long calls.


2018-09-17  Eric Botcazou  

* g++.dg/other/thunk2a.C: New test.
* g++.dg/other/thunk2b.C: Likewise.

-- 
Eric BotcazouIndex: config/arm/arm.c
===
--- config/arm/arm.c	(revision 264342)
+++ config/arm/arm.c	(working copy)
@@ -17647,7 +17647,9 @@ arm_reorg (void)
 
   if (use_cmse)
 cmse_nonsecure_call_clear_caller_saved ();
-  if (TARGET_THUMB1)
+  if (cfun->is_thunk)
+;
+  else if (TARGET_THUMB1)
 thumb1_reorg ();
   else if (TARGET_THUMB2)
 thumb2_reorg ();
@@ -26721,6 +26723,8 @@ static void
 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
 		   HOST_WIDE_INT vcall_offset, tree function)
 {
+  const bool long_call_p = arm_is_long_call_p (function);
+
   /* On ARM, this_regno is R0 or R1 depending on
  whether the function returns an aggregate or not.
   */
@@ -26758,9 +26762,22 @@ arm32_output_mi_thunk (FILE *file, tree,
   TREE_USED (function) = 1;
 }
   rtx funexp = XEXP (DECL_RTL (function), 0);
+  if (long_call_p)
+{
+  emit_move_insn (temp, funexp);
+  funexp = temp;
+}
   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
-  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
+  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
   SIBLING_CALL_P (insn) = 1;
+  emit_barrier ();
+
+  /* Indirect calls require a bit of fixup in PIC mode.  */
+  if (long_call_p)
+{
+  split_all_insns_noflow ();
+  arm_reorg ();
+}
 
   insn = get_insns ();
   shorten_branches (insn);
// { dg-do compile { target arm*-*-* } }
// { dg-options "-mlong-calls -ffunction-sections }

class a {
public:
  virtual ~a();
};

class b : virtual a {};

class c : b {
  ~c();
};

c::~c() {}
// { dg-do compile { target arm*-*-* && fpic } }
// { dg-options "-mlong-calls -ffunction-sections -fPIC }

class a {
public:
  virtual ~a();
};

class b : virtual a {};

class c : b {
  ~c();
};

c::~c() {}


Re: [Patch, fortran] PR64120 - [F03] Wrong handling of allocatable character string

2018-09-17 Thread Paul Richard Thomas
Hi Janne,

Thanks. Committed as revision 264365.

Paul


On 17 September 2018 at 11:34, Janne Blomqvist
 wrote:
> On Mon, Sep 17, 2018 at 1:10 PM Paul Richard Thomas
>  wrote:
>>
>> This patch is relatively trivial. This initialization of the string
>> length was not being done.
>>
>> Bootstraps and regtests on FC28/x86_64. OK for trunk?
>
>
> Ok.
>
> --
> Janne Blomqvist



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


[PATCH c++/pr87295] -fdebug-types-section ICE

2018-09-17 Thread Nathan Sidwell

Richard,
this patch makes the ICE go away, but I really don't know if it's 
correct.  When cloning the type die I copy die_id, so it is found during 
the (currently ICEing) hash lookup.  In this particular testcase we 
clone the die twice.  Once from break_out_comdat_types and once from 
copy_decls_for_unworthy_types. That seems a little strange (but I have 
only the smallest understanding of debug data) As these tracebacks show:


Breakpoint 5, clone_as_declaration (
die=>)

at ../../../src/gcc/dwarf2out.c:8179
8179  clone->die_id = die->die_id;
(gdb) back
#0  clone_as_declaration (die=DW_TAG_structure_type >)

at ../../../src/gcc/dwarf2out.c:8179
#1  0x00e34faf in generate_skeleton_ancestor_tree 
(node=0x7fffe230) at ../../../src/gcc/dwarf2out.c:8345
#2  0x00e351c3 in generate_skeleton_bottom_up 
(parent=0x7fffe230) at ../../../src/gcc/dwarf2out.c:8419

#3  0x00e35291 in generate_skeleton (
die=>)

at ../../../src/gcc/dwarf2out.c:8449
#4  0x00e352ce in remove_child_or_replace_with_skeleton 
(unit=,
child=>,
prev=>)

at ../../../src/gcc/dwarf2out.c:8471
#5  0x00e3570a in break_out_comdat_types (die=0x773260a0 DW_TAG_compile_unit>)

at ../../../src/gcc/dwarf2out.c:8636
#6  0x00e75153 in dwarf2out_early_finish (filename=0x34de9d0 
"bug.ii") at ../../../src/gcc/dwarf2out.c:32034
#7  0x00daa210 in symbol_table::finalize_compilation_unit 
(this=0x7730d100) at ../../../src/gcc/cgraphunit.c:2783

#8  0x01417ec2 in compile_file () at ../../../src/gcc/toplev.c:480
#9  0x0141a90c in do_compile () at ../../../src/gcc/toplev.c:2170
#10 0x0141abf8 in toplev::main (this=0x7fffe56e, argc=8, 
argv=0x7fffe668) at ../../../src/gcc/toplev.c:2305
#11 0x0227731e in main (argc=8, argv=0x7fffe668) at 
../../../src/gcc/main.c:39

(gdb) p clone
$2 = 
(gdb) c
Continuing.

Breakpoint 5, clone_as_declaration (die=DW_TAG_structure_type >)

at ../../../src/gcc/dwarf2out.c:8179
8179  clone->die_id = die->die_id;
(gdb) p clone
$3 = 
(gdb) back
#0  clone_as_declaration (die=DW_TAG_structure_type >)

at ../../../src/gcc/dwarf2out.c:8179
#1  0x00e34dac in copy_ancestor_tree (unit=0x773260a0 DW_TAG_compile_unit>,
die=>, decl_table=0x7fffe2f0)

at ../../../src/gcc/dwarf2out.c:8267
#2  0x00e35adb in copy_decls_walk (unit=0x773260a0 DW_TAG_compile_unit>,
die=>, decl_table=0x7fffe2f0)

at ../../../src/gcc/dwarf2out.c:8764
#3  0x00e35ba6 in copy_decls_walk (unit=0x773260a0 DW_TAG_compile_unit>,
die=>, decl_table=0x7fffe2f0)

at ../../../src/gcc/dwarf2out.c:8790
#4  0x00e35ba6 in copy_decls_walk (unit=0x773260a0 DW_TAG_compile_unit>,
die=, 
decl_table=0x7fffe2f0) at ../../../src/gcc/dwarf2out.c:8790
#5  0x00e35c09 in copy_decls_for_unworthy_types 
(unit=)

at ../../../src/gcc/dwarf2out.c:8804
#6  0x00e7519a in dwarf2out_early_finish (filename=0x34de9d0 
"bug.ii") at ../../../src/gcc/dwarf2out.c:32047
#7  0x00daa210 in symbol_table::finalize_compilation_unit 
(this=0x7730d100) at ../../../src/gcc/cgraphunit.c:2783

#8  0x01417ec2 in compile_file () at ../../../src/gcc/toplev.c:480
#9  0x0141a90c in do_compile () at ../../../src/gcc/toplev.c:2170
#10 0x0141abf8 in toplev::main (this=0x7fffe56e, argc=8, 
argv=0x7fffe668) at ../../../src/gcc/toplev.c:2305
#11 0x0227731e in main (argc=8, argv=0x7fffe668) at 
../../../src/gcc/main.c:39

(gdb)


--
Nathan Sidwell
2018-09-17  Nathan Sidwell  

	PR c++/87295
	* dwarf2out.c (clone_as_declaration): Copy die_id and set
	comdat_type_p as appropriate.

Index: dwarf2out.c
===
--- dwarf2out.c	(revision 264332)
+++ dwarf2out.c	(working copy)
@@ -8176,8 +8176,12 @@ clone_as_declaration (dw_die_ref die)
 }
 }
 
+  clone->die_id = die->die_id;
   if (die->comdat_type_p)
-add_AT_die_ref (clone, DW_AT_signature, die);
+{
+  clone->comdat_type_p = true;
+  add_AT_die_ref (clone, DW_AT_signature, die);
+}
 
   add_AT_flag (clone, DW_AT_declaration, 1);
   return clone;
Index: testsuite/g++.dg/debug/pr87295.C
===
--- testsuite/g++.dg/debug/pr87295.C	(revision 0)
+++ testsuite/g++.dg/debug/pr87295.C	(working copy)
@@ -0,0 +1,20 @@
+// PR c++/87295 ICE in dwarf2out
+// { dg-options "-flto -ffat-lto-objects -fdebug-types-section -g -std=gnu++17" }
+
+template
+struct integral_constant
+{
+  static constexpr _Tp value = __v;
+  typedef _Tp value_type;
+  constexpr operator value_type() const noexcept { return value; }
+};
+
+typedef integral_constant false_type;
+
+template
+struct __or_;
+
+template<>
+struct __or_<>
+  : public false_type
+{ };


Re: [PATCH] Introduce libgcov.so run-time library (PR gcov-profile/84107).

2018-09-17 Thread Martin Liška
On 9/12/18 4:48 PM, Richard Biener wrote:
> On Wed, Sep 12, 2018 at 11:27 AM Martin Liška  wrote:
>>
>> On 9/11/18 5:08 PM, Alexander Monakov wrote:
>>> On Tue, 11 Sep 2018, Martin Liška wrote:
 I've discussed the topic with Alexander on the Cauldron and we hoped
 that the issue with unique __gcov_root can be handled with DECL_COMMON in 
 each DSO.
 Apparently this approach does not work as all DSOs in an executable have 
 eventually
 just a single instance of the __gcov_root, which is bad.
>>>
>>> No, that happens only if they have default visibility.  Emitting them with
>>> "hidden" visibility solves this.
>>
>> Ah, ok, thanks. So theoretically that way should work.
>>
>>>
 So that I returned back to catch the root of the failure. It shows that 
 even with
 -Wl,--dynamic-list-data __gcov_indirect_call_callee point to a different 
 variable
 among the DSOs. The reason is that the variable is TLS:
>>>
>>> (no, that the variable is TLS is not causing the bug; the issue is libraries
>>> carry public definitions of just one or both variables depending on if they
>>> have indirect calls or not, and the library state becomes "diverged" when
>>> one variable gets interposed while the other does not)
>>
>> I see, I'm attaching patch that does that. I can confirm your test-case works
>> fine w/o -Wl,--dynamic-list-data.
>>
>> I'm wondering if it will work as well with dlopen/dlsym machinery? Or now
>> the linker flag will be needed?
>>
>>>
 That said I would like to go this direction. I would be happy to receive
 a feedback. Then I'll test the patch.
>>>
>>> Hm, this TLS change is attacking the issue from a wrong direction.  What I
>>> suggested on the Cauldron as a simple and backportable way of solving this
>>> is to consolidate the two TLS variables into one TLS struct, which is then
>>> either interposed or not as a whole.
>>
>> Got it, current I prefer the solution.
> 
> Sounds good.  I notice the code had this before but...
> 
> +  TREE_PUBLIC (ic_tuple_var) = 1;
> +  DECL_EXTERNAL (ic_tuple_var) = 1;
> +  TREE_STATIC (ic_tuple_var) = 1;
> 
> that's one flag too much?!  I suggest to drop DECL_EXTERNAL.

I've done that. I'm sending updated version of the patch.
I'm currently testing the patch. Ready after it finishes tests?

Martin

> 
> Richard.
> 
>> Martin
>>
>>>
>>> Alexander
>>>
>>

>From 7b6c90b291141095072f916b0626e37ee9f2f40a Mon Sep 17 00:00:00 2001
From: marxin 
Date: Wed, 12 Sep 2018 11:24:59 +0200
Subject: [PATCH] Fix divergence in indirect profiling (PR gcov-profile/84107).

gcc/ChangeLog:

2018-09-17  Martin Liska  

	PR gcov-profile/84107
	* tree-profile.c (init_ic_make_global_vars):
	Remove ic_void_ptr_var and ic_gcov_type_ptr_var.
	Come up with new ic_tuple* variables.  Emit
	__gcov_indirect_call{,_topn} variables.
	(gimple_gen_ic_profiler): Access the variable
	and emit gimple.
	(gimple_gen_ic_func_profiler): Access
	__gcov_indirect_call.callee field.

libgcc/ChangeLog:

2018-09-17  Martin Liska  

	PR gcov-profile/84107
	* libgcov-profiler.c (__gcov_indirect_call):
	Change type to indirect_call_tuple.
	(struct indirect_call_tuple): New struct.
	(__gcov_indirect_call_topn_profiler): Change type.
	(__gcov_indirect_call_profiler_v2): Use the new
	variables.
	* libgcov.h (struct indirect_call_tuple): New struct
	definition.
---
 gcc/tree-profile.c| 78 ++-
 libgcc/libgcov-profiler.c | 25 -
 libgcc/libgcov.h  |  9 +
 3 files changed, 62 insertions(+), 50 deletions(-)

diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index f96bd4b9704..2b11e60cd53 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -53,6 +53,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "stringpool.h"
 #include "attribs.h"
 #include "tree-pretty-print.h"
+#include "langhooks.h"
+#include "stor-layout.h"
 
 static GTY(()) tree gcov_type_node;
 static GTY(()) tree tree_interval_profiler_fn;
@@ -64,8 +66,9 @@ static GTY(()) tree tree_ior_profiler_fn;
 static GTY(()) tree tree_time_profiler_counter;
 
 
-static GTY(()) tree ic_void_ptr_var;
-static GTY(()) tree ic_gcov_type_ptr_var;
+static GTY(()) tree ic_tuple_var;
+static GTY(()) tree ic_tuple_counters_field;
+static GTY(()) tree ic_tuple_callee_field;
 static GTY(()) tree ptr_void;
 
 /* Do initialization work for the edge profiler.  */
@@ -81,38 +84,35 @@ init_ic_make_global_vars (void)
   tree gcov_type_ptr;
 
   ptr_void = build_pointer_type (void_type_node);
+  gcov_type_ptr = build_pointer_type (get_gcov_type ());
 
-  ic_void_ptr_var
-= build_decl (UNKNOWN_LOCATION, VAR_DECL,
-		  get_identifier (
-			  (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ?
-			   "__gcov_indirect_call_topn_callee" :
-			   "__gcov_indirect_call_callee")),
-		  ptr_void);
-  TREE_PUBLIC (ic_void_ptr_var) = 1;
-  DECL_EXTERNAL (ic_void_ptr_var) = 1;
-  TREE_STATIC (ic_void_ptr_var) = 1;
-  DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
-  DECL_INITIAL (ic

Re: [PATCH] Introduce libgcov.so run-time library (PR gcov-profile/84107).

2018-09-17 Thread Martin Liška
On 9/12/18 5:02 PM, Alexander Monakov wrote:
> On Wed, 12 Sep 2018, Martin Liška wrote:
>> I see, I'm attaching patch that does that. I can confirm your test-case works
>> fine w/o -Wl,--dynamic-list-data.
>>
>> I'm wondering if it will work as well with dlopen/dlsym machinery? Or now
>> the linker flag will be needed?
> 
> No, for this issue dlopen doesn't pose extra complications as far as I know.
> 
>>> Hm, this TLS change is attacking the issue from a wrong direction.  What I
>>> suggested on the Cauldron as a simple and backportable way of solving this
>>> is to consolidate the two TLS variables into one TLS struct, which is then
>>> either interposed or not as a whole.
>>
>> Got it, current I prefer the solution.
> 
> Ack, I think this is a good way to solve the specific issue in the PR.
> 
> I'd like to remind that the point of the PR was to draw attention to larger
> design issues in libgcov.
> 
> There's no decision on the topic of gcov.so. At the Cauldron I had the chance
> to talk to you and Richi separately, but we didn't meet together.  Would it
> help if I started a new thread summarizing the current status from my point of
> view?

Yes please. Note that we have also fully working version of the shared library 
right now.
The only question is whether it worth coming up with it? I'm open for 
introduction of it.

Martin

> 
> Alexander
> 



Re: [PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-17 Thread Richard Sandiford
Andrew Stubbs  writes:
> On 17/09/18 10:14, Richard Sandiford wrote:
>>  writes:
>>> If the autovectorizer tries to load a GCN 64-lane vector elementwise then it
>>> blows away the register file and produces horrible code.
>> 
>> Do all the registers really need to be live at once, or is it "just" bad
>> scheduling?  I'd have expected the initial rtl to load each element and
>> then insert it immediately, so that the number of insertions doesn't
>> directly affect register pressure.
>
> They don't need to be live at once, architecturally speaking, but that's 
> the way it happened.  No doubt there is another solution to fix it, but 
> it's not a use case I believe we want to spend time optimizing.
>
> Actually, I've not tested what happens without this in GCC 9, so that's 
> probably worth checking, but I'd still be concerned about it blowing up 
> on real code somewhere.
>
>>> This patch simply disallows elementwise loads for such large vectors.
>>> Is there
>>> a better way to disable this in the middle-end?
>> 
>> Do you ever want elementwise accesses for GCN?  If not, it might be
>> better to disable them in the target's cost model.
>
> The hardware is perfectly capable of extracting or setting vector 
> elements, but given that it can do full gather/scatter from arbitrary 
> addresses it's not something we want to do in general.
>
> A normal scalar load will use a vector register (lane 0). The value then 
> has to be moved to a scalar register, and only then can v_writelane 
> insert it into the final destination.

OK, sounds like the cost of vec_construct is too low then.  But looking
at the port, I see you have:

/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */

int
gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
{
  /* Always vectorize.  */
  return 1;
}

which short-circuits the cost-model altogether.  Isn't that part
of the problem?

Richard

>
> Alternatively you could use a mask_load to load the value directly to 
> the correct lane, but I don't believe that's something GCC does.
>
> Andrew


Re: [PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-17 Thread Andrew Stubbs

On 17/09/18 12:43, Richard Sandiford wrote:

OK, sounds like the cost of vec_construct is too low then.  But looking
at the port, I see you have:

/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */

int
gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
{
   /* Always vectorize.  */
   return 1;
}

which short-circuits the cost-model altogether.  Isn't that part
of the problem?


Well, it's possible that that's a little simplistic. ;-)

Although, actually the elementwise issue predates the existence of 
gcn_vectorization_cost, and the default does appear to penalize 
vec_construct somewhat.


Actually, the default definition doesn't seem to do much besides 
increase vec_construct, so I'm not sure now why I needed to change it? 
Hmm, more experiments to do.


Thanks for the pointer.

Andrew


Re: [PATCH] Optimize sin(atan(x)), take 2

2018-09-17 Thread Giuliano Augusto Faulin Belinassi
Ping.

On Mon, Sep 3, 2018 at 4:11 PM, Giuliano Augusto Faulin Belinassi
 wrote:
> Fixed the issues pointed by the previous discussions. Closes PR86829.
>
> Adds substitution rules for sin(atan(x)) and cos(atan(x)), being
> careful with overflow issues by constructing a assumed convergence
> constant (see comment in real.c).
>
> 2018-09-03  Giuliano Belinassi 
>
> * match.pd: add simplification rules to sin(atan(x)) and cos(atan(x)).
> * real.c: add code for assumed convergence constant to sin(atan(x)).
> * real.h: allows the added code from real.c to be called externally.
> * tree.c: add code for bulding nodes with the convergence constant.
> * tree.h: allows the added code from tree.c to be called externally.
> * sinatan-1.c: tests assumed convergence constant.
> * sinatan-2.c: tests simplification rule.
> * sinatan-3.c: likewise.
>
> There seems to be no broken tests in trunk that are related to this
> modification.


[PATCH 1/2] [ARC] Check for odd-even register when emitting double mac ops.

2018-09-17 Thread Claudiu Zissulescu
Avoid generate dmac instructions when the register is not odd-even,
use instead the equivalent mac instruction.

gcc/
Claudiu Zissulescu  

* config/arc/arc.md (maddsidi4_split): Don't use dmac if the
destination register is not odd-even.
(umaddsidi4_split): Likewise.

gcc/testsuite/
Claudiu Zissulescu  

* gcc.target/arc/tmac-3.c: New file.
---
 gcc/config/arc/arc.md |  4 ++--
 gcc/testsuite/gcc.target/arc/tmac-3.c | 17 +
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/tmac-3.c

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index dbcd7098bec..2d108ef166d 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6078,7 +6078,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
   "{
rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
emit_move_insn (acc_reg, operands[3]);
-   if (TARGET_PLUS_MACD)
+   if (TARGET_PLUS_MACD && even_register_operand (operands[0], DImode))
  emit_insn (gen_macd (operands[0], operands[1], operands[2]));
else
  {
@@ -6178,7 +6178,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
   "{
rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
emit_move_insn (acc_reg, operands[3]);
-   if (TARGET_PLUS_MACD)
+   if (TARGET_PLUS_MACD && even_register_operand (operands[0], DImode))
  emit_insn (gen_macdu (operands[0], operands[1], operands[2]));
else
  {
diff --git a/gcc/testsuite/gcc.target/arc/tmac-3.c 
b/gcc/testsuite/gcc.target/arc/tmac-3.c
new file mode 100644
index 000..3c8c1201f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/tmac-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { ! { clmcpu } } } */
+/* { dg-options "-mcpu=hs38 -Os" } */
+
+/* The compiler will assign r1r2 as a DI register, but it doesn't fit
+   the macd operation, hence we need to fall back on the mac
+   instruction.  */
+typedef long long myint64_t;
+
+extern int d (int, myint64_t);
+int b (int c)
+{
+  int x = (int) d;
+  d(c, (myint64_t)x * 2 + 1);
+}
+
+/* { dg-final { scan-assembler "mac\\\s+r1" } } */
-- 
2.17.1



[PATCH 0/2] [ARC] Bug fix, improve size figures.

2018-09-17 Thread Claudiu Zissulescu
Hi Andrew,

Please find two patches, one is fixing the dmac issue with non odd-even 
register pairs (test added). The second patch tries to improve the size figures 
by managing the long immediate field.

Thanks,
Claudiu


Claudiu Zissulescu (2):
  [ARC] Check for odd-even register when emitting double mac ops.
  [ARC] Avoid specific constants to end in limm field.

 gcc/config/arc/arc.md   | 55 ++---
 gcc/config/arc/constraints.md   |  6 +++
 gcc/testsuite/gcc.target/arc/tmac-3.c   | 17 
 gcc/testsuite/gcc.target/arc/tph_addx.c | 53 
 4 files changed, 97 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/tmac-3.c
 create mode 100644 gcc/testsuite/gcc.target/arc/tph_addx.c

-- 
2.17.1



[PATCH 2/2] [ARC] Avoid specific constants to end in limm field.

2018-09-17 Thread Claudiu Zissulescu
The 3-operand instructions accepts to place an immediate into the
second operand. However, this immediate will end up in the long
immediate field. This patch avoids constants to end up in the limm
field for particular instructions when compiling for size.

gcc/
-xx-xx  Claudiu Zissulescu  

* config/arc/arc.md (*add_n): Clean up pattern, update instruction
constraints.
(ashlsi3_insn): Update instruction constraints.
(ashrsi3_insn): Likewise.
(rotrsi3): Likewise.
(add_shift): Likewise.
* config/arc/constraints.md (Csz): New 32 bit constraint. It
avoids placing in the limm field small constants which, otherwise,
could end into a small instruction.
---
 gcc/config/arc/arc.md   | 51 +---
 gcc/config/arc/constraints.md   |  6 +++
 gcc/testsuite/gcc.target/arc/tph_addx.c | 53 +
 3 files changed, 78 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/tph_addx.c

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 2d108ef166d..c28a87cd3b0 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3056,30 +3056,17 @@ core_3, archs4x, archs4xd, archs4xd_slow"
(set (match_dup 3) (match_dup 4))])
 
 (define_insn "*add_n"
-  [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,W,W,w,w")
-   (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" 
"Rcqq,c,c,c,c,c")
-   (match_operand:SI 2 "_1_2_3_operand" ""))
-(match_operand:SI 3 "nonmemory_operand" 
"0,0,c,?Cal,?c,??Cal")))]
+  [(set (match_operand:SI 0 "dest_reg_operand" "=q,r,r")
+   (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "q,r,r")
+ (match_operand:SI 2 "_2_4_8_operand" ""))
+(match_operand:SI 3 "nonmemory_operand" "0,r,Csz")))]
   ""
-  "add%c2%? %0,%3,%1%&"
+  "add%z2%?\\t%0,%3,%1%&"
   [(set_attr "type" "shift")
-   (set_attr "length" "*,4,4,8,4,8")
-   (set_attr "predicable" "yes,yes,no,no,no,no")
-   (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond")
-   (set_attr "iscompact" "maybe,false,false,false,false,false")])
-
-(define_insn "*add_n"
-  [(set (match_operand:SI 0 "dest_reg_operand"  "=Rcqq,Rcw,W,  
W,w,w")
-   (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "Rcqq,  c,c,  
c,c,c")
- (match_operand:SI 2 "_2_4_8_operand"   ""))
-(match_operand:SI 3 "nonmemory_operand""0,  
0,c,Cal,c,Cal")))]
-  ""
-  "add%z2%? %0,%3,%1%&"
-  [(set_attr "type" "shift")
-   (set_attr "length" "*,4,4,8,4,8")
-   (set_attr "predicable" "yes,yes,no,no,no,no")
-   (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond")
-   (set_attr "iscompact" "maybe,false,false,false,false,false")])
+   (set_attr "length" "*,4,8")
+   (set_attr "predicable" "yes,no,no")
+   (set_attr "cond" "canuse,nocond,nocond")
+   (set_attr "iscompact" "maybe,false,false")])
 
 ;; N.B. sub[123] has the operands of the MINUS in the opposite order from
 ;; what synth_mult likes.
@@ -3496,7 +3483,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
 ; provide one alternatice for this, without condexec support.
 (define_insn "*ashlsi3_insn"
   [(set (match_operand:SI 0 "dest_reg_operand"   "=Rcq,Rcqq,Rcqq,Rcw, 
w,   w")
-   (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq,   0,  0, 
c,cCal")
+   (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq,   0,  0, 
c,cCsz")
   (match_operand:SI 2 "nonmemory_operand"  "K,  K,RcqqM, 
cL,cL,cCal")))]
   "TARGET_BARREL_SHIFTER
&& (register_operand (operands[1], SImode)
@@ -3509,7 +3496,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
 
 (define_insn "*ashrsi3_insn"
   [(set (match_operand:SI 0 "dest_reg_operand" 
"=Rcq,Rcqq,Rcqq,Rcw, w,   w")
-   (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq,   0,  0, 
c,cCal")
+   (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq,   0,  0, 
c,cCsz")
 (match_operand:SI 2 "nonmemory_operand"  "K,  K,RcqqM, 
cL,cL,cCal")))]
   "TARGET_BARREL_SHIFTER
&& (register_operand (operands[1], SImode)
@@ -3536,7 +3523,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
 
 (define_insn "rotrsi3"
   [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w,   w")
-   (rotatert:SI (match_operand:SI 1 "register_operand"  " 0,cL,cCal")
+   (rotatert:SI (match_operand:SI 1 "register_operand"  " 0,cL,cCsz")
 (match_operand:SI 2 "nonmemory_operand" "cL,cL,cCal")))]
   "TARGET_BARREL_SHIFTER"
   "ror%? %0,%1,%2"
@@ -4284,16 +4271,16 @@ core_3, archs4x, archs4xd, archs4xd_slow"
 (define_peephole2
   [(set (match_operand:SI 0 "dest_reg_operand" "")
(ashift:SI (match_operand:SI 1 "register_operand" "")
-  (match_operand:SI 2 "const_int_operand" "")))
+  (match_operand:SI 2 "_1_2_

Re: [ARM] Fix ICE during thunk generation with -mlong-calls

2018-09-17 Thread Richard Earnshaw (lists)
On 17/09/18 12:19, Eric Botcazou wrote:
> Hi,
> 
> this is a regression present on mainline, 8 and 7 branches.  The new, RTL 
> implementation of arm32_output_mi_thunk breaks during the libstdc++ build if 
> you configure the compiler with -mlong-calls by default:
> 
> 0xdb57eb gen_reg_rtx(machine_mode)
> /home/eric/svn/gcc/gcc/emit-rtl.c:1155
> 0xde9ae7 force_reg(machine_mode, rtx_def*)
> /home/eric/svn/gcc/gcc/explow.c:654
> 0x1bf73bf gen_sibcall(rtx_def*, rtx_def*, rtx_def*)
> /home/eric/svn/gcc/gcc/config/arm/arm.md:8272
> 0x187d3b1 arm32_output_mi_thunk
> /home/eric/svn/gcc/gcc/config/arm/arm.c:26762
> 0x187d4af arm_output_mi_thunk
> /home/eric/svn/gcc/gcc/config/arm/arm.c:26783
> 0xcb9c94 cgraph_node::expand_thunk(bool, bool)
> /home/eric/svn/gcc/gcc/cgraphunit.c:1783
> 
> because the code is wired for a short call.  Moreover, in PIC mode you need 
> to 
> work harder and fix up the minipool too with -mlong-calls.
> 
> Tested on ARM/Linux, OK for mainline, 8 and 7 branches?
> 
> 
> 2018-09-17  Eric Botcazou  
> 
>   * config/arm/arm.c (arm_reorg): Skip Thumb reorg pass for thunks.

this seems to contradict your statement above about having to work
harder to fix up minipools.  why is it correct to skip this entirely?

>   (arm32_output_mi_thunk): Deal with long calls.
> 
> 
> 2018-09-17  Eric Botcazou  
> 
>   * g++.dg/other/thunk2a.C: New test.
>   * g++.dg/other/thunk2b.C: Likewise.
> 
> 
> p.diff
> 
> 
> Index: config/arm/arm.c
> ===
> --- config/arm/arm.c  (revision 264342)
> +++ config/arm/arm.c  (working copy)
> @@ -17647,7 +17647,9 @@ arm_reorg (void)
>  
>if (use_cmse)
>  cmse_nonsecure_call_clear_caller_saved ();
> -  if (TARGET_THUMB1)
> +  if (cfun->is_thunk)
> +;
> +  else if (TARGET_THUMB1)
>  thumb1_reorg ();
>else if (TARGET_THUMB2)
>  thumb2_reorg ();
> @@ -26721,6 +26723,8 @@ static void
>  arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
>  HOST_WIDE_INT vcall_offset, tree function)
>  {
> +  const bool long_call_p = arm_is_long_call_p (function);
> +
>/* On ARM, this_regno is R0 or R1 depending on
>   whether the function returns an aggregate or not.
>*/
> @@ -26758,9 +26762,22 @@ arm32_output_mi_thunk (FILE *file, tree,
>TREE_USED (function) = 1;
>  }
>rtx funexp = XEXP (DECL_RTL (function), 0);
> +  if (long_call_p)
> +{
> +  emit_move_insn (temp, funexp);
> +  funexp = temp;
> +}
>funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
> -  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, 
> NULL_RTX));
> +  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, 
> NULL_RTX));
>SIBLING_CALL_P (insn) = 1;
> +  emit_barrier ();

Why do we need a barrier here unconditionally (ie in the non-longcall case)?

R.

> +
> +  /* Indirect calls require a bit of fixup in PIC mode.  */
> +  if (long_call_p)
> +{
> +  split_all_insns_noflow ();
> +  arm_reorg ();
> +}
>  
>insn = get_insns ();
>shorten_branches (insn);
> 
> 
> thunk2a.C
> 
> 
> // { dg-do compile { target arm*-*-* } }
> // { dg-options "-mlong-calls -ffunction-sections }
> 
> class a {
> public:
>   virtual ~a();
> };
> 
> class b : virtual a {};
> 
> class c : b {
>   ~c();
> };
> 
> c::~c() {}
> 
> 
> thunk2b.C
> 
> 
> // { dg-do compile { target arm*-*-* && fpic } }
> // { dg-options "-mlong-calls -ffunction-sections -fPIC }
> 
> class a {
> public:
>   virtual ~a();
> };
> 
> class b : virtual a {};
> 
> class c : b {
>   ~c();
> };
> 
> c::~c() {}
> 



Re: [PATCH, rs6000, committed] Use correct ABI type in mmintrin.h and xmmintrin.h

2018-09-17 Thread Bill Schmidt
On 9/15/18 4:21 PM, Segher Boessenkool wrote:
> On Sat, Sep 15, 2018 at 03:28:17PM -0500, Bill Schmidt wrote:
>> Jinsong Ji reported that these header files are using the non-ABI type
>> __int128_t rather than __int128.  This patch from Jinsong corrects this.
>> Bootstrapped and tested on powerpc64le-linux-gnu with no regressions,
>> committed as obvious.
> Thanks to both of you!
>
> Does it actually matter for something, other than being cleaner?

Yes, it's a compatibility issue.  Cleaning this up makes it easier to port
the headers for use with other compilers.

Bill

>
> Segher
>



[PATCH] Fix PR87328

2018-09-17 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2018-09-17  Richard Biener  

PR tree-optimization/87328
* tree-ssa-sccvn.c (process_bb): Remove assertion about not
visiting unexecutable backedges when not iterating.
(do_rpo_vn): Mark all edges not executable even when not
iterating.

* gcc.dg/torture/pr87328.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264364)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -5978,7 +5978,6 @@ process_bb (rpo_elim &avail, basic_block
fprintf (dump_file,
 "marking outgoing edge %d -> %d executable\n",
 e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
  e->flags |= EDGE_EXECUTABLE;
  e->dest->flags |= BB_EXECUTABLE;
}
@@ -6125,7 +6124,6 @@ process_bb (rpo_elim &avail, basic_block
 "marking known outgoing %sedge %d -> %d executable\n",
 e->flags & EDGE_DFS_BACK ? "back-" : "",
 e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
  e->flags |= EDGE_EXECUTABLE;
  e->dest->flags |= BB_EXECUTABLE;
}
@@ -6148,7 +6146,6 @@ process_bb (rpo_elim &avail, basic_block
fprintf (dump_file,
 "marking outgoing edge %d -> %d executable\n",
 e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
  e->flags |= EDGE_EXECUTABLE;
  e->dest->flags |= BB_EXECUTABLE;
}
@@ -6390,10 +6387,7 @@ do_rpo_vn (function *fn, edge entry, bit
{
  if (e->flags & EDGE_DFS_BACK)
has_backedges = true;
- if (! iterate && (e->flags & EDGE_DFS_BACK))
-   e->flags |= EDGE_EXECUTABLE;
- else
-   e->flags &= ~EDGE_EXECUTABLE;
+ e->flags &= ~EDGE_EXECUTABLE;
  if (e == entry)
continue;
  if (bb_to_rpo[e->src->index] > i)
Index: gcc/testsuite/gcc.dg/torture/pr87328.c
===
--- gcc/testsuite/gcc.dg/torture/pr87328.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr87328.c  (working copy)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-ccp -fno-tree-forwprop" } */
+
+void
+tp (void)
+{
+  int qt;
+
+  qt = 0;
+  if (qt != 0)
+{
+  if (0)
+   {
+h5:
+ qt = 0;
+ while (qt < 1)
+   {
+   }
+   }
+
+  ++qt;
+}
+
+  goto h5;
+}


Re: [PATCH]: Allow TARGET_SCHED_ADJUST_PRIORITY hook to reduce priority

2018-09-17 Thread John David Anglin

On 2018-09-17 5:08 AM, Andreas Schwab wrote:

PR rtl-optimization/85458
* sel-sched.c (sel_target_adjust_priority): Remove wrong
assertion.

diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index 824f1ec340..1be977d70b 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -3330,8 +3330,6 @@ sel_target_adjust_priority (expr_t expr)
else
  new_priority = priority;
  
-  gcc_assert (new_priority >= 0);

-
/* If the priority has changed, adjust EXPR_PRIORITY_ADJ accordingly.  */
EXPR_PRIORITY_ADJ (expr) = new_priority - EXPR_PRIORITY (expr);
  
I added the assert because the hppa implementation of 
TARGET_SCHED_ADJUST_PRIORITY assumes
scheduling priorities are non negative.  If that is not the case, I tend 
to think this should be documented.


It seems ia64 is the only target tripping on the assert.

Dave

--
John David Anglin  dave.ang...@bell.net



[PATCH] Fix PR63155

2018-09-17 Thread Richard Biener


The following fixes the memory use at -O0 from out-of-SSA coalescing
(conflict computation in particular) with lots of abnormals (setjmp
calls).  After reviewing I found no reason to not use
compute_optimized_partition_bases since we populate the coalesce lists
and used_in_copy pieces correctly at -O0 and using 
compute_optimized_partition_bases avoids having gigantic bases for
type-equivalent variables, specifically:

-   /* This restricts what anonymous SSA names we can coalesce
-  as it restricts the sets we compute conflicts for.
-  Using TREE_TYPE to generate sets is the easiest as
-  type equivalency also holds for SSA names with the same
-  underlying decl.
-
-  Check gimple_can_coalesce_p when changing this code.  */
-   m->base.from = (TYPE_CANONICAL (TREE_TYPE (var))
-   ? TYPE_CANONICAL (TREE_TYPE (var))
-   : TREE_TYPE (var));

but instead uses bases that only include things that we desire to
coalesce later.  Memory use of the conflict bitmaps is quadratic
in the sizes of the base sets so splitting up to more bases is
desirable (and doesn't change code generation).

{,-O0} Bootstrap and regtest running on x86_64-unknown-linux-gnu.

I still have one other memory/compile-time improvement patch but
I lack a testcase that shows a measurable difference.

Richard.

2018-09-17  Richard Biener  

PR middle-end/63155
* tree-ssa-coalesce.c (tree_int_map_hasher): Remove.
(compute_samebase_partition_bases): Likewise.
(coalesce_ssa_name): Always use compute_optimized_partition_bases.

Index: gcc/tree-ssa-coalesce.c
===
--- gcc/tree-ssa-coalesce.c (revision 264368)
+++ gcc/tree-ssa-coalesce.c (working copy)
@@ -1720,89 +1720,6 @@ compute_optimized_partition_bases (var_m
   partition_delete (tentative);
 }
 
-/* Hashtable helpers.  */
-
-struct tree_int_map_hasher : nofree_ptr_hash 
-{
-  static inline hashval_t hash (const tree_int_map *);
-  static inline bool equal (const tree_int_map *, const tree_int_map *);
-};
-
-inline hashval_t
-tree_int_map_hasher::hash (const tree_int_map *v)
-{
-  return tree_map_base_hash (v);
-}
-
-inline bool
-tree_int_map_hasher::equal (const tree_int_map *v, const tree_int_map *c)
-{
-  return tree_int_map_eq (v, c);
-}
-
-/* This routine will initialize the basevar fields of MAP with base
-   names.  Partitions will share the same base if they have the same
-   SSA_NAME_VAR, or, being anonymous variables, the same type.  This
-   must match gimple_can_coalesce_p in the non-optimized case.  */
-
-static void
-compute_samebase_partition_bases (var_map map)
-{
-  int x, num_part;
-  tree var;
-  struct tree_int_map *m, *mapstorage;
-
-  num_part = num_var_partitions (map);
-  hash_table tree_to_index (num_part);
-  /* We can have at most num_part entries in the hash tables, so it's
- enough to allocate so many map elements once, saving some malloc
- calls.  */
-  mapstorage = m = XNEWVEC (struct tree_int_map, num_part);
-
-  /* If a base table already exists, clear it, otherwise create it.  */
-  free (map->partition_to_base_index);
-  map->partition_to_base_index = (int *) xmalloc (sizeof (int) * num_part);
-
-  /* Build the base variable list, and point partitions at their bases.  */
-  for (x = 0; x < num_part; x++)
-{
-  struct tree_int_map **slot;
-  unsigned baseindex;
-  var = partition_to_var (map, x);
-  if (SSA_NAME_VAR (var)
- && (!VAR_P (SSA_NAME_VAR (var))
- || !DECL_IGNORED_P (SSA_NAME_VAR (var
-   m->base.from = SSA_NAME_VAR (var);
-  else
-   /* This restricts what anonymous SSA names we can coalesce
-  as it restricts the sets we compute conflicts for.
-  Using TREE_TYPE to generate sets is the easiest as
-  type equivalency also holds for SSA names with the same
-  underlying decl.
-
-  Check gimple_can_coalesce_p when changing this code.  */
-   m->base.from = (TYPE_CANONICAL (TREE_TYPE (var))
-   ? TYPE_CANONICAL (TREE_TYPE (var))
-   : TREE_TYPE (var));
-  /* If base variable hasn't been seen, set it up.  */
-  slot = tree_to_index.find_slot (m, INSERT);
-  if (!*slot)
-   {
- baseindex = m - mapstorage;
- m->to = baseindex;
- *slot = m;
- m++;
-   }
-  else
-   baseindex = (*slot)->to;
-  map->partition_to_base_index[x] = baseindex;
-}
-
-  map->num_basevars = m - mapstorage;
-
-  free (mapstorage);
-}
-
 /* Given an initial var_map MAP, coalesce variables and return a partition map
with the resulting coalesce.  Note that this function is called in either
live range computation context or out-of-ssa context, indicated by MAP.  */
@@ -1824,10 +1741,7 @@ coalesce_ssa_name (var_map map)
 
   partition_view_bitmap (map, used_in_copies);
 
-  if (flag

Re: [PATCH]: Allow TARGET_SCHED_ADJUST_PRIORITY hook to reduce priority

2018-09-17 Thread Andreas Schwab
On Sep 17 2018, John David Anglin  wrote:

> On 2018-09-17 5:08 AM, Andreas Schwab wrote:
>>  PR rtl-optimization/85458
>>  * sel-sched.c (sel_target_adjust_priority): Remove wrong
>>  assertion.
>>
>> diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
>> index 824f1ec340..1be977d70b 100644
>> --- a/gcc/sel-sched.c
>> +++ b/gcc/sel-sched.c
>> @@ -3330,8 +3330,6 @@ sel_target_adjust_priority (expr_t expr)
>> else
>>   new_priority = priority;
>>   -  gcc_assert (new_priority >= 0);
>> -
>> /* If the priority has changed, adjust EXPR_PRIORITY_ADJ accordingly.  */
>> EXPR_PRIORITY_ADJ (expr) = new_priority - EXPR_PRIORITY (expr);
>>   
> I added the assert because the hppa implementation of
> TARGET_SCHED_ADJUST_PRIORITY assumes
> scheduling priorities are non negative.  If that is not the case, I tend
> to think this should be documented.
>
> It seems ia64 is the only target tripping on the assert.

The assertion only happens at -O3, see
.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH][Middle-end][Version 2]Add a new option to control inlining only on static functions

2018-09-17 Thread Qing Zhao


> On Sep 14, 2018, at 3:45 PM, Andrew Pinski  wrote:
> 
> On Fri, Sep 14, 2018 at 1:42 PM Andrew Pinski  > wrote:
>> 
>> On Fri, Sep 14, 2018 at 1:34 PM Qing Zhao  wrote:
>>> 
>>> Hi,
>>> 
>>> this is the 2nd version of the patch to add a new first-class option
>>> 
>>> -finline-only-static
>>> 
>>> to guide inlining only on static functions.
>>> 
>>> -finline-only-static
>>>  By default, GCC inlines functions without considering whether they are 
>>> static
>>>  or not. This flag guides inliner to only inline static functions.
>>> 
>>>  Off by default.
>>> 
>>> The major purpose of this option is to provide a way for the user to 
>>> disable inlining of global functions.
>>> this is mainly to help online patching users to control the memory 
>>> consumption and ease the debugging.
>>> 
>>> please take a look and let me know any comments and suggestions:
>> 
>> How does this interact with -finline-functions?  Or rather isn't this
>> just the opposite of -finline-functions?
> 
> Seems to me, that this new option should just disable
> -finline-small-functions and -finline-functions.
> That is it is an alias with -fno-inline-small-functions -fno-inline-functions.

NO,  this option is NOT to disable -finline-small-funcitons and 
-finline-functions. 

when it’s specified, -finline-small-function and -finline-functions will ONLY 
inline static functions, all global functions will be disabled to 
be inlined.

Qing

> 
> My question comes up if someone uses -finline-functions
> -finline-only-static; or -finline-only-static -finline-functions.
> That it is a rather interesting combition of options but the
> interaction of the two options are not documented and seems backwards.
> 
> Thanks,
> Andrew Pinski
> 
>> 
>> Thanks,
>> Andrew
>> 
>>> 
>>> thanks a lot.
>>> 
>>> Qing
>>> 
>>> gcc/ChangeLog
>>> 
>>> +2018-09-13  Qing Zhao  
>>> +
>>> +   * cif-code.def (FUNCTION_EXTERN): New CIFCODE.
>>> +   * common.opt (-finline-only-static): New option.
>>> +   * doc/invoke.texi: Document -finline-only-static.
>>> +   * ipa-inline.c (can_inline_edge_p): Control inlining based on
>>> +   function's visibility.
>>> 
>>> gcc/testsuite/ChangeLog
>>> 
>>> +2018-09-13  Qing Zhao  
>>> +
>>> +   * gcc.dg/inline_only_static.c: New test.
>>> +
>>> 
>>> 
>>> 
 On Sep 11, 2018, at 10:12 AM, Qing Zhao  wrote:
 
 Hi,
 
 This is a simple patch to add a new first-class option
 
 -finline-visibility={all|extern|static}
 
 to finer control inlining based on function’s visibility.
 
 '-finline-visibility=[all|extern|static]'
By default, GCC inlines functions without considering their
visibility.  This flag allows finer control of inlining based on
their visibility.
 
The value 'extern' tells the compiler to only inline functions with
external visibility.  The value 'static' tells the compiler to only
inline functions with static visibility.  The value 'all' tells the
compilers to inline functions without considering their visibility.
 
The default value of '-finline-visibility' is 'all'.
 
 The major purpose of this option is to provide a way for the user
 to finer choose the inline candidates based on function’s visibility.
 For example, some online patching users might want to limit the inlining
 to only static functions to avoid patching the callers of global functions
 in order to control the memory consumption caused by online patching.
 
 let me know any comments and suggestions.
 
 thanks.
 
 Qing



[RFA] Minor cleanup to VRP/EVRP handling of deferred edge/switch optimization

2018-09-17 Thread Jeff Law
This is a relatively minor cleanup that I should have caught last cycle,
but somehow missed.

We have two structures TO_REMOVE_EDGES and TO_UPDATE_SWITCH_STMTS which
are used by the VRP/EVRP code to record edges to remove and switch
statements that need updating.

They are currently implemented as globals within tree-vrp.c with an
appropriate extern within tree-vrp.h.

The code to walk those vectors was only implemented in VRP, but we can
(and do) add to those vectors within EVRP.   So EVRP would detect
certain edges as dead or switches that were needed simplification, but
they were left as-is because EVRP never walked the vectors to do the
necessary cleanup.

This change pushes the vectors into the vr_values structure.  They're
initialized in the ctor and we verify they're properly cleaned up in the
dtor.  This obviously avoids the global object carrying state, but also
catches cases where we record that an optimization was possible but
failed to update the IL appropriately.

As a side effect, we don't need to bother with initializing and wiping
EDGE_IGNORE for jump threading in VRP.  We just mark the appropriate
edges at the same time we put them in TO_REMOVE_EDGES within vr_values.

vrp113.c is an example of where EVRP detected optimizations should be
possible, but failed to update the IL.  Given the test really wanted to
check VRP's behavior, I've disabled EVRP for that test.

Bootstrapped and regression tested on x86_64-linux-gnu.

OK for the trunk?

Jeff


* gimple-ssa-evrp.c (evrp_dom_walker::cleanup): Call
vr_values::cleanup_edges_and_switches.
* tree-vrp.c (to_remove_edges, to_update_switch_stmts): Moved into
vr_values class.
(identify_jump_threads): Remove EDGE_IGNORE handling.
(execute_vrp): Move handling of to_remove_edges and
to_update_switch_stmts into vr_values class member functions.
* tree-vrp.h (switch_update, to_remove_edges): Remove declarations.
(to_update_switch_stmts): Likewise.
* vr-values.c: Include cfghooks.h. 
(vr_values::vr_values): Initialize to_remove_edges and
to_update_switch_stmts.
(vr_values::~vr_values): Verify to_remove_edges and
to_update_switch_stmts are empty.
(vr_values::simplify_switch_using_ranges): Set EDGE_IGNORE as needed.
(vr_values::cleanup_edges_and_switches): New member function.
* vr-values.h (vr_values): Add cleanup_edges_and_switches member
function.  Add new data members.

* gcc.dg/tree-ssa/vrp113.c: Disable EVRP.

diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c
index b9a054fd2ee..50e8adc1aad 100644
--- a/gcc/gimple-ssa-evrp.c
+++ b/gcc/gimple-ssa-evrp.c
@@ -287,6 +287,8 @@ evrp_dom_walker::cleanup (void)
   gimple *stmt = stmts_to_fixup.pop ();
   fixup_noreturn_call (stmt);
 }
+
+  evrp_folder.vr_values->cleanup_edges_and_switches ();
 }
 
 /* Main entry point for the early vrp pass which is a simplified non-iterative
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp113.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vrp113.c
index 5069fdfa784..ab8d91e0f10 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp113.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp113.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fdisable-tree-evrp" } */
 
 int f(int a) {
 switch (a & 1) {
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 622ccbc2df7..ab222a385f6 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -121,10 +121,6 @@ static bitmap need_assert_for;
ASSERT_EXPRs for SSA name N_I should be inserted.  */
 static assert_locus **asserts_for;
 
-vec to_remove_edges;
-vec to_update_switch_stmts;
-
-
 /* Return the maximum value for TYPE.  */
 
 tree
@@ -6404,9 +6400,6 @@ vrp_dom_walker::after_dom_children (basic_block bb)
 static void
 identify_jump_threads (class vr_values *vr_values)
 {
-  int i;
-  edge e;
-
   /* Ugh.  When substituting values earlier in this pass we can
  wipe the dominance information.  So rebuild the dominator
  information as we need it within the jump threading code.  */
@@ -6419,11 +6412,6 @@ identify_jump_threads (class vr_values *vr_values)
  recompute it.  */
   mark_dfs_back_edges ();
 
-  /* Do not thread across edges we are about to remove.  Just marking
- them as EDGE_IGNORE will do.  */
-  FOR_EACH_VEC_ELT (to_remove_edges, i, e)
-e->flags |= EDGE_IGNORE;
-
   /* Allocate our unwinder stack to unwind any temporary equivalences
  that might be recorded.  */
   const_and_copies *equiv_stack = new const_and_copies ();
@@ -6437,10 +6425,6 @@ identify_jump_threads (class vr_values *vr_values)
   walker.vr_values = vr_values;
   walker.walk (cfun->cfg->x_entry_block_ptr);
 
-  /* Clear EDGE_IGNORE.  */
-  FOR_EACH_VEC_ELT (to_remove_edges, i, e)
-e->flags &= ~EDGE_IGNORE;
-
   /* We do not actually update the CFG or SSA graphs at this point as
  ASSERT_EXPRs are still in the 

Re: [committed] hppa: Fix canonicalize of method and void pointers in comparison operations

2018-09-17 Thread Jeff Law
On 9/14/18 5:44 PM, John David Anglin wrote:
> The attached change fixes the canonicalization of method and void
> pointers in comparisons against
> another method or function pointer on 32-bit hppa targets.  As far as I
> know, 32-bit hppa is the only
> architecture that requires function pointer canonicalization due to lazy
> binding.
> 
> Tested on hppa2.0w-hp-hpux11.11 and hppa-unknown-linux-gnu, GCC trunk
> and 8.  Committed to
> trunk and gcc-8 branch.
My recollection is HP's engineers were really concerned about the cost
of indirect calls and wanted to drop all the $$dyncall and associated
canonicalization of function pointers as they went to the 64 bit ABI.

This was made easier by no longer having to support MPE.  So they could
rely on more standard mechanisms to handle function pointers.

Jeff


[PATCH, i386]: Remove some x87 rounding patterns ...

2018-09-17 Thread Uros Bizjak
... and extend operands to XFmode instead.

2018-09-17  Uros Bizjak  

* config/i386/i386.md (truncxf2_i387_noop_unspec): Change
operand 0 predicate to nonimmediate operand.
(rint2_frndint): Remove insn pattern.
(rint2): Change operand 1 predicate to general_operand.
Extend operand 1 to XFmode and generate rintxf2 insn.
(frndintxf2_): Rename from frndint2_.
Do not use X87MODEF mode macro.
(frndintxf2__i387): Rename from
frndint2__i387.  Do not use X87MODEF mode macro.
(2): For non-SSE modes, extend operand 1
to XFmode and generate significandxf3 insn.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 264319)
+++ config/i386/i386.md (working copy)
@@ -15093,7 +15093,7 @@
 ;; all fancy i386 XFmode math functions.
 
 (define_insn "truncxf2_i387_noop_unspec"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
+  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
(unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
UNSPEC_TRUNC_NOOP))]
   "TARGET_USE_FANCY_MATH_387"
@@ -16109,22 +16109,10 @@
(set_attr "znver1_decode" "vector")
(set_attr "mode" "XF")])
 
-(define_insn "rint2_frndint"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-   (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "0")]
- UNSPEC_FRNDINT))]
-  "TARGET_USE_FANCY_MATH_387"
-  "frndint"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "")])
-
 (define_expand "rint2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
-  "(TARGET_USE_FANCY_MATH_387
-&& (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
-   || TARGET_MIX_SSE_I387))
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "TARGET_USE_FANCY_MATH_387
|| (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
 {
   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
@@ -16136,7 +16124,14 @@
ix86_expand_rint (operands[0], operands[1]);
 }
   else
-emit_insn (gen_rint2_frndint (operands[0], operands[1]));
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendxf2 (op1, operands[1]));
+  emit_insn (gen_rintxf2 (op0, op1));
+  emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0));
+}
   DONE;
 })
 
@@ -16254,9 +16249,9 @@
 (UNSPEC_FIST_CEIL "CEIL")])
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
-(define_insn_and_split "frndint2_"
-  [(set (match_operand:X87MODEF 0 "register_operand")
-   (unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand")]
+(define_insn_and_split "frndintxf2_"
+  [(set (match_operand:XF 0 "register_operand")
+   (unspec:XF [(match_operand:XF 1 "register_operand")]
   FRNDINT_ROUNDING))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
@@ -16271,18 +16266,18 @@
   operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
   operands[3] = assign_386_stack_local (HImode, SLOT_CW_);
 
-  emit_insn (gen_frndint2__i387 (operands[0], operands[1],
-operands[2], operands[3]));
+  emit_insn (gen_frndintxf2__i387 (operands[0], operands[1],
+operands[2], operands[3]));
   DONE;
 }
   [(set_attr "type" "frndint")
(set_attr "i387_cw" "")
-   (set_attr "mode" "")])
+   (set_attr "mode" "XF")])
 
-(define_insn "frndint2__i387"
-  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
-   (unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand" "0")]
-FRNDINT_ROUNDING))
+(define_insn "frndintxf2__i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+   (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+  FRNDINT_ROUNDING))
(use (match_operand:HI 2 "memory_operand" "m"))
(use (match_operand:HI 3 "memory_operand" "m"))]
   "TARGET_USE_FANCY_MATH_387
@@ -16290,7 +16285,7 @@
   "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
   [(set_attr "type" "frndint")
(set_attr "i387_cw" "")
-   (set_attr "mode" "")])
+   (set_attr "mode" "XF")])
 
 (define_expand "xf2"
   [(parallel [(set (match_operand:XF 0 "register_operand")
@@ -16310,11 +16305,11 @@
|| TARGET_MIX_SSE_I387)
 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
|| (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
-   && (TARGET_SSE4_1 || !flag_trapping_math
-  || flag_fp_int_builtin_inexact))"
+   && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact
+  || !flag_trapping_math))"
 {
   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
-  && (TARGET_SSE4_1 || !flag_trapping_math || flag_fp_int_builtin_inexact))
+  && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact |

Re: [PATCH] Maybe fix PR87134

2018-09-17 Thread Iain Sandoe
Hi Rainer,

> On 6 Sep 2018, at 21:21, Rainer Orth  wrote:
> 

>> I can confirm the same, repeatable, fail on i686-darwin10 (and it
>> reproduces with -save-temps)
>> (and the vNULL change does not fix it there either) - don’t have access to
>> the machine now until
>> later in the month tho.
> 
> same here on i386-apple-darwin11, and I can easily fire off tests.

I just re-tried (i686-darwin10) - and the build gets further - it now OOMs at 
stage-2 build of insn-extract.c …
… the OOM goes away with -save-temps.  Only 2G of real memory on that box.

This is attempting an Ada-enabled bootstrap using GCC 5.4.
Trunk won’t bootstrap with apple-4.x because of 86739 .. so maybe I could build 
a more recent branch and try that as bootstrap / try to debug what’s causing 
the OOM ...

Did you get any further?

Iain



Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 15/09/18 14:27 +0200, Marc Glisse wrote:

Hello,

as explained in the PR, the implementation of vector is weirdly 
wasteful. Preserving the ABI prevents from changing much for now, but 
this small tweak can help the compiler remove quite a bit of dead 
code.


I think most other direct uses of _M_start are in constructors where 
the offset has just been initialized to 0, so the compiler should 
already know enough there, but I may have missed a few relevant places 
where the same idea could be used.


I used C++11 syntax because I find it nicer, and the compiler accepts 
it in C++98 mode with just a warning, suppressed in a standard header.


Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2018-09-15  Marc Glisse  

   PR libstdc++/87258
* include/bits/stl_bvector.h (vector::begin(), vector::cbegin()):
Rebuild _M_start with an explicit 0 offset.

--
Marc Glisse



Index: include/bits/stl_bvector.h
===
--- include/bits/stl_bvector.h  (revision 264178)
+++ include/bits/stl_bvector.h  (working copy)
@@ -802,25 +802,25 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
#endif

#if __cplusplus >= 201103L
  void
  assign(initializer_list __l)
  { _M_assign_aux(__l.begin(), __l.end(), random_access_iterator_tag()); }
#endif

  iterator
  begin() _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }

  const_iterator
  begin() const _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }


Won't this fail to compile in C++98 mode?




Re: [committed] hppa: Fix canonicalize of method and void pointers in comparison operations

2018-09-17 Thread John David Anglin

On 2018-09-17 11:02 AM, Jeff Law wrote:

On 9/14/18 5:44 PM, John David Anglin wrote:

The attached change fixes the canonicalization of method and void
pointers in comparisons against
another method or function pointer on 32-bit hppa targets.  As far as I
know, 32-bit hppa is the only
architecture that requires function pointer canonicalization due to lazy
binding.

Tested on hppa2.0w-hp-hpux11.11 and hppa-unknown-linux-gnu, GCC trunk
and 8.  Committed to
trunk and gcc-8 branch.

My recollection is HP's engineers were really concerned about the cost
of indirect calls and wanted to drop all the $$dyncall and associated
canonicalization of function pointers as they went to the 64 bit ABI.
As things stand, the linker and glibc would have to change to use OPDs.  
I doubt very much
much that this can be done without breaking compatibility with existing 
applications.  In linux,
there is a problem with the trampoline design used for lazy binding.  Of 
course, the code to

canonicalize function pointers is pretty horrible and fragile.

Helge has been pushing to get the 64-bit ABI working on linux.  I tend 
to think it would be easier
to get a 64-bit glibc working than solve the above.  Helge has the 
kernel interface more or less

done.

Dave

--
John David Anglin  dave.ang...@bell.net



Re: [PATCH v3] combine: perform jump threading at the end

2018-09-17 Thread Segher Boessenkool
On Mon, Sep 17, 2018 at 10:50:58AM +0200, Ilya Leoshkevich wrote:
> > Am 14.09.2018 um 23:35 schrieb Segher Boessenkool 
> > :
> > Could you please show generated code before and after this patch?
> > I mean generated assembler code.  What -S gives you.
> 
> Before:
> 
> foo4:
> .LFB0:
>   .cfi_startproc
>   lt  %r1,0(%r2)
>   jne .L2
>   lhi %r3,1
>   cs  %r1,%r3,0(%r2)
> .L2:
>   jne .L5
>   br  %r14
> .L5:
>   jg  bar
> 
> After:
> 
> foo4:
> .LFB0:
>   .cfi_startproc
>   lt  %r1,0(%r2)
>   jne .L4
>   lhi %r3,1
>   cs  %r1,%r3,0(%r2)
>   jne .L4
>   br  %r14
> .L4:
>   jg  bar

Ah.  And a compiler of some weeks old gives

foo4:
.LFB0:
.cfi_startproc
lhi %r3,0
lhi %r4,1
cs  %r3,%r4,0(%r2)
jne .L4
br  %r14
.L4:
jg  bar

so this is all caused by the recent optimisation that avoids the "cs" if
it can.

> > Why does the existing jump threading not work for you; should it happen
> > at another time?
> 
> We call cleanup_cfg (CLEANUP_THREADING) only once - during the „jump“
> pass, which happens before combine.  There is also „jump2“ pass, which
> happens afterwards, and after discussion with Ulrich Weigand I tried to
> move jump threading there.  While this change had the desired effect on
> the testcase, the code got worse in another places. Example from
> GemsFDTD:
> 
> Before:
> 
> 103e96c:   b9 04 00 31 lgr %r3,%r1
> 103e970:   a7 18 00 00 lhi %r1,0
> 103e974:   e3 20 f0 d0 00 0d   dsg %r2,208(%r15)
> 103e97a:   b9 20 00 c3 cgr %r12,%r3
> 103e97e:   a7 29 ff ff lghi%r2,-1
> 103e982:   ec 12 00 01 00 42   lochih  %r1,1
> 103e988:   e3 20 f0 f8 00 82   xg  %r2,248(%r15)
> 103e98e:   1a 15   ar  %r1,%r5
> 103e990:   b9 e9 c0 72 sgrk%r7,%r2,%r12
> 103e994:   b3 c1 00 e2 ldgr%f14,%r2
> 103e998:   b9 f8 a0 21 ark %r2,%r1,%r10
> 103e99c:   12 99   ltr %r9,%r9
> 103e99e:   a7 18 00 00 lhi %r1,0
> 103e9a2:   ec 1c 00 01 00 42   lochile %r1,1
> 103e9a8:   50 10 f1 28 st  %r1,296(%r15)
> 103e9ac:   c2 9d 00 00 00 00   cfi %r9,0
> 103e9b2:   c0 c4 00 01 28 4e   jgle1063a4e
> 103e9b8:   e5 4c f1 08 00 00   mvhi264(%r15),0
> 103e9be:   eb 14 00 03 00 0d   sllg%r1,%r4,3
> 
> After:
> 
> 103e96c:   b9 04 00 31 lgr %r3,%r1   
> 103e970:   e5 4c f1 18 00 01   mvhi280(%r15),1
> 103e976:   a7 18 00 00 lhi %r1,0 
> 103e97a:   e3 20 f0 d0 00 0d   dsg %r2,208(%r15) 
> 103e980:   b9 20 00 c3 cgr %r12,%r3  
> 103e984:   a7 29 ff ff lghi%r2,-1
> 103e988:   ec 12 00 01 00 42   lochih  %r1,1 
> 103e98e:   e3 20 f0 f8 00 82   xg  %r2,248(%r15) 
> 103e994:   1a 15   ar  %r1,%r5   
> 103e996:   b3 c1 00 e2 ldgr%f14,%r2
> 103e99a:   b9 e9 c0 72 sgrk%r7,%r2,%r12  
> 103e99e:   b9 f8 a0 21 ark %r2,%r1,%r10  
> 103e9a2:   c2 9d 00 00 00 00   cfi %r9,0 
> 103e9a8:   c0 c4 00 01 28 51   jgle1063a4a
> 103e9ae:   e5 4c f1 18 00 00   mvhi280(%r15),0
> 103e9b4:   e5 4c f1 08 00 00   mvhi264(%r15),0   
> 103e9ba:   eb 14 00 03 00 0d   sllg%r1,%r4,3 
> 
> Diff:
> 
> lgr %rn,%rn
> -   mvhim(%rx),1
> lhi %rn,0
> 
> ar  %rn,%rn
> -   ldgr%fn,%rn
> sgrk%rn,%rn,%rn
> +   ldgr%fn,%rn
> ark %rn,%rn,%rn
> +   ltr %rn,%rn
> +   lhi %rn,0
> +   lochile %rn,1
> +   st  %rn,m(%rx)
> cfi %rn,0
> 
> mvhim(%rx),0
> -   mvhim(%rx),0
> sllg%rn,%rn,3
> 
> The code used to compare %r9 with 0, jump based on the comparison
> result, and set the local variable in the branch.  Now it compares %r9
> with 0 twice - I didn’t look at the details yet, but apparently we’re
> missing some optimization because jump threading is done too late.

Yeah, jump2 is quite late.  I think you should do it before postreload_cse
or similar.

Btw, what percentage of files (or subroutines) does jump threading run
after combine, with your patch?


Segher


Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Jeff Law
On 9/15/18 2:14 AM, Bernd Edlinger wrote:
> On 9/14/18, Martin Sebor wrote:
>> As I said above, this happens during the dom walk in the ccp
>> pass:
>>
>>   substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
>>   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>>
>> The warning is issued during the walker.walk() call as
>> strncpy is being folded into memcpy.  The prior assignments are
>> only propagated later, when the next statement after the strncpy
>> call is reached.  It happens in
>> substitute_and_fold_dom_walker::before_dom_children(). So during
>> the strncpy folding we see the next statement as:
>>
>>   MEM[(struct S *)_1].a[n_7] = 0;
>>
>> After the strncpy call is transformed to memcpy, the assignment
>> above is transformed to
>>
>>   MEM[(struct S *)_8].a[3] = 0;
>>
>>
>>> If they're only discovered as copies within the pass where you're trying
>>> to issue the diagnostic, then you'd want to see if the pass has any
>>> internal structures that tell you about equivalences.
>>
>>
>> I don't know if this is possible.  I don't see any APIs in
>> tree-ssa-propagate.h that would let me query the internal data
>> somehow to find out during folding (when the warning is issued).
> 
> 
> Well,
> 
> if I see this right, the CCP is doing tree transformations
> while from the folding of strncpy the predicate maybe_diag_stxncpy_trunc
> is called, and sees inconsistent information, in the tree,
> and therefore it issues a warning.
> 
> I understand that walking the references is fragile at least
> in this state.
> 
> But why not just prevent warnings when this is called from CCP?
> 
> 
> Like this?
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk?
No.  That's just hacking around the real problem.

jeff


Re: [PATCH] Cleanup strcpy/stpcpy no nul warning code

2018-09-17 Thread Jeff Law
On 9/16/18 1:58 PM, Bernd Edlinger wrote:
> Hi,
> 
> this is a cleanup of the recently added strlen/strcpy/stpcpy
> no nul warning code.
> 
> Most importantly it moves the SSA_NAME handling from
> unterminated_array to string_constant, thereby fixing
> another round of xfails in the strlen and stpcpy test cases.
> 
> I need to say that the fix for bug 86622 is relying in
> type info on the pointer which is probably not safe in
> GIMPLE in the light of the recent discussion.
> 
> I had to add two further exceptions, which should
> be safe in general: that is a pointer arithmentic on a string
> literal is okay, and arithmetic on a string constant
> that is exactly the size of the whole DECL, cannot
> access an adjacent member.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 
> 
> patch-cleanup-no-nul.diff
> 
> gcc:
> 2018-09-16  Bernd Edlinger  
> 
>   * builtins.h (unterminated_array): Remove prototype.
> * builtins.c (unterminated_array): Simplify.  Make static.
> (expand_builtin_strcpy_args): Don't use TREE_NO_WARNING here.
>   (expand_builtin_stpcpy_1): Remove warn_string_no_nul without effect.
>   * expr.c (string_constant): Handle SSA_NAME.  Add more exceptions
>   where pointer arithmetic is safe.
>   * gimple-fold.c (get_range_strlen): Handle nonstr like in c_strlen.
>   (get_max_strlen): Remove the unnecessary mynonstr handling.
>   (gimple_fold_builtin_strcpy): Simplify.
>   (gimple_fold_builtin_stpcpy): Simplify.
>   (gimple_fold_builtin_sprintf): Remove NO_WARNING propagation
>   without effect.
>   (gimple_fold_builtin_strlen): Simplify.
> 
> testsuite:
> 2018-09-16  Bernd Edlinger  
> 
>   * gcc.dg/warn-stpcpy-no-nul.c: Remove xfails.
>   * gcc.dg/warn-strlen-no-nul.c: Remove xfails.
> 
> Index: gcc/builtins.c
> ===
> --- gcc/builtins.c(revision 264342)
> +++ gcc/builtins.c(working copy)
> @@ -567,31 +567,12 @@ warn_string_no_nul (location_t loc, const char *fn
> the declaration of the object of which the array is a member or
> element.  Otherwise return null.  */
>  
> -tree
> +static tree
>  unterminated_array (tree exp)
>  {
> -  if (TREE_CODE (exp) == SSA_NAME)
> -{
> -  gimple *stmt = SSA_NAME_DEF_STMT (exp);
> -  if (!is_gimple_assign (stmt))
> - return NULL_TREE;
> -
> -  tree rhs1 = gimple_assign_rhs1 (stmt);
> -  tree_code code = gimple_assign_rhs_code (stmt);
> -  if (code == ADDR_EXPR
> -   && TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
> - rhs1 = rhs1;
> -  else if (code != POINTER_PLUS_EXPR)
> - return NULL_TREE;
> -
> -  exp = rhs1;
> -}
> -
>tree nonstr = NULL;
> -  if (c_strlen (exp, 1, &nonstr, 1) == NULL && nonstr)
> -return nonstr;
> -
> -  return NULL_TREE;
> +  c_strlen (exp, 1, &nonstr);
> +  return nonstr;
>  }
Sigh.  This is going to conflict with patch #6 from Martin's kit.



>  
>  /* Compute the length of a null-terminated character string or wide
> @@ -3936,8 +3917,7 @@ expand_builtin_strcpy_args (tree exp, tree dest, t
>if (tree nonstr = unterminated_array (src))
>  {
>/* NONSTR refers to the non-nul terminated constant array.  */
> -  if (!TREE_NO_WARNING (exp))
> - warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
> +  warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
>return NULL_RTX;
>  }
There's also some target dependencies to be concerned about.  I'll have
to dig out the thread, but in general removing these TREE_NO_WARNING
checks is probably a bad idea -- you're likely introducing redundant
warnings on one or more targets (but not x86).


There may be things in there we want to take.  But my focus is going to
be on getting the #4 and #6 patches from Martin's kit resolved (while he
works on the string length range stuff).

Jeff


Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Richard Biener
On September 17, 2018 7:24:16 PM GMT+02:00, Jeff Law  wrote:
>On 9/15/18 2:14 AM, Bernd Edlinger wrote:
>> On 9/14/18, Martin Sebor wrote:
>>> As I said above, this happens during the dom walk in the ccp
>>> pass:
>>>
>>>   substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
>>>   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>>>
>>> The warning is issued during the walker.walk() call as
>>> strncpy is being folded into memcpy.  The prior assignments are
>>> only propagated later, when the next statement after the strncpy
>>> call is reached.  It happens in
>>> substitute_and_fold_dom_walker::before_dom_children(). So during
>>> the strncpy folding we see the next statement as:
>>>
>>>   MEM[(struct S *)_1].a[n_7] = 0;
>>>
>>> After the strncpy call is transformed to memcpy, the assignment
>>> above is transformed to
>>>
>>>   MEM[(struct S *)_8].a[3] = 0;
>>>
>>>
 If they're only discovered as copies within the pass where
>you're trying
 to issue the diagnostic, then you'd want to see if the pass has
>any
 internal structures that tell you about equivalences.
>>>
>>>
>>> I don't know if this is possible.  I don't see any APIs in
>>> tree-ssa-propagate.h that would let me query the internal data
>>> somehow to find out during folding (when the warning is issued).
>> 
>> 
>> Well,
>> 
>> if I see this right, the CCP is doing tree transformations
>> while from the folding of strncpy the predicate
>maybe_diag_stxncpy_trunc
>> is called, and sees inconsistent information, in the tree,
>> and therefore it issues a warning.
>> 
>> I understand that walking the references is fragile at least
>> in this state.
>> 
>> But why not just prevent warnings when this is called from CCP?
>> 
>> 
>> Like this?
>> 
>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>> Is it OK for trunk?
>No.  That's just hacking around the real problem.

The real problem is emitting diagnostics from folding code. 

Richard. 

>
>jeff



Re: [PATCH] Maybe fix PR87134

2018-09-17 Thread Rainer Orth
Hi Iain,

> Hi Rainer,
>
>> On 6 Sep 2018, at 21:21, Rainer Orth  wrote:
>> 
>
>>> I can confirm the same, repeatable, fail on i686-darwin10 (and it
>>> reproduces with -save-temps)
>>> (and the vNULL change does not fix it there either) - don’t have access to
>>> the machine now until
>>> later in the month tho.
>> 
>> same here on i386-apple-darwin11, and I can easily fire off tests.
>
> I just re-tried (i686-darwin10) - and the build gets further - it now OOMs
> at stage-2 build of insn-extract.c …
> … the OOM goes away with -save-temps.  Only 2G of real memory on that box.
>
> This is attempting an Ada-enabled bootstrap using GCC 5.4.
> Trunk won’t bootstrap with apple-4.x because of 86739 .. so maybe I could
> build a more recent branch and try that as bootstrap / try to debug what’s
> causing the OOM ...
>
> Did you get any further?

I did: I've been using gcc 7.1.0 (using 64-bit binaries with -m32) as
host compiler for a successfull all,ada bootstrap on
i386-apple-darwin11.4.2.  The machine (a Mac mini) has 2 GB RAM, too.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH 22/25] Add dg-require-effective-target exceptions

2018-09-17 Thread Mike Stump
On Sep 5, 2018, at 4:52 AM, a...@codesourcery.com wrote:
> There are a number of tests that fail because they assume that exceptions are
> available, but GCN does not support them, yet.

So, generally we don't goop up the testsuite with the day to day port stuff 
when it is being developed.  If the port is finished, and EH can't be done, 
this type of change is fine.  If someone plans on doing it in the next 5 years 
and the port is still being developed, there is likely little reason to do 
this.  People that track regressions do so by differencing, and that easily 
handles massive amounts of failures seamlessly.

So, my question would be, has is just not been worked on yet, or, is it 
basically impossible to ever do it?

[C++ Patch PING] [C++ Patch] PR 85065 ("[concepts] ICE with invalid use of a concept")

2018-09-17 Thread Paolo Carlini

Hi again,

On 9/3/18 10:59 PM, Paolo Carlini wrote:
in this error-recovery ICE, upon the error make_constrained_auto 
assigns error_mark_node to PLACEHOLDER_TYPE_CONSTRAINTS (type) which 
then causes a crash later when hash_placeholder_constraint is called 
on it. I think we should cope with this somehow, I believe that 
consistency with the way we use error_mark_node in this part of the 
front-end prevents us from avoiding to assign the error_mark_node in 
the first place and, for the reasons explained in my previous patch, 
we want to unconditionally call make_constrained_auto. This said, 
catching in practice the error_mark_node would normally mean 
renouncing to the pattern 'if (tree c = ...)' which we lately appear 
to like a lot and seems indeed neat. Thus I'm wondering if we want 
instead to add a macro like ERROR_AS_NULL, which of course would be 
also useful in many other places - essentially in all the 
circumstances where we want to check for a kosher node, thus neither 
null nor error_mark_node. What do you think? What about the name, in 
case? Tested x86_64-linux.


Today I reviewed again this issue, for which I sent a tentative patch a 
couple of weeks ago. All in all, I still believe that is the right place 
to catch the error_mark_node and avoid ICE-ing later, the quick 
rationale being that PLACEHOLDER_TYPE_CONSTRAINTS can be error_mark_node 
for other reasons too. As regards the ERROR_AS_NULL idea, I'm still not 
sure, on one hand it would allow for more compact and neat code in some 
cases, on the other hand could be seen as some sort of obfuscation - 
well, some people out there consider an obfuscation the very 'if (c 
=...)' pattern ;) Anyway, I'm attaching the normal versions of the fix, 
which, per a recent message from Jason, probably is almost obvious...


Thanks, Paolo.

/

Index: cp/pt.c
===
--- cp/pt.c (revision 264362)
+++ cp/pt.c (working copy)
@@ -26121,7 +26121,8 @@ struct auto_hash : default_hash_traits
 inline hashval_t
 auto_hash::hash (tree t)
 {
-  if (tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t))
+  tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t);
+  if (c && c != error_mark_node)
 /* Matching constrained-type-specifiers denote the same template
parameter, so hash the constraint.  */
 return hash_placeholder_constraint (c);
@@ -26880,50 +26881,53 @@ do_auto_deduction (tree type, tree init, tree auto
 
   /* Check any placeholder constraints against the deduced type. */
   if (flag_concepts && !processing_template_decl)
-if (tree constr = PLACEHOLDER_TYPE_CONSTRAINTS (auto_node))
-  {
-/* Use the deduced type to check the associated constraints. If we
-   have a partial-concept-id, rebuild the argument list so that
-   we check using the extra arguments. */
-gcc_assert (TREE_CODE (constr) == CHECK_CONSTR);
-tree cargs = CHECK_CONSTR_ARGS (constr);
-if (TREE_VEC_LENGTH (cargs) > 1)
-  {
-cargs = copy_node (cargs);
-TREE_VEC_ELT (cargs, 0) = TREE_VEC_ELT (targs, 0);
-  }
-else
-  cargs = targs;
-if (!constraints_satisfied_p (constr, cargs))
-  {
-if (complain & tf_warning_or_error)
-  {
-   auto_diagnostic_group d;
-switch (context)
-  {
-  case adc_unspecified:
- case adc_unify:
-error("placeholder constraints not satisfied");
-break;
-  case adc_variable_type:
- case adc_decomp_type:
-error ("deduced initializer does not satisfy "
-   "placeholder constraints");
-break;
-  case adc_return_type:
-error ("deduced return type does not satisfy "
-   "placeholder constraints");
-break;
-  case adc_requirement:
-   error ("deduced expression type does not satisfy "
-   "placeholder constraints");
-break;
-  }
-diagnose_constraints (input_location, constr, targs);
-  }
-return error_mark_node;
-  }
-  }
+{
+  tree constr = PLACEHOLDER_TYPE_CONSTRAINTS (auto_node);
+  if (constr && constr != error_mark_node)
+   {
+ /* Use the deduced type to check the associated constraints. If we
+have a partial-concept-id, rebuild the argument list so that
+we check using the extra arguments. */
+ gcc_assert (TREE_CODE (constr) == CHECK_CONSTR);
+ tree cargs = CHECK_CONSTR_ARGS (constr);
+ if (TREE_VEC_LENGTH (cargs) > 1)
+   {
+ cargs = copy_node (cargs);
+ TREE_VEC_ELT (cargs, 0) = TREE_VEC_ELT (targs, 0);
+ 

Re: vector _M_start and 0 offset

2018-09-17 Thread Marc Glisse

On Mon, 17 Sep 2018, Jonathan Wakely wrote:


On 15/09/18 14:27 +0200, Marc Glisse wrote:

Hello,

as explained in the PR, the implementation of vector is weirdly 
wasteful. Preserving the ABI prevents from changing much for now, but this 
small tweak can help the compiler remove quite a bit of dead code.


I think most other direct uses of _M_start are in constructors where the 
offset has just been initialized to 0, so the compiler should already know 
enough there, but I may have missed a few relevant places where the same 
idea could be used.


I used C++11 syntax because I find it nicer, and the compiler accepts it in 
C++98 mode with just a warning, suppressed in a standard header.


^^


Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2018-09-15  Marc Glisse  

   PR libstdc++/87258
* include/bits/stl_bvector.h (vector::begin(), vector::cbegin()):
Rebuild _M_start with an explicit 0 offset.

--
Marc Glisse



Index: include/bits/stl_bvector.h
===
--- include/bits/stl_bvector.h  (revision 264178)
+++ include/bits/stl_bvector.h  (working copy)
@@ -802,25 +802,25 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
#endif

#if __cplusplus >= 201103L
  void
  assign(initializer_list __l)
  { _M_assign_aux(__l.begin(), __l.end(), 
random_access_iterator_tag()); }

#endif

  iterator
  begin() _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }

  const_iterator
  begin() const _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }


Won't this fail to compile in C++98 mode?


"I used C++11 syntax because I find it nicer, and the compiler accepts it 
in C++98 mode with just a warning, suppressed in a standard header."


Even with -Wsystem-headers I don't get a warning, I have to precompile 
with -P -E then compile the result to get "warning: extended initializer 
lists only available with -std=c++11 or -std=gnu++11".


--
Marc Glisse


Re: [PATCH] Cleanup strcpy/stpcpy no nul warning code

2018-09-17 Thread Bernd Edlinger
On 09/17/18 19:33, Jeff Law wrote:
> On 9/16/18 1:58 PM, Bernd Edlinger wrote:
>> Hi,
>>
>> this is a cleanup of the recently added strlen/strcpy/stpcpy
>> no nul warning code.
>>
>> Most importantly it moves the SSA_NAME handling from
>> unterminated_array to string_constant, thereby fixing
>> another round of xfails in the strlen and stpcpy test cases.
>>
>> I need to say that the fix for bug 86622 is relying in
>> type info on the pointer which is probably not safe in
>> GIMPLE in the light of the recent discussion.
>>
>> I had to add two further exceptions, which should
>> be safe in general: that is a pointer arithmentic on a string
>> literal is okay, and arithmetic on a string constant
>> that is exactly the size of the whole DECL, cannot
>> access an adjacent member.
>>
>>
>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>> Is it OK for trunk?
>>
>>
>> Thanks
>> Bernd.
>>
>>
>> patch-cleanup-no-nul.diff
>>
>> gcc:
>> 2018-09-16  Bernd Edlinger  
>>
>>  * builtins.h (unterminated_array): Remove prototype.
>>  * builtins.c (unterminated_array): Simplify.  Make static.
>>  (expand_builtin_strcpy_args): Don't use TREE_NO_WARNING here.
>>  (expand_builtin_stpcpy_1): Remove warn_string_no_nul without effect.
>>  * expr.c (string_constant): Handle SSA_NAME.  Add more exceptions
>>  where pointer arithmetic is safe.
>>  * gimple-fold.c (get_range_strlen): Handle nonstr like in c_strlen.
>>  (get_max_strlen): Remove the unnecessary mynonstr handling.
>>  (gimple_fold_builtin_strcpy): Simplify.
>>  (gimple_fold_builtin_stpcpy): Simplify.
>>  (gimple_fold_builtin_sprintf): Remove NO_WARNING propagation
>>  without effect.
>>  (gimple_fold_builtin_strlen): Simplify.
>>
>> testsuite:
>> 2018-09-16  Bernd Edlinger  
>>
>>  * gcc.dg/warn-stpcpy-no-nul.c: Remove xfails.
>>  * gcc.dg/warn-strlen-no-nul.c: Remove xfails.
>>
>> Index: gcc/builtins.c
>> ===
>> --- gcc/builtins.c   (revision 264342)
>> +++ gcc/builtins.c   (working copy)
>> @@ -567,31 +567,12 @@ warn_string_no_nul (location_t loc, const char *fn
>>  the declaration of the object of which the array is a member or
>>  element.  Otherwise return null.  */
>>   
>> -tree
>> +static tree
>>   unterminated_array (tree exp)
>>   {
>> -  if (TREE_CODE (exp) == SSA_NAME)
>> -{
>> -  gimple *stmt = SSA_NAME_DEF_STMT (exp);
>> -  if (!is_gimple_assign (stmt))
>> -return NULL_TREE;
>> -
>> -  tree rhs1 = gimple_assign_rhs1 (stmt);
>> -  tree_code code = gimple_assign_rhs_code (stmt);
>> -  if (code == ADDR_EXPR
>> -  && TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
>> -rhs1 = rhs1;
>> -  else if (code != POINTER_PLUS_EXPR)
>> -return NULL_TREE;
>> -
>> -  exp = rhs1;
>> -}
>> -
>> tree nonstr = NULL;
>> -  if (c_strlen (exp, 1, &nonstr, 1) == NULL && nonstr)
>> -return nonstr;
>> -
>> -  return NULL_TREE;
>> +  c_strlen (exp, 1, &nonstr);
>> +  return nonstr;
>>   }
> Sigh.  This is going to conflict with patch #6 from Martin's kit.
> 

Sorry, it just feels wrong to do this folding here instead of in
string_constant.

I think the handling of POINTER_PLUS_EXPR above, is faulty,
because it is ignoring the offset parameter, which typically
contains the offset part, may add an offset to a different
structure member or another array index.  That is what happened
in PR 86622.

So you are likely looking at the wrong index, or even the wrong
structure member.


> 
>>   
>>   /* Compute the length of a null-terminated character string or wide
>> @@ -3936,8 +3917,7 @@ expand_builtin_strcpy_args (tree exp, tree dest, t
>> if (tree nonstr = unterminated_array (src))
>>   {
>> /* NONSTR refers to the non-nul terminated constant array.  */
>> -  if (!TREE_NO_WARNING (exp))
>> -warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
>> +  warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
>> return NULL_RTX;
>>   }
> There's also some target dependencies to be concerned about.  I'll have
> to dig out the thread, but in general removing these TREE_NO_WARNING
> checks is probably a bad idea -- you're likely introducing redundant
> warnings on one or more targets (but not x86).
> 

I just wondered why use if (!TREE_NO_WARNING(exp)) when warn_string_no_nul
uses if (!TREE_NO_WARNING(src)), and sets TREE_NO_WARNING(src) on success.

> 
> There may be things in there we want to take.  But my focus is going to
> be on getting the #4 and #6 patches from Martin's kit resolved (while he
> works on the string length range stuff).
> 

Oh, you are aware that I have a proposed patch on the string length range
vs. gimple semantics here:

https://gcc.gnu.org/ml/gcc-patches/2018-09/msg00805.html


Bernd.


Re: [PATCH] Cleanup strcpy/stpcpy no nul warning code

2018-09-17 Thread Jeff Law
On 9/17/18 12:20 PM, Bernd Edlinger wrote:
> On 09/17/18 19:33, Jeff Law wrote:
>> On 9/16/18 1:58 PM, Bernd Edlinger wrote:
>>> Hi,
>>>
>>> this is a cleanup of the recently added strlen/strcpy/stpcpy
>>> no nul warning code.
>>>
>>> Most importantly it moves the SSA_NAME handling from
>>> unterminated_array to string_constant, thereby fixing
>>> another round of xfails in the strlen and stpcpy test cases.
>>>
>>> I need to say that the fix for bug 86622 is relying in
>>> type info on the pointer which is probably not safe in
>>> GIMPLE in the light of the recent discussion.
>>>
>>> I had to add two further exceptions, which should
>>> be safe in general: that is a pointer arithmentic on a string
>>> literal is okay, and arithmetic on a string constant
>>> that is exactly the size of the whole DECL, cannot
>>> access an adjacent member.
>>>
>>>
>>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>>> Is it OK for trunk?
>>>
>>>
>>> Thanks
>>> Bernd.
>>>
>>>
>>> patch-cleanup-no-nul.diff
>>>
>>> gcc:
>>> 2018-09-16  Bernd Edlinger  
>>>
>>> * builtins.h (unterminated_array): Remove prototype.
>>>  * builtins.c (unterminated_array): Simplify.  Make static.
>>>  (expand_builtin_strcpy_args): Don't use TREE_NO_WARNING here.
>>> (expand_builtin_stpcpy_1): Remove warn_string_no_nul without effect.
>>> * expr.c (string_constant): Handle SSA_NAME.  Add more exceptions
>>> where pointer arithmetic is safe.
>>> * gimple-fold.c (get_range_strlen): Handle nonstr like in c_strlen.
>>> (get_max_strlen): Remove the unnecessary mynonstr handling.
>>> (gimple_fold_builtin_strcpy): Simplify.
>>> (gimple_fold_builtin_stpcpy): Simplify.
>>> (gimple_fold_builtin_sprintf): Remove NO_WARNING propagation
>>> without effect.
>>> (gimple_fold_builtin_strlen): Simplify.
>>>
>>> testsuite:
>>> 2018-09-16  Bernd Edlinger  
>>>
>>> * gcc.dg/warn-stpcpy-no-nul.c: Remove xfails.
>>> * gcc.dg/warn-strlen-no-nul.c: Remove xfails.
>>>
>>> Index: gcc/builtins.c
>>> ===
>>> --- gcc/builtins.c  (revision 264342)
>>> +++ gcc/builtins.c  (working copy)
>>> @@ -567,31 +567,12 @@ warn_string_no_nul (location_t loc, const char *fn
>>>  the declaration of the object of which the array is a member or
>>>  element.  Otherwise return null.  */
>>>   
>>> -tree
>>> +static tree
>>>   unterminated_array (tree exp)
>>>   {
>>> -  if (TREE_CODE (exp) == SSA_NAME)
>>> -{
>>> -  gimple *stmt = SSA_NAME_DEF_STMT (exp);
>>> -  if (!is_gimple_assign (stmt))
>>> -   return NULL_TREE;
>>> -
>>> -  tree rhs1 = gimple_assign_rhs1 (stmt);
>>> -  tree_code code = gimple_assign_rhs_code (stmt);
>>> -  if (code == ADDR_EXPR
>>> - && TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
>>> -   rhs1 = rhs1;
>>> -  else if (code != POINTER_PLUS_EXPR)
>>> -   return NULL_TREE;
>>> -
>>> -  exp = rhs1;
>>> -}
>>> -
>>> tree nonstr = NULL;
>>> -  if (c_strlen (exp, 1, &nonstr, 1) == NULL && nonstr)
>>> -return nonstr;
>>> -
>>> -  return NULL_TREE;
>>> +  c_strlen (exp, 1, &nonstr);
>>> +  return nonstr;
>>>   }
>> Sigh.  This is going to conflict with patch #6 from Martin's kit.
>>
> 
> Sorry, it just feels wrong to do this folding here instead of in
> string_constant.
> 
> I think the handling of POINTER_PLUS_EXPR above, is faulty,
> because it is ignoring the offset parameter, which typically
> contains the offset part, may add an offset to a different
> structure member or another array index.  That is what happened
> in PR 86622.
> 
> So you are likely looking at the wrong index, or even the wrong
> structure member.
I'm not disagreeing that something's wrong here -- the whole concept
that we extract the rhs and totally ignore the offset seems wrong.  I've
stumbled over it working through issues with either patch #4 or #6 from
Martin.  But I felt I needed to go back and reevaluate any assumptions I
had about how the code was supposed to be used before I called it out.


Your expr.c changes may be worth pushing through independently of the
rest.AFAICT they're really just exposing more cases where we can
determine that we're working with a stirng constant.

> 
> 
>>
>>>   
>>>   /* Compute the length of a null-terminated character string or wide
>>> @@ -3936,8 +3917,7 @@ expand_builtin_strcpy_args (tree exp, tree dest, t
>>> if (tree nonstr = unterminated_array (src))
>>>   {
>>> /* NONSTR refers to the non-nul terminated constant array.  */
>>> -  if (!TREE_NO_WARNING (exp))
>>> -   warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
>>> +  warn_string_no_nul (EXPR_LOCATION (exp), "strcpy", src, nonstr);
>>> return NULL_RTX;
>>>   }
>> There's also some target dependencies to be concerned about.  I'll have
>> to dig out the thread, but in general removing these TREE_NO_WARNING
>> checks is probably a bad idea -- you're likely introduc

Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Bernd Edlinger
On 09/17/18 19:35, Richard Biener wrote:
> On September 17, 2018 7:24:16 PM GMT+02:00, Jeff Law  wrote:
>> On 9/15/18 2:14 AM, Bernd Edlinger wrote:
>>> On 9/14/18, Martin Sebor wrote:
 As I said above, this happens during the dom walk in the ccp
 pass:

substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

 The warning is issued during the walker.walk() call as
 strncpy is being folded into memcpy.  The prior assignments are
 only propagated later, when the next statement after the strncpy
 call is reached.  It happens in
 substitute_and_fold_dom_walker::before_dom_children(). So during
 the strncpy folding we see the next statement as:

MEM[(struct S *)_1].a[n_7] = 0;

 After the strncpy call is transformed to memcpy, the assignment
 above is transformed to

MEM[(struct S *)_8].a[3] = 0;


>  If they're only discovered as copies within the pass where
>> you're trying
>  to issue the diagnostic, then you'd want to see if the pass has
>> any
>  internal structures that tell you about equivalences.


 I don't know if this is possible.  I don't see any APIs in
 tree-ssa-propagate.h that would let me query the internal data
 somehow to find out during folding (when the warning is issued).
>>>
>>>
>>> Well,
>>>
>>> if I see this right, the CCP is doing tree transformations
>>> while from the folding of strncpy the predicate
>> maybe_diag_stxncpy_trunc
>>> is called, and sees inconsistent information, in the tree,
>>> and therefore it issues a warning.
>>>
>>> I understand that walking the references is fragile at least
>>> in this state.
>>>
>>> But why not just prevent warnings when this is called from CCP?
>>>
>>>
>>> Like this?
>>>
>>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>>> Is it OK for trunk?
>> No.  That's just hacking around the real problem.
> 
> The real problem is emitting diagnostics from folding code.
> 

Yes, I am also very concerned about that.

So if this is a design bug, then it is probably impossible
to fix at the implementation, and we should fix the design.

It is unfortunate, that this means regressions on the existing
(and probably also proposed, future) test cases.


Bernd.


Re: [PATCH] gcov: emit hotness colors to easily find hot code.

2018-09-17 Thread David Malcolm
On Wed, 2018-09-12 at 14:36 +0200, Martin Liška wrote:
> Hi.
> 
> This is follow-up of:
> https://gcc.gnu.org/ml/gcc/2018-08/msg00162.html
> 
> I'm suggesting to introduce using colors in order to indicate hotness
> of lines. Legend is printed at the very beginning of the output file.
> Example: https://pasteboard.co/HDxK4Nm.png

One comment: color seems to be being used for two different purposes:
(a) in the left-hand column (presumably the profile count), where e.g.
lines that are 0 are being marked as red
(b) in the middle column (the line number), lines that are > 50% are
being marked as red.

So red seems to be being used both for very hot lines (for the line
number), and for unexecuted lines (for the profile count).

Perhaps the "hotness legend" could instead say something like:

Colorization: profile count: blah blah blah
Colorization: line numbers: hotness: > 50 % > 20% > 10%

to explain both colorization schemes?  (I'm not in love with the above
wording, but hopefully the idea makes sense).

> Patch survives gcov.exp test-suite. Will install next week if no
> objections.
> 
> Martin
> 
> gcc/ChangeLog:
> 
> 2018-09-12  Martin Liska  
> 
>   * doc/gcov.texi: Document new option --use-hotness-colors.
>   * gcov.c (struct source_info): Declare new field.
>   (source_info::source_info): Set default for maximum_count.
>   (print_usage): Add new -q option.
>   (process_args): Process it.
>   (accumulate_line_info): Save src->maximum_count.
>   (output_line_beginning): Make color line number if
>   flag_use_hotness_colors is set.
>   (output_line_details): Pass default argument value.
>   (output_lines): Pass src->maximum_count.
> ---
>  gcc/doc/gcov.texi |  8 ++-
>  gcc/gcov.c| 56 +--
> 
>  2 files changed, 56 insertions(+), 8 deletions(-)
> 
> 


Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 17/09/18 19:57 +0200, Marc Glisse wrote:

On Mon, 17 Sep 2018, Jonathan Wakely wrote:


On 15/09/18 14:27 +0200, Marc Glisse wrote:

Hello,

as explained in the PR, the implementation of vector is 
weirdly wasteful. Preserving the ABI prevents from changing much 
for now, but this small tweak can help the compiler remove quite a 
bit of dead code.


I think most other direct uses of _M_start are in constructors 
where the offset has just been initialized to 0, so the compiler 
should already know enough there, but I may have missed a few 
relevant places where the same idea could be used.


I used C++11 syntax because I find it nicer, and the compiler 
accepts it in C++98 mode with just a warning, suppressed in a 
standard header.


^^


Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2018-09-15  Marc Glisse  

  PR libstdc++/87258
* include/bits/stl_bvector.h (vector::begin(), vector::cbegin()):
Rebuild _M_start with an explicit 0 offset.

--
Marc Glisse



Index: include/bits/stl_bvector.h
===
--- include/bits/stl_bvector.h  (revision 264178)
+++ include/bits/stl_bvector.h  (working copy)
@@ -802,25 +802,25 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
#endif

#if __cplusplus >= 201103L
 void
 assign(initializer_list __l)
 { _M_assign_aux(__l.begin(), __l.end(), 
random_access_iterator_tag()); }

#endif

 iterator
 begin() _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }

 const_iterator
 begin() const _GLIBCXX_NOEXCEPT
-  { return this->_M_impl._M_start; }
+  { return { this->_M_impl._M_start._M_p, 0 }; }


Won't this fail to compile in C++98 mode?


"I used C++11 syntax because I find it nicer, and the compiler accepts 
it in C++98 mode with just a warning, suppressed in a standard 
header."


Oh sorry, I just looked at the patch and replied without reading the
top bit.

Even with -Wsystem-headers I don't get a warning, I have to precompile 
with -P -E then compile the result to get "warning: extended 
initializer lists only available with -std=c++11 or -std=gnu++11".


OK for trunk then.



Re: [PATCH 12/25] Make default_static_chain return NULL in non-static functions

2018-09-17 Thread Richard Sandiford
 writes:
> This patch allows default_static_chain to be called from the back-end without
> it knowing if the function is static or not.  Or, to put it another way,
> without duplicating the check everywhere it's used.
>
> 2018-09-05  Tom de Vries  
>
>   gcc/
>   * targhooks.c (default_static_chain): Return NULL in non-static
>   functions.
> ---
>  gcc/targhooks.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index afd56f3..742cfbf 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -1021,8 +1021,14 @@ default_internal_arg_pointer (void)
>  }
>  
>  rtx
> -default_static_chain (const_tree ARG_UNUSED (fndecl_or_type), bool 
> incoming_p)
> +default_static_chain (const_tree fndecl_or_type, bool incoming_p)
>  {
> +  /* While this function won't be called by the middle-end when a static
> + chain isn't needed, it's also used throughout the backend so it's
> + easiest to keep this check centralized.  */
> +  if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
> +return NULL;
> +
>if (incoming_p)
>  {
>  #ifdef STATIC_CHAIN_INCOMING_REGNUM

Not sure about this.  The caller has to make sure the query's sensible
for types, since types don't indicate whether they need a static chain.
Allowing it to be more sloppy for decls seems a bit dangerous.

Which part of the backend needs this?  I couldn't tell from a quick
grep where the call came from.

Thanks,
Richard


Re: vector _M_start and 0 offset

2018-09-17 Thread Ville Voutilainen
On Mon, 17 Sep 2018 at 21:50, Jonathan Wakely  wrote:
 >"I used C++11 syntax because I find it nicer, and the compiler accepts
> >it in C++98 mode with just a warning, suppressed in a standard
> >header."
>
> Oh sorry, I just looked at the patch and replied without reading the
> top bit.
>
> >Even with -Wsystem-headers I don't get a warning, I have to precompile
> >with -P -E then compile the result to get "warning: extended
> >initializer lists only available with -std=c++11 or -std=gnu++11".
>
> OK for trunk then.

Do other compilers besides gcc suppress the same way?


Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 17/09/18 21:55 +0300, Ville Voutilainen wrote:

On Mon, 17 Sep 2018 at 21:50, Jonathan Wakely  wrote:
>"I used C++11 syntax because I find it nicer, and the compiler accepts

>it in C++98 mode with just a warning, suppressed in a standard
>header."

Oh sorry, I just looked at the patch and replied without reading the
top bit.

>Even with -Wsystem-headers I don't get a warning, I have to precompile
>with -P -E then compile the result to get "warning: extended
>initializer lists only available with -std=c++11 or -std=gnu++11".

OK for trunk then.


Do other compilers besides gcc suppress the same way?


No, clang doesn't:

In file included from bv.cc:1:
In file included from 
/home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/9.0.0/../../../../include/c++/9.0.0/vector:65:
/home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/9.0.0/../../../../include/c++/9.0.0/bits/stl_bvector.h:812:16:
 error: non-aggregate type 'std::vector::iterator' 
(aka 'std::_Bit_iterator') cannot be initialized with an initializer list
 { return { this->_M_impl._M_start, 0 }; }
  ^
bv.cc:6:5: note: in instantiation of member function 'std::vector >::begin' requested here
 b.begin();
   ^
1 error generated.

So I do think we should stick to C++98 syntax.



Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 17/09/18 20:10 +0100, Jonathan Wakely wrote:

On 17/09/18 21:55 +0300, Ville Voutilainen wrote:

On Mon, 17 Sep 2018 at 21:50, Jonathan Wakely  wrote:

"I used C++11 syntax because I find it nicer, and the compiler accepts

it in C++98 mode with just a warning, suppressed in a standard
header."


Oh sorry, I just looked at the patch and replied without reading the
top bit.


Even with -Wsystem-headers I don't get a warning, I have to precompile
with -P -E then compile the result to get "warning: extended
initializer lists only available with -std=c++11 or -std=gnu++11".


OK for trunk then.


Do other compilers besides gcc suppress the same way?


No, clang doesn't:

In file included from bv.cc:1:
In file included from 
/home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/9.0.0/../../../../include/c++/9.0.0/vector:65:
/home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/9.0.0/../../../../include/c++/9.0.0/bits/stl_bvector.h:812:16:
 error: non-aggregate type 'std::vector::iterator' 
(aka 'std::_Bit_iterator') cannot be initialized with an initializer list
{ return { this->_M_impl._M_start, 0 }; }


And for the avoidance of doubt, it's the same error if I make the
correct change to the header:

 { return { this->_M_impl._M_start._M_p, 0 }; }



 ^
bv.cc:6:5: note: in instantiation of member function 'std::vector >::begin' requested here
b.begin();
  ^
1 error generated.

So I do think we should stick to C++98 syntax.



Re: [PATCH] Cleanup strcpy/stpcpy no nul warning code

2018-09-17 Thread Bernd Edlinger
On 09/17/18 20:32, Jeff Law wrote:
> On 9/17/18 12:20 PM, Bernd Edlinger wrote:
>> On 09/17/18 19:33, Jeff Law wrote:
>>> On 9/16/18 1:58 PM, Bernd Edlinger wrote:
 Hi,

 this is a cleanup of the recently added strlen/strcpy/stpcpy
 no nul warning code.

 Most importantly it moves the SSA_NAME handling from
 unterminated_array to string_constant, thereby fixing
 another round of xfails in the strlen and stpcpy test cases.

 I need to say that the fix for bug 86622 is relying in
 type info on the pointer which is probably not safe in
 GIMPLE in the light of the recent discussion.

 I had to add two further exceptions, which should
 be safe in general: that is a pointer arithmentic on a string
 literal is okay, and arithmetic on a string constant
 that is exactly the size of the whole DECL, cannot
 access an adjacent member.


 Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
 Is it OK for trunk?


 Thanks
 Bernd.


 patch-cleanup-no-nul.diff

 gcc:
 2018-09-16  Bernd Edlinger  

* builtins.h (unterminated_array): Remove prototype.
   * builtins.c (unterminated_array): Simplify.  Make static.
   (expand_builtin_strcpy_args): Don't use TREE_NO_WARNING here.
(expand_builtin_stpcpy_1): Remove warn_string_no_nul without effect.
* expr.c (string_constant): Handle SSA_NAME.  Add more exceptions
where pointer arithmetic is safe.
* gimple-fold.c (get_range_strlen): Handle nonstr like in c_strlen.
(get_max_strlen): Remove the unnecessary mynonstr handling.
(gimple_fold_builtin_strcpy): Simplify.
(gimple_fold_builtin_stpcpy): Simplify.
(gimple_fold_builtin_sprintf): Remove NO_WARNING propagation
without effect.
(gimple_fold_builtin_strlen): Simplify.

 testsuite:
 2018-09-16  Bernd Edlinger  

* gcc.dg/warn-stpcpy-no-nul.c: Remove xfails.
* gcc.dg/warn-strlen-no-nul.c: Remove xfails.

 Index: gcc/builtins.c
 ===
 --- gcc/builtins.c (revision 264342)
 +++ gcc/builtins.c (working copy)
 @@ -567,31 +567,12 @@ warn_string_no_nul (location_t loc, const char *fn
   the declaration of the object of which the array is a member or
   element.  Otherwise return null.  */

 -tree
 +static tree
unterminated_array (tree exp)
{
 -  if (TREE_CODE (exp) == SSA_NAME)
 -{
 -  gimple *stmt = SSA_NAME_DEF_STMT (exp);
 -  if (!is_gimple_assign (stmt))
 -  return NULL_TREE;
 -
 -  tree rhs1 = gimple_assign_rhs1 (stmt);
 -  tree_code code = gimple_assign_rhs_code (stmt);
 -  if (code == ADDR_EXPR
 -&& TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
 -  rhs1 = rhs1;
 -  else if (code != POINTER_PLUS_EXPR)
 -  return NULL_TREE;
 -
 -  exp = rhs1;
 -}
 -
  tree nonstr = NULL;
 -  if (c_strlen (exp, 1, &nonstr, 1) == NULL && nonstr)
 -return nonstr;
 -
 -  return NULL_TREE;
 +  c_strlen (exp, 1, &nonstr);
 +  return nonstr;
}
>>> Sigh.  This is going to conflict with patch #6 from Martin's kit.
>>>
>>
>> Sorry, it just feels wrong to do this folding here instead of in
>> string_constant.
>>
>> I think the handling of POINTER_PLUS_EXPR above, is faulty,
>> because it is ignoring the offset parameter, which typically
>> contains the offset part, may add an offset to a different
>> structure member or another array index.  That is what happened
>> in PR 86622.
>>
>> So you are likely looking at the wrong index, or even the wrong
>> structure member.
> I'm not disagreeing that something's wrong here -- the whole concept
> that we extract the rhs and totally ignore the offset seems wrong.  I've
> stumbled over it working through issues with either patch #4 or #6 from
> Martin.  But I felt I needed to go back and reevaluate any assumptions I
> had about how the code was supposed to be used before I called it out.
> 
> 
> Your expr.c changes may be worth pushing through independently of the
> rest.AFAICT they're really just exposing more cases where we can
> determine that we're working with a stirng constant.
> 

I think that moves just the folding from here to expr.c,
I don't see how the change in part #6 on unterminated_array
is supposed to work, I quote it here and add some comments:

>  tree
> -unterminated_array (tree exp)
> +unterminated_array (tree exp, tree *size /* = NULL */, bool *exact /* = NULL 
> */)
>  {
> +  /* Offset from the beginning of the array or null.  */
> +  tree off = NULL_TREE;
> +
>if (TREE_CODE (exp) == SSA_NAME)
>  {
>gimple *stmt = SSA_NAME_DEF_STMT (exp);
> @@ -595,18 +600,43 @@ unterminated_array (tree exp)
>  
>tree rhs1 = gimple_ass

Re: vector _M_start and 0 offset

2018-09-17 Thread Marc Glisse

On Mon, 17 Sep 2018, Jonathan Wakely wrote:


Do other compilers besides gcc suppress the same way?


No, clang doesn't:


What version is that? I didn't test on this exact patch, but clang 6 and
7 print, for similar code:

warning: generalized initializer lists are a C++11 extension
  [-Wc++11-extensions]


So I do think we should stick to C++98 syntax.


What is the oldest version of clang we are supposed to support? I
thought historically we mostly supported whatever version of clang was
released *after* (i.e. clang does the support).

--
Marc Glisse


Re: [Patch, Fortran, OOP] PR 46313: OOP-ABI issue, ALLOCATE issue, CLASS renaming issue

2018-09-17 Thread Janus Weil
Am Mo., 17. Sep. 2018 um 10:59 Uhr schrieb Bernhard Reutner-Fischer
:
>
> On Tue, 9 Nov 2010 at 11:41, Janus Weil  wrote:
> >
> > >> Ok, so it seems to me that using two leading underscores is really the
> > >> best option, since it's safe against collisions with Fortran and C
> > >> user code, and also safe to use with -fdollar-ok.
> > >>
> > >> The attached patch adds double underscores for the vtabs, vtypes,
> > >> class containers and temporaries.
> > >
> > > OK. Thanks for the patch!
> >
> > Committed as r166480.
> >
> > Thanks for all the helpful comments, everyone!
>
> Index: gcc/fortran/module.c
> ===
> --- gcc/fortran/module.c (revision 166419)
> +++ gcc/fortran/module.c (working copy)
> @@ -4372,8 +4372,8 @@ read_module (void)
>  p = name;
>
>/* Exception: Always import vtabs & vtypes.  */
> -  if (p == NULL && (strncmp (name, "vtab$", 5) == 0
> -|| strncmp (name, "vtype$", 6) == 0))
> +  if (p == NULL && (strncmp (name, "__vtab_", 5) == 0
> +|| strncmp (name, "__vtype_", 6) == 0))
>  p = name;
>
>/* Skip symtree nodes not in an ONLY clause, unless there
>
> ---8<---
>
> Sorry for the late follow-up

'Late' is a pretty bold understatement for 8 years ;D

But in any case, 'late' is certainly better than 'never' ...


> but current trunk still has the code
> quoted above where we forgot to add 2 to the length parameter of both
> strncmp calls.

True! Thanks for noticing. I'll take care of fixing it.


> I think it would be nice to teach the C and C++ frontends to warn
> about this even though it might trigger in quite some code in the
> wild.

I don't really think this is a good idea. There are actually valid use
cases of strncmp, where the 'num' argument does not correspond to the
length of any of the two strings (in particular if they're not const).

Instead, for the sake of gfortran, how about a macro like this?

#define gfc_str_startswith(str, pref) \
(strncmp ((str), (pref), strlen (pref)) == 0)

(In fact I just noticed that something like this already exists in
trans-intrinsic.c, so I would just move it into gfortran.h and rename
it.)


> Looking at gcc/fortran alone there are
> gcc/fortran/interface.c:  if (strncmp (mode, "unformatted", 9) == 0) // 11 !

I think this one could actually be a 'strcmp'?


> gcc/fortran/module.c: && (strncmp (name, "__vtab_", 5) == 0 // 7 !
> gcc/fortran/module.c: || strncmp (name, "__vtype_", 6) == 0)) // 
> 8!
> gcc/fortran/module.c: || (strncmp (name, "__vtab_", 5) != 0 // 7!
> gcc/fortran/module.c: && strncmp (name, "__vtype_", 6)
> != 0)) // 8!

Here the new macro could be applied (and in a few other cases as
well), see attached patch.

I'm regtesting the patch now. Ok for trunk if it passes?

Cheers,
Janus
diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index 3d19ad479e5..91a1f34d7f1 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -2529,7 +2529,7 @@ variable_decl (int elem)
 }
 
   /* %FILL components may not have initializers.  */
-  if (strncmp (name, "%FILL", 5) == 0 && gfc_match_eos () != MATCH_YES)
+  if (gfc_str_startswith (name, "%FILL") && gfc_match_eos () != MATCH_YES)
 {
   gfc_error ("%qs entity cannot have an initializer at %C", "%FILL");
   m = MATCH_ERROR;
@@ -7811,7 +7811,7 @@ gfc_match_end (gfc_statement *st)
 {
 case COMP_ASSOCIATE:
 case COMP_BLOCK:
-  if (!strncmp (block_name, "block@", strlen("block@")))
+  if (gfc_str_startswith (block_name, "block@"))
 	block_name = NULL;
   break;
 
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 04b0024a992..8f37a51d71c 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3305,6 +3305,9 @@ bool gfc_is_compile_time_shape (gfc_array_spec *);
 bool gfc_ref_dimen_size (gfc_array_ref *, int dimen, mpz_t *, mpz_t *);
 
 
+#define gfc_str_startswith(str, pref) \
+	(strncmp ((str), (pref), strlen (pref)) == 0)
+
 /* interface.c -- FIXME: some of these should be in symbol.c */
 void gfc_free_interface (gfc_interface *);
 bool gfc_compare_derived_types (gfc_symbol *, gfc_symbol *);
diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index f85c76bad0f..ff6b2bb7ece 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -122,9 +122,9 @@ fold_unary_intrinsic (gfc_intrinsic_op op)
 static gfc_intrinsic_op
 dtio_op (char* mode)
 {
-  if (strncmp (mode, "formatted", 9) == 0)
+  if (strcmp (mode, "formatted") == 0)
 return INTRINSIC_FORMATTED;
-  if (strncmp (mode, "unformatted", 9) == 0)
+  if (strcmp (mode, "unformatted") == 0)
 return INTRINSIC_UNFORMATTED;
   return INTRINSIC_NONE;
 }
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 2eb8f7c9113..f2d6bbaec5c 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -698,7 +698,7 @@ is_trig_resolved (gfc_expr *f)
   /* We know we've already resolved the function if we see the 

Re: vector _M_start and 0 offset

2018-09-17 Thread Marc Glisse

On Mon, 17 Sep 2018, Marc Glisse wrote:


On Mon, 17 Sep 2018, Jonathan Wakely wrote:


Do other compilers besides gcc suppress the same way?


No, clang doesn't:


What version is that? I didn't test on this exact patch, but clang 6 and
7 print, for similar code:

warning: generalized initializer lists are a C++11 extension
 [-Wc++11-extensions]


Ah, with the exact code I do get an error indeed. I'll change the code :-(


So I do think we should stick to C++98 syntax.


What is the oldest version of clang we are supposed to support? I
thought historically we mostly supported whatever version of clang was
released *after* (i.e. clang does the support).


--
Marc Glisse


Re: [PATCH 13/25] Create TARGET_DISABLE_CURRENT_VECTOR_SIZE

2018-09-17 Thread Richard Sandiford
 writes:
> This feature probably ought to be reworked as a proper target hook, but I 
> would
> like to know if this is the correct solution to the problem first.
>
> The problem is that GCN vectors have a fixed number of elements (64) and the
> vector size varies with element size.  E.g. V64QI is 64 bytes and V64SI is 256
> bytes.
>
> This is a problem because GCC has an assumption that a) vector registers are
> fixed size, and b) if there are multiple vector sizes you want to pick one 
> size
> and stick with it for the whole function.

The whole of the current vectorisation region rather than the whole function,
but yeah, this is a fundamental assumption with the current autovec code.
It's something that would be really good to fix...

> This is a problem in various places, but mostly it's not fatal. However,
> get_vectype_for_scalar_type caches the vector size for the first type it
> encounters and then tries to apply that to all subsequent vectors, which
> completely destroys vectorization.  The caching feature appears to be an
> attempt to cope with AVX having a different vector size to other x86 vector
> options.
>
> This patch simply disables the cache so that it must ask the backend for the
> preferred mode for every type.

TBH I'm surprised this works.  Obviously it does, otherwise you wouldn't
have posted it, but it seems like an accident.  Various parts of the
vectoriser query current_vector_size and expect it to be stable for
the current choice of vector size.

The underlying problem also affects (at least) base AArch64, SVE and x86_64.
We try to choose vector types on the fly based only on the type of a given
scalar value, but in reality, the type we want for a 32-bit element (say)
often depends on whether the vectorisation region also has smaller or
larger elements.  And in general we only know that after
vect_mark_stmts_to_be_vectorized, but we want to know the vector types
earlier, such as in pattern recognition and while building SLP trees.
It's a bit of a chicken-and-egg problem...

Richard


Re: [PATCH]: Allow TARGET_SCHED_ADJUST_PRIORITY hook to reduce priority

2018-09-17 Thread Jeff Law
On 9/17/18 3:08 AM, Andreas Schwab wrote:
>   PR rtl-optimization/85458
>   * sel-sched.c (sel_target_adjust_priority): Remove wrong
>   assertion.
Under what conditions is the new priority negative?  Without digging
deep into the ia64 port that just seems wrong.

jeff


Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 17/09/18 21:18 +0200, Marc Glisse wrote:

On Mon, 17 Sep 2018, Jonathan Wakely wrote:


Do other compilers besides gcc suppress the same way?


No, clang doesn't:


What version is that? I didn't test on this exact patch, but clang 6 and
7 print, for similar code:

warning: generalized initializer lists are a C++11 extension
 [-Wc++11-extensions]


A 5 month old build from the source repo:

clang version 7.0.0 (https://git.llvm.org/git/clang.git 
1a4f56e161a5ab24aa022f7e8a754e71fa5347a1) (https://git.llvm.org/git/llvm.git 
afb8c1fed21eb4848d86f2d28e9cb3afcfbb2656)

With -Wsystem-headers it gives the -Wc++11-extensions warning, but if
the begin() member function is actually called then it gives the
error, even without -Wsystem-headers.



So I do think we should stick to C++98 syntax.


What is the oldest version of clang we are supposed to support? I
thought historically we mostly supported whatever version of clang was
released *after* (i.e. clang does the support).


There's no precise policy. I generally try to keep the most recent one
or two releases working though.

But usually when we introduce something that needs a recent Clang it's
bleeding edge stuff like new C++17 or C++2a support. In that case, I
think it's fine to require a new Clang to use new libstdc++ headers.
For std::vector in C++98 mode it would be unfortunate to require
the latest and greatest version.




Re: vector _M_start and 0 offset

2018-09-17 Thread Jonathan Wakely

On 17/09/18 21:24 +0200, Marc Glisse wrote:

On Mon, 17 Sep 2018, Marc Glisse wrote:


On Mon, 17 Sep 2018, Jonathan Wakely wrote:


Do other compilers besides gcc suppress the same way?


No, clang doesn't:


What version is that? I didn't test on this exact patch, but clang 6 and
7 print, for similar code:

warning: generalized initializer lists are a C++11 extension
[-Wc++11-extensions]


Ah, with the exact code I do get an error indeed. I'll change the code :-(


Thanks. I feel your pain, but I think we'd need a better reason to
break it (the valid C++98 code is uglier but not too painful).

The cbegin() change can still use C++11 syntax.


So I do think we should stick to C++98 syntax.


What is the oldest version of clang we are supposed to support? I
thought historically we mostly supported whatever version of clang was
released *after* (i.e. clang does the support).


--
Marc Glisse


Re: vector _M_start and 0 offset

2018-09-17 Thread Marc Glisse

On Mon, 17 Sep 2018, Jonathan Wakely wrote:


On 17/09/18 21:24 +0200, Marc Glisse wrote:

On Mon, 17 Sep 2018, Marc Glisse wrote:


On Mon, 17 Sep 2018, Jonathan Wakely wrote:


Do other compilers besides gcc suppress the same way?


No, clang doesn't:


What version is that? I didn't test on this exact patch, but clang 6 and
7 print, for similar code:

warning: generalized initializer lists are a C++11 extension
[-Wc++11-extensions]


Ah, with the exact code I do get an error indeed. I'll change the code :-(


Thanks. I feel your pain, but I think we'd need a better reason to
break it (the valid C++98 code is uglier but not too painful).

The cbegin() change can still use C++11 syntax.


I think I'd rather keep the 3 lines identical, or make cbegin() call
begin().

--
Marc Glisse


[patch,nvptx] Add atomic_fetch* support for SImode arguments.

2018-09-17 Thread Cesar Philippidis
I've committed this patch extends the nvptx atomic_fetch_
pattern to accept SImode arguments regardless of the -misa argument
supplied. Tom had pre-approved this patch awhile ago. As the test case
demonstrates, it only works 32-bit pointers.

While adding the new test case, I noticed that I named atomic-fetch-2.c
incorrectly; there should be an underscore between atomic and fetch.
This patch also fixes that.

I tested this patch using both a standalone nvptx compiler and x86_64
Linux with nvptx offloading.

Cesar
[nvptx] Add atomic_fetch* support for SImode arguments.

2018-09-17  Cesar Philippidis  
	Bernd Schmidt 

	gcc/
	* config/nvptx/nvptx.md (atomic_fetch_): Enable with
	SImode args.

	gcc/testsuite/
	* gcc.target/nvptx/atomic-fetch-2.c: Rename to ...
	* gcc.target/nvptx/atomic_fetch-2.c: ... this.
	* gcc.target/nvptx/atomic_fetch-3.c: New test.

---
 gcc/config/nvptx/nvptx.md |  2 +-
 .../{atomic-fetch-2.c => atomic_fetch-2.c}|  0
 .../gcc.target/nvptx/atomic_fetch-3.c | 24 +++
 3 files changed, 25 insertions(+), 1 deletion(-)
 rename gcc/testsuite/gcc.target/nvptx/{atomic-fetch-2.c => atomic_fetch-2.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/nvptx/atomic_fetch-3.c

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index dd6032d021b..ca00b1d8073 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1449,7 +1449,7 @@
 	  UNSPECV_LOCK))
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
 	(match_dup 1))]
-  "TARGET_SM35"
+  "mode == SImode || TARGET_SM35"
   "%.\\tatom%A1.b%T0.\\t%0, %1, %2;"
   [(set_attr "atomic" "true")])
 
diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-2.c
similarity index 100%
rename from gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c
rename to gcc/testsuite/gcc.target/nvptx/atomic_fetch-2.c
diff --git a/gcc/testsuite/gcc.target/nvptx/atomic_fetch-3.c b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-3.c
new file mode 100644
index 000..36a83ebba9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-3.c
@@ -0,0 +1,24 @@
+/* Test the nvptx atomic instructions for __atomic_fetch_OP for
+   SImode arguments.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -m32" } */
+
+int
+main()
+{
+  unsigned long a = ~0;
+  unsigned b = 0xa;
+
+  __atomic_fetch_add (&a, b, 0);
+  __atomic_fetch_and (&a, b, 0);
+  __atomic_fetch_or (&a, b, 0);
+  __atomic_fetch_xor (&a, b, 0);
+  
+  return a;
+}
+
+/* { dg-final { scan-assembler "atom.add.u32" } } */
+/* { dg-final { scan-assembler "atom.b32.and" } } */
+/* { dg-final { scan-assembler "atom.b32.or" } } */
+/* { dg-final { scan-assembler "atom.b32.xor" } } */
-- 
2.17.1



Re: [Patch, Fortran, OOP] PR 46313: OOP-ABI issue, ALLOCATE issue, CLASS renaming issue

2018-09-17 Thread Janus Weil
Am Mo., 17. Sep. 2018 um 21:21 Uhr schrieb Janus Weil :
>
> Instead, for the sake of gfortran, how about a macro like this?
>
> #define gfc_str_startswith(str, pref) \
> (strncmp ((str), (pref), strlen (pref)) == 0)
>
> (In fact I just noticed that something like this already exists in
> trans-intrinsic.c, so I would just move it into gfortran.h and rename
> it.)
>
>
> > Looking at gcc/fortran alone there are
> > gcc/fortran/interface.c:  if (strncmp (mode, "unformatted", 9) == 0) // 11 !
>
> I think this one could actually be a 'strcmp'?
>
>
> > gcc/fortran/module.c: && (strncmp (name, "__vtab_", 5) == 0 // 7 !
> > gcc/fortran/module.c: || strncmp (name, "__vtype_", 6) == 0)) 
> > // 8!
> > gcc/fortran/module.c: || (strncmp (name, "__vtab_", 5) != 0 // 
> > 7!
> > gcc/fortran/module.c: && strncmp (name, "__vtype_", 6)
> > != 0)) // 8!
>
> Here the new macro could be applied (and in a few other cases as
> well), see attached patch.
>
> I'm regtesting the patch now. Ok for trunk if it passes?

The regtest was successful. I don't think the off-by-two error for the
vtab/vtype comparisons is a big problem in practice, since the number
of internal symbols with leading underscores is very limited, but of
course it should still be fixed ...

Cheers,
Janus


[PATCH, i386]: Emit fldln2 earlier in ix86_emit_i387_log1p.

2018-09-17 Thread Uros Bizjak
Eliminate common subexpression in both branches.

2018-09-17  Uros Bizjak  

* config/i386/i386.c (ix86_emit_i387_log1p): Emit fldln2 earlier.

Bootstrapped and regression tested on x86_64-linux-gnu.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 264369)
+++ config/i386/i386.c  (working copy)
@@ -43941,13 +43941,14 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
   rtx test;
 
   emit_insn (gen_absxf2 (tmp, op1));
+  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
   test = gen_rtx_GE (VOIDmode, tmp,
 const_double_from_real_value (
REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
XFmode));
-  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), 
label1));
+  emit_jump_insn
+(gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
 
-  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
   emit_jump (label2);
 
@@ -43954,7 +43955,6 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
   emit_label (label1);
   emit_move_insn (tmp, CONST1_RTX (XFmode));
   emit_insn (gen_addxf3 (tmp, op1, tmp));
-  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
 
   emit_label (label2);


Transform assertion into optimization hints

2018-09-17 Thread François Dumont

We talk about it a while back.

I've run testsuite several times since I have this patch on my local 
copy. Note that when I implemented it the wrong way tests started to 
fail so it is clearly having an effect on the generated code.


* include/bits/c++config [__OPTIMIZE__](__glibcxx_assert): Define as 
optimization hint


using __builtin_unreachable.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index d499d32b51e..24bdc97e0f5 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -466,6 +466,9 @@ namespace std
 
 #if defined(_GLIBCXX_ASSERTIONS)
 # define __glibcxx_assert(_Condition) __glibcxx_assert_impl(_Condition)
+#elif defined(__OPTIMIZE__)
+# define __glibcxx_assert(_Condition)	 \
+  do { if (! (_Condition)) __builtin_unreachable(); } while (false)
 #else
 # define __glibcxx_assert(_Condition)
 #endif


Re: [PATCH 0/5] RFC: gimple-ssa-sprintf.c: a new approach (PR middle-end/77696)

2018-09-17 Thread Martin Sebor

On 09/14/2018 12:17 PM, David Malcolm wrote:

Martin: on the way back from Cauldron I had a go at implementing new
output formats for the warnings from gimple-ssa-sprintf.c, to try to
better indicate to the end user what the problem is.  My plan was to
implement some of the ASCII art ideas we've been discussing.  However,
to try to understand what the pass is doing, I tried visualizing the
existing data structures, and I ended up liking the output of that so much,
that I think that it is what we ought to show the end-user.

Consider the examples from PR middle-end/77696:

char d[4];

typedef __SIZE_TYPE__ size_t;

extern int sprintf (char*, const char*, ...);
extern int snprintf (char*, size_t, const char*, ...);


void f (int i)
{
  // bounded, definite truncation in a directve
  snprintf (d, sizeof d, "%i", 1235);

  // bounded, definite truncation copying format string
  snprintf (d, sizeof d, "%iAB", 123);

  // unbounded, definite overflow in a directve
  sprintf (d, "%i", 1235);

  // unbounded, definite overflow copying format string
  sprintf (d, "%iAB", 123);

  // bounded, possible truncation a directve
  snprintf (d, sizeof d, "%i", i);

  // bounded, possible overflow copying format string
  snprintf (d, sizeof d, "%iAB", i);

  // unbounded, possible overflow in a directve
  sprintf (d, "%i", i);

  // unbounded, possible overflow copying format string
  sprintf (d, "%iAB", 123);

  // unbounded, possible overflow copying format string
  const char *s = i ? "123" : "1234";
  sprintf (d, "%sAB", s);
}

extern char buf_10[80];
extern char tmpdir[80];
extern char fname[8];

void g (int num)
{
  sprintf (buf_10, "/%s/%s-%i.tmp", tmpdir, fname, num);
}

trunk currently emits:

zz.c: In function ‘f’:
zzz.c:12:29: warning: ‘snprintf’ output truncated before the last format 
character [-Wformat-truncation=]
12 |   snprintf (d, sizeof d, "%i", 1235);
   | ^
zzz.c:12:3: note: ‘snprintf’ output 5 bytes into a destination of size 4
12 |   snprintf (d, sizeof d, "%i", 1235);
   |   ^~
zzz.c:15:30: warning: ‘AB’ directive output truncated writing 2 bytes into a 
region of size 1 [-Wformat-truncation=]
15 |   snprintf (d, sizeof d, "%iAB", 123);
   | ~^
zzz.c:15:3: note: ‘snprintf’ output 6 bytes into a destination of size 4
15 |   snprintf (d, sizeof d, "%iAB", 123);
   |   ^~~
zzz.c:18:18: warning: ‘sprintf’ writing a terminating nul past the end of the 
destination [-Wformat-overflow=]
18 |   sprintf (d, "%i", 1235);
   |  ^
zzz.c:18:3: note: ‘sprintf’ output 5 bytes into a destination of size 4
18 |   sprintf (d, "%i", 1235);
   |   ^~~
zzz.c:21:19: warning: ‘AB’ directive writing 2 bytes into a region of size 1 
[-Wformat-overflow=]
21 |   sprintf (d, "%iAB", 123);
   |  ~^
zzz.c:21:3: note: ‘sprintf’ output 6 bytes into a destination of size 4
21 |   sprintf (d, "%iAB", 123);
   |   ^~~~
zzz.c:33:19: warning: ‘AB’ directive writing 2 bytes into a region of size 1 
[-Wformat-overflow=]
33 |   sprintf (d, "%iAB", 123);
   |  ~^
zzz.c:33:3: note: ‘sprintf’ output 6 bytes into a destination of size 4
33 |   sprintf (d, "%iAB", 123);
   |   ^~~~
zzz.c:37:19: warning: ‘AB’ directive writing 2 bytes into a region of size 
between 0 and 1 [-Wformat-overflow=]
37 |   sprintf (d, "%sAB", s);
   |  ~^
zzz.c:37:3: note: ‘sprintf’ output between 6 and 7 bytes into a destination of 
size 4
37 |   sprintf (d, "%sAB", s);
   |   ^~
zzz.c: In function ‘g’:
zzz.c:46:24: warning: ‘/’ directive writing 1 byte into a region of size 
between 0 and 79 [-Wformat-overflow=]
46 |   sprintf (buf_10, "/%s/%s-%i.tmp", tmpdir, fname, num);
   |^
zzz.c:46:3: note: ‘sprintf’ output between 9 and 105 bytes into a destination 
of size 80
46 |   sprintf (buf_10, "/%s/%s-%i.tmp", tmpdir, fname, num);
   |   ^


With this patch kit, gcc emits:

zzz.c: In function ‘f’:
zzz.c:12:13: warning: ‘snprintf’ will truncate the output (5 bytes) to size 4 
[-Wformat-truncation=]
12 |   snprintf (d, sizeof d, "%i", 1235);
   | ^ ^~^
   | | | |
   | | | 1 byte (for NUL terminator)
   | | 4 bytes
   | capacity: 4 bytes
zzz.c:1:6: note: destination declared here
1 | char d[4];
  |  ^
zzz.c:15:13: warning: ‘snprintf’ will truncate the output (6 bytes) to size 4 
[-Wformat-truncation=]
15 |   snprintf (d, sizeof d, "%iAB", 123);
   | ^ ^~^~^
   | | | | |
   | | | | 1 byte (for NUL terminator)
   | | | 2 bytes
   | | 3 bytes
   | capacity: 4 bytes
zzz.c:1:6: note: destination declar

Re: Transform assertion into optimization hints

2018-09-17 Thread Marc Glisse

On Mon, 17 Sep 2018, François Dumont wrote:


We talk about it a while back.

I've run testsuite several times since I have this patch on my local copy. 
Note that when I implemented it the wrong way tests started to fail so it is 
clearly having an effect on the generated code.


* include/bits/c++config [__OPTIMIZE__](__glibcxx_assert): Define as 
optimization hint


using __builtin_unreachable.

Ok to commit ?


I see for instance in bits/regex_automaton.tcc:

  __glibcxx_assert(__m.count(__ref._M_next) > 0);

where __m is a map, which does not look so well suited for a 
__builtin_unreachable. Is it using the wrong macro?


--
Marc Glisse


Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Martin Sebor

On 09/17/2018 11:35 AM, Richard Biener wrote:

On September 17, 2018 7:24:16 PM GMT+02:00, Jeff Law  wrote:

On 9/15/18 2:14 AM, Bernd Edlinger wrote:

On 9/14/18, Martin Sebor wrote:

As I said above, this happens during the dom walk in the ccp
pass:

  substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
  walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

The warning is issued during the walker.walk() call as
strncpy is being folded into memcpy.  The prior assignments are
only propagated later, when the next statement after the strncpy
call is reached.  It happens in
substitute_and_fold_dom_walker::before_dom_children(). So during
the strncpy folding we see the next statement as:

  MEM[(struct S *)_1].a[n_7] = 0;

After the strncpy call is transformed to memcpy, the assignment
above is transformed to

  MEM[(struct S *)_8].a[3] = 0;



If they're only discovered as copies within the pass where

you're trying

to issue the diagnostic, then you'd want to see if the pass has

any

internal structures that tell you about equivalences.



I don't know if this is possible.  I don't see any APIs in
tree-ssa-propagate.h that would let me query the internal data
somehow to find out during folding (when the warning is issued).



Well,

if I see this right, the CCP is doing tree transformations
while from the folding of strncpy the predicate

maybe_diag_stxncpy_trunc

is called, and sees inconsistent information, in the tree,
and therefore it issues a warning.

I understand that walking the references is fragile at least
in this state.

But why not just prevent warnings when this is called from CCP?


Like this?

Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?

No.  That's just hacking around the real problem.


The real problem is emitting diagnostics from folding code.


Strncpy is a particularly dangerous function that's often
misunderstood and misused.  IMO, it would be a worthwhile
tradeoff to move the strncpy to memcpy transformation to
the strlen pass where these bugs could be detected more
reliably, and with fewer false positives.  I would not
expect it to have a noticeable impact on code efficiency.

I'd be happy to modify the patch to do that if you find it
acceptable.

Martin


Re: [PATCH v2] libgcc: m68k: avoid TEXTRELs in shared library (PR 86224)

2018-09-17 Thread Sergei Trofimovich via gcc-patches
On Sat, 28 Jul 2018 20:42:02 +0100
"slyfox.inbox.ru via gcc-patches"  wrote:

> From: Sergei Trofimovich 
> 
> Cc: Ian Lance Taylor 
> Cc: Jeff Law 
> Cc: Andreas Schwab 
> Signed-off-by: Sergei Trofimovich 
> ---
>  libgcc/config/m68k/lb1sf68.S | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S
> index 325a7c17d9b..d5240d4aa55 100644
> --- a/libgcc/config/m68k/lb1sf68.S
> +++ b/libgcc/config/m68k/lb1sf68.S
> @@ -435,6 +435,7 @@ $_exception_handler:
>   .text
>   FUNC(__mulsi3)
>   .globl  SYM (__mulsi3)
> + .hidden SYM (__mulsi3)
>  SYM (__mulsi3):
>   movew   sp@(4), d0  /* x0 -> d0 */
>   muluw   sp@(10), d0 /* x0*y1 */
> @@ -458,6 +459,7 @@ SYM (__mulsi3):
>   .text
>   FUNC(__udivsi3)
>   .globl  SYM (__udivsi3)
> + .hidden SYM (__udivsi3)
>  SYM (__udivsi3):
>  #ifndef __mcoldfire__
>   movel   d2, sp@-
> @@ -534,6 +536,7 @@ L2:   subql   IMM (1),d4
>   .text
>   FUNC(__divsi3)
>   .globl  SYM (__divsi3)
> + .hidden SYM (__divsi3)
>  SYM (__divsi3):
>   movel   d2, sp@-
>  
> -- 
> 2.18.0
> 

Patch ping. Not sure which is preferable v1 (as other targets do) or v2.

-- 

  Sergei


pgpLNOtBofVZq.pgp
Description: Цифровая подпись OpenPGP


Re: [PATCH v2] libgcc: m68k: avoid TEXTRELs in shared library (PR 86224)

2018-09-17 Thread Jeff Law
On 9/17/18 3:18 PM, Sergei Trofimovich wrote:
> On Sat, 28 Jul 2018 20:42:02 +0100
> "slyfox.inbox.ru via gcc-patches"  wrote:
> 
>> From: Sergei Trofimovich 
>>
>> Cc: Ian Lance Taylor 
>> Cc: Jeff Law 
>> Cc: Andreas Schwab 
>> Signed-off-by: Sergei Trofimovich 
>> ---
>>  libgcc/config/m68k/lb1sf68.S | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S
>> index 325a7c17d9b..d5240d4aa55 100644
>> --- a/libgcc/config/m68k/lb1sf68.S
>> +++ b/libgcc/config/m68k/lb1sf68.S
>> @@ -435,6 +435,7 @@ $_exception_handler:
>>  .text
>>  FUNC(__mulsi3)
>>  .globl  SYM (__mulsi3)
>> +.hidden SYM (__mulsi3)
>>  SYM (__mulsi3):
>>  movew   sp@(4), d0  /* x0 -> d0 */
>>  muluw   sp@(10), d0 /* x0*y1 */
>> @@ -458,6 +459,7 @@ SYM (__mulsi3):
>>  .text
>>  FUNC(__udivsi3)
>>  .globl  SYM (__udivsi3)
>> +.hidden SYM (__udivsi3)
>>  SYM (__udivsi3):
>>  #ifndef __mcoldfire__
>>  movel   d2, sp@-
>> @@ -534,6 +536,7 @@ L2:  subql   IMM (1),d4
>>  .text
>>  FUNC(__divsi3)
>>  .globl  SYM (__divsi3)
>> +.hidden SYM (__divsi3)
>>  SYM (__divsi3):
>>  movel   d2, sp@-
>>  
>> -- 
>> 2.18.0
>>
> 
> Patch ping. Not sure which is preferable v1 (as other targets do) or v2.
Hmm, I thought Andreas NAK'd V1.  And I think Rich's comments
essentially NAK'd V2.

Jeff


Re: C++ PATCH to implement P1064R0, Virtual Function Calls in Constant Expressions

2018-09-17 Thread Marek Polacek
On Fri, Sep 14, 2018 at 04:45:22PM -0400, Marek Polacek wrote:
> On Fri, Sep 14, 2018 at 04:30:46PM -0400, Jason Merrill wrote:
> > On Fri, Sep 14, 2018 at 1:19 PM, Marek Polacek  wrote:
> > > This patch implements another bit of C++20, virtual calls in constant
> > > expression:
> > > 
> > > The basic idea is that since in a constant expression we know the dynamic
> > > type (to detect invalid code etc.), the restriction that prohibits virtual
> > > calls is unnecessary.
> > >
> > > Handling virtual function calls turned out to be fairly easy (as 
> > > anticipated);
> > > I simply let the constexpr machinery figure out the dynamic type and then
> > > OBJ_TYPE_REF_TOKEN gives us the index into the virtual function table.  
> > > That
> > > way we get the function decl we're interested in, and 
> > > cxx_eval_call_expression
> > > takes it from there.
> > >
> > > But handling pointer-to-virtual-member-functions doesn't work like that.
> > > get_member_function_from_ptrfunc creates a COND_EXPR which looks like
> > > if (pf.__pfn & 1) // is it a virtual function?
> > >   // yes, find the pointer in the vtable
> > > else
> > >   // no, just return the pointer
> > > so ideally we want to evaluate the then-branch.  Eventually it'll 
> > > evaluate it
> > > to something like _ZTV2X2[2], but the vtable isn't constexpr so we'd end 
> > > up
> > > with "not a constant expression" error.
> > 
> > Then let's mark the vtable as constexpr, there's no reason for it not to be.

Done.  But then I had to add indexes to the vtable's ctor (because 
find_array_ctor_elt
expects it), which broke an assert in gimple_get_virt_method_for_vtable.  But I
don't need the array_ref hack anymore!

Also, I had to set DECL_DECLARED_CONSTEXPR_P after maybe_commonize_var,
otherwise we run into the sorry in that function with -fno-weak...

> Ok, unfortunately it wasn't as easy as merely marking it 
> DECL_DECLARED_CONSTEXPR_P
> in initialize_artificial_var because then I saw "used in its own initializer"
> error.  Which I don't know why, but now that I know you agree with this 
> direction
> I can dig deeper.
>  
> > > Since the vtable initializer is
> > > a compile-time constant, I thought we could make it work by a hack as the 
> > > one
> > > in cxx_eval_array_reference.  We'll then let cxx_eval_call_expression do 
> > > its
> > > job and everything is hunky-dory.
> > >
> > > Except when it isn't: I noticed that the presence of _vptr doesn't make 
> > > the
> > > class non-empty, and when cxx_eval_constant_expression saw a decl with an 
> > > empty
> > > class type, it just evaluated it to { }.  But such a class still had 
> > > gotten an
> > > initializer that looks like {.D.2082 = {._vptr.X2 = &_ZTV2X2 + 16}}.  So
> > > replacing it with { } will lose the proper initializer whereupon we fail.
> > > The check I've added to cxx_eval_constant_expression won't win any beauty
> > > contests but unfortunately EMPTY_CONSTRUCTOR_P doesn't work there.
> > 
> > Perhaps we should check !TYPE_POLYMORPHIC_P as well as
> > is_really_empty_class.  Perhaps there should be a predicate for that,
> > say, is_really_nearly_empty_class...

For now I've only added the !TYPE_POLYMORPHIC_P check, which works just fine.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2018-09-17  Marek Polacek  

P1064R0 - Allowing Virtual Function Calls in Constant Expressions
* call.c (build_over_call): Add FIXME.
* class.c (initialize_vtable): Mark the vtable as constexpr.
(build_vtbl_initializer): Build vtable's constructor with indexes.
* constexpr.c (cxx_eval_constant_expression): Don't ignore _vptr's
initializer.  Handle OBJ_TYPE_REF.
(potential_constant_expression_1): Handle OBJ_TYPE_REF.
* decl.c (grokdeclarator): Change error to pedwarn.  Only warn when
pedantic and not C++2a.

* gimple-fold.c (gimple_get_virt_method_for_vtable): Remove assert.

* g++.dg/cpp0x/constexpr-virtual5.C: Adjust dg-error.
* g++.dg/cpp2a/constexpr-virtual1.C: New test.
* g++.dg/cpp2a/constexpr-virtual2.C: New test.
* g++.dg/cpp2a/constexpr-virtual3.C: New test.
* g++.dg/cpp2a/constexpr-virtual4.C: New test.
* g++.dg/cpp2a/constexpr-virtual5.C: New test.
* g++.dg/cpp2a/constexpr-virtual6.C: New test.
* g++.dg/cpp2a/constexpr-virtual7.C: New test.
* g++.dg/cpp2a/constexpr-virtual8.C: New test.
* g++.dg/cpp2a/constexpr-virtual9.C: New test.
* g++.dg/diagnostic/virtual-constexpr.C: Skip for C++2a.  Use
-pedantic-errors.  Adjust dg-error.

diff --git gcc/cp/call.c gcc/cp/call.c
index 69503ca7920..6c70874af40 100644
--- gcc/cp/call.c
+++ gcc/cp/call.c
@@ -8401,7 +8401,8 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
 
   if (DECL_VINDEX (fn) && (flags & LOOKUP_NONVIRTUAL) == 0
   /* Don't mess with virtual look

Re: [PATCH v2] libgcc: m68k: avoid TEXTRELs in shared library (PR 86224)

2018-09-17 Thread Sergei Trofimovich via gcc-patches
On Mon, 17 Sep 2018 15:29:08 -0600
Jeff Law  wrote:

> On 9/17/18 3:18 PM, Sergei Trofimovich wrote:
> > On Sat, 28 Jul 2018 20:42:02 +0100
> > "slyfox.inbox.ru via gcc-patches"  wrote:
> >   
> >> From: Sergei Trofimovich 
> >>
> >> Cc: Ian Lance Taylor 
> >> Cc: Jeff Law 
> >> Cc: Andreas Schwab 
> >> Signed-off-by: Sergei Trofimovich 
> >> ---
> >>  libgcc/config/m68k/lb1sf68.S | 3 +++
> >>  1 file changed, 3 insertions(+)
> >>
> >> diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S
> >> index 325a7c17d9b..d5240d4aa55 100644
> >> --- a/libgcc/config/m68k/lb1sf68.S
> >> +++ b/libgcc/config/m68k/lb1sf68.S
> >> @@ -435,6 +435,7 @@ $_exception_handler:
> >>.text
> >>FUNC(__mulsi3)
> >>.globl  SYM (__mulsi3)
> >> +  .hidden SYM (__mulsi3)
> >>  SYM (__mulsi3):
> >>movew   sp@(4), d0  /* x0 -> d0 */
> >>muluw   sp@(10), d0 /* x0*y1 */
> >> @@ -458,6 +459,7 @@ SYM (__mulsi3):
> >>.text
> >>FUNC(__udivsi3)
> >>.globl  SYM (__udivsi3)
> >> +  .hidden SYM (__udivsi3)
> >>  SYM (__udivsi3):
> >>  #ifndef __mcoldfire__
> >>movel   d2, sp@-
> >> @@ -534,6 +536,7 @@ L2:subql   IMM (1),d4
> >>.text
> >>FUNC(__divsi3)
> >>.globl  SYM (__divsi3)
> >> +  .hidden SYM (__divsi3)
> >>  SYM (__divsi3):
> >>movel   d2, sp@-
> >>  
> >> -- 
> >> 2.18.0
> >>  
> > 
> > Patch ping. Not sure which is preferable v1 (as other targets do) or v2.  
> Hmm, I thought Andreas NAK'd V1.  And I think Rich's comments
> essentially NAK'd V2.

Aha. I'm trying to clarify the desired state then to try to tweak
patch properly:

Current:
  - static library:
* global public __divsi3
  - shared library: 
* global public __divsi3

Desired:
  - static library:
* global hidden __divsi3 [ABI change:public->hidden]
  - shared library: 
* global public __divsi3 [no change]
* global hidden __divsi3_internal [change:new symbol for internal 
references]

Does that sound reasonable?
Should all targets follow the same pattern? I think they don't today.

v1 was a step in desired direction but without user-visible ABI change.

-- 

  Sergei


pgppmyTjSoYAk.pgp
Description: Цифровая подпись OpenPGP


Re: [PATCH] rtlanal: Fix nonzero_bits for non-load paradoxical subregs (PR85925)

2018-09-17 Thread Segher Boessenkool
On Mon, Jun 04, 2018 at 10:57:05PM +0200, Eric Botcazou wrote:
> > In the PR we have insns:
> > 
> > Trying 23 -> 24:
> >23: r123:SI=zero_extend(r122:HI)
> >   REG_DEAD r122:HI
> >24: [r115:SI]=r123:SI
> >   REG_DEAD r123:SI
> > 
> > which should be combined to
> > 
> > (set (mem:SI (reg/f:SI 115 [ pretmp_19 ]) [1 *pretmp_19+0 S4 A32])
> > (and:SI (subreg:SI (reg:HI 122) 0)
> > (const_int 32767 [0x7fff])))
> > 
> > But nonzero_bits of reg:HI 122 is 0x7fff, and nonzero_bits1 thinks it
> > then also has that same nonzero_bits for the subreg.  This is not
> > correct: the bit outside of HImode are undefined.  load_extend_op
> > applies to loads from memory only, not anything else.  Which means the
> > whole AND is optimised away.
> 
> No, this is done on purpose for WORD_REGISTER_OPERATIONS targets and your 
> patch will pessimize them.  I'm going to have a look at the PR then.

Hi Eric,

Do you have any progress with this?


Segher


Re: [PATCH,rs6000] Add builtins for accessing the FPSCR

2018-09-17 Thread Carl Love
Segher:

This is an updated patch to add the following builtins:
__builtin_mffsl, __builtin_set_fpscr_rn, __builtin_set_fpscr_rn,
__builtin_mtfsb0, __builtin_mtfsb1.

I have addressed you comments with regards to the change log entries. 
I have also addressed the various comments about the code, function
names etc.  

I have also addressed your comment about the builtins not working in
32-bit mode.  The builtins __builtin_mffsl, __builtin_set_fpscr_rn,
__builtin_mtfsb0, __builtin_mtfsb1 are supported in 32-bit mode. 
Builtin __builtin_set_fpscr_drn is only supported in 64-bit mode.

Note, rs6000_mffsl, builtins __builtin_set_fpscr_rn,
__builtin_set_fpscr_drn use the ISA 3.0 instructions if compiling for
ISA 3.0 or beyond.  Otherwise, they use logical operations to emulate
the ISA 3.0 instructions.

The the tests for the __builtin_set_fpscr_drn builtin were separated
into separate files since they are only supported in 64-bit mode.


The patch has been tested on 

powerpc64le-unknown-linux-gnu (Power 8 LE  64-bit mode only) 
powerpc64-unknown-linux-gnu (Power 8 BE  32-bit and 64-bit modes) 
    powerpc64le-unknown-linux-gnu (Power 9 LE  64-bit mode only)

With no regressions.

Please let me know if the patch looks OK for trunk.

 Carl Love



gcc/ChangeLog:

2018-09-17  Carl Love  

* config/rs6000/rs6000-builtin.def (__builtin_mffsl): New.
(__builtin_mtfsb0): New.
(__builtin_mtfsb1): New.
( __builtin_set_fpscr_rn): New.
(__builtin_set_fpscr_drn): New.
* config/rs6000.c (rs6000_expand_mtfsb0_mtfsb1_builtin): Add.
(rs6000_expand_set_fpscr_rn_builtin): Add.
(rs6000_expand_set_fpscr_drn_builtin): Add.
(rs6000_expand_builtin): Add case statement entries for
RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1,
RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN,
RS6000_BUILTIN_MFFSL.
(rs6000_init_builtins): Add ftype initialization and def_builtin
calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1,
__builtin_set_fpscr_rn, __builtin_set_fpscr_drn.
* config/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn,
rs6000_mffscdrn): Add define_insn.
(rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand.
* doc/extend.texi: Add documentation for the builtins.

gcc/testsuite/ChangeLog:

2018-09-17  Carl Love  

* gcc.target/powerpc/test_mffsl-p9.c: New file.
* gcc.target/powerpc/test_fpscr_rn_builtin.c: New file.
* gcc.target/powerpc/test_fpscr_drn_builtin.c: New file.
* gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file.
* gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def   |  23 +++
 gcc/config/rs6000/rs6000.c | 148 
 gcc/config/rs6000/rs6000.md| 160 -
 gcc/doc/extend.texi|  36 +++-
 .../gcc.target/powerpc/test_fpscr_drn_builtin.c| 116 +
 .../powerpc/test_fpscr_drn_builtin_error.c |  17 ++
 .../gcc.target/powerpc/test_fpscr_rn_builtin.c | 190 +
 .../powerpc/test_fpscr_rn_builtin_error.c  |  22 +++
 gcc/testsuite/gcc.target/powerpc/test_mffsl.c  |  34 
 9 files changed, 744 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
 create mode 100644 
gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
 create mode 100644 
gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index f799681..9e960eb 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
  RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
+ RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
  RS6000_BTM_ALWAYS,
  RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
  CODE_FOR_rs6000_mtfsf)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0",
+ RS6000_BTM_ALWAYS,
+ RS6000_BTC_MISC | RS6000_BTC_UNARY,
+ CODE_FOR_rs6000_mtfsb0)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1",
+ RS6000_BTM_ALWAYS,
+ RS6000_BTC_MISC | RS6000_BTC_UNARY,
+ CODE_FOR_rs6000_mtfsb1

Re: [PATCH v2] libgcc: m68k: avoid TEXTRELs in shared library (PR 86224)

2018-09-17 Thread Jeff Law
On 9/17/18 3:50 PM, Sergei Trofimovich wrote:
> On Mon, 17 Sep 2018 15:29:08 -0600
> Jeff Law  wrote:
> 
>> On 9/17/18 3:18 PM, Sergei Trofimovich wrote:
>>> On Sat, 28 Jul 2018 20:42:02 +0100
>>> "slyfox.inbox.ru via gcc-patches"  wrote:
>>>   
 From: Sergei Trofimovich 

 Cc: Ian Lance Taylor 
 Cc: Jeff Law 
 Cc: Andreas Schwab 
 Signed-off-by: Sergei Trofimovich 
 ---
  libgcc/config/m68k/lb1sf68.S | 3 +++
  1 file changed, 3 insertions(+)

 diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S
 index 325a7c17d9b..d5240d4aa55 100644
 --- a/libgcc/config/m68k/lb1sf68.S
 +++ b/libgcc/config/m68k/lb1sf68.S
 @@ -435,6 +435,7 @@ $_exception_handler:
.text
FUNC(__mulsi3)
.globl  SYM (__mulsi3)
 +  .hidden SYM (__mulsi3)
  SYM (__mulsi3):
movew   sp@(4), d0  /* x0 -> d0 */
muluw   sp@(10), d0 /* x0*y1 */
 @@ -458,6 +459,7 @@ SYM (__mulsi3):
.text
FUNC(__udivsi3)
.globl  SYM (__udivsi3)
 +  .hidden SYM (__udivsi3)
  SYM (__udivsi3):
  #ifndef __mcoldfire__
movel   d2, sp@-
 @@ -534,6 +536,7 @@ L2:subql   IMM (1),d4
.text
FUNC(__divsi3)
.globl  SYM (__divsi3)
 +  .hidden SYM (__divsi3)
  SYM (__divsi3):
movel   d2, sp@-
  
 -- 
 2.18.0
  
>>>
>>> Patch ping. Not sure which is preferable v1 (as other targets do) or v2.  
>> Hmm, I thought Andreas NAK'd V1.  And I think Rich's comments
>> essentially NAK'd V2.
> 
> Aha. I'm trying to clarify the desired state then to try to tweak
> patch properly:
> 
> Current:
>   - static library:
> * global public __divsi3
>   - shared library: 
> * global public __divsi3
Right.  And presumably the problem here is with public visibility we
have to support the possibility to interposition which can be costly.

> 
> Desired:
>   - static library:
> * global hidden __divsi3 [ABI change:public->hidden]
Right.  But since everything should be linking against libgcc, each DSO
or executable should end up carrying whatever libgcc functions it needs
and we can hide them from the dynamic linker becuase we don't need to
support interposition and/or function pointer testing on them.

>   - shared library: 
> * global public __divsi3 [no change]
> * global hidden __divsi3_internal [change:new symbol for internal 
> references]
> 
> Does that sound reasonable?
> Should all targets follow the same pattern? I think they don't today.
And in the shared case we need to leave the original symbol alone and
provide an alias with hidden visibility IIRC.

I might have mis-understood Rich Felker's objection.

Jeff



Re: [libgcc] Use v2 map syntax in libgcc-unwind.map if Solaris ld supports it

2018-09-17 Thread Jeff Law
On 9/16/18 5:28 AM, Rainer Orth wrote:
> Currently, the libgcc-unwind.map file generated for use with Solaris ld
> 
>   http://gcc.gnu.org/ml/gcc-patches/2014-01/msg01088.html
> 
> uses the v1 linker map file syntax because both that's supported
> everywhere.  However, with ld -z guidance, newer versions of ld warn
> about this:
> 
> ld: guidance: version 2 mapfile syntax recommended: ./libgcc-unwind.map
> 
> Since it is easy to detect if ld supports v2 map syntax (introduced in
> Solaris 11 and later backported to some Solaris 10 patches) and the
> mapfile is generated at build time, the following patch performs this
> check and generates a v2 mapfile if ld supports it.
> 
> While testing the patch, I found that the arg to AC_TRY_COMMAND needed
> quoting to avoid the embedded commas in -Wl,-M,... ended the command.
> Shouldn't the other uses of AC_TRY_COMMAND receive the same quoting for
> safety and consistency?
> 
> Bootstrapped on i386-pc-solaris2.10 (with older v1-only ld) and
> i386-pc-solaris2.11 without regressions.
> 
> Ok for mainline?
OK with a suitable ChangeLog.


Jeff


Re: [PATCH 04/25] SPECIAL_REGNO_P

2018-09-17 Thread Jeff Law
On 9/13/18 8:08 AM, Andrew Stubbs wrote:
> On 13/09/18 11:01, Andrew Stubbs wrote:
>> The assert is caused because the def-use chains indicate that SCC
>> conflicts with itself. I suppose the question is why is it doing that,
>> but it's probably do do with that being a special register that gets
>> used in split2 (particularly by the addptrdi3 pattern). Although,
>> those patterns are careful to save SCC to one side and then restore it
>> again after, so I'd have thought the DF analysis would work out?
> 
> I think I may have a theory on this one now
> 
> The addptrdi3 pattern must use two 32-bit adds with a carry in SCC, but
> addptr patterns are not allowed to clobber SCC, so the splitter
> carefully saves and restores the old value.
> 
> This is correct at runtime, and looks correct in RTL dumps, but it means
> that there's still a single rtx REG instance holding the live SCC
> register, and its still live before and after the new add instruction.
> 
> Would I be right in thinking that the dataflow analysis doesn't like this?
> 
> I think I have a work-around (by using different instructions), but is
> there a correct way to do this if there weren't an alternative?
I would expect dataflow to treat the SCC save as a use of the SCC
register.  That's likely to cause it to be live on all paths from the
entry to the SCC save.

Jeff


Re: [GCC][PATCH][Aarch64] Added pattern to match zero extended bfxil

2018-09-17 Thread Christophe Lyon
On Fri, 14 Sep 2018 at 12:04, Sam Tebbs  wrote:
>
>
>
> On 08/28/2018 11:54 PM, James Greenhalgh wrote:
>
> 
> >
> > OK once the other one is approved, with the obvious rebase over the renamed
> > function.
> >
> > James
>
> Here is the rebased patch. Still OK for me to commit to trunk now that
> the other patch has been committed?
>
> Sam
>
> >
> >> gcc/
> >> 2018-07-31  Sam Tebbs  
> >>
> >>   PR target/85628
> >>   * config/aarch64/aarch64.md (*aarch64_bfxilsi_uxtw): Define.
> >>
> >> gcc/testsuite
> >> 2018-07-31  Sam Tebbs  
> >>
> >>   PR target/85628
> >>   * gcc.target/aarch64/combine_bfxil.c
> >> (combine_zero_extended_int, foo6):
> >>   New functions.
>

Hi Sam,

This patch causes a regression when using -mabi=ilp32:
FAIL: gcc.target/aarch64/combine_bfxil.c scan-assembler-not uxtw\\t

How much do we care about ilp32?

Christophe


Re: [PATCH] Cleanup strcpy/stpcpy no nul warning code

2018-09-17 Thread Jeff Law
On 9/17/18 1:18 PM, Bernd Edlinger wrote:
> 
> I suppose the expr.c change will likely work alone, but
> it was designed to replace the stuff here.
> 
> So I will try to split the expr.c part out, and post it
> tomorrow.
It does work alone.  I've already verified that here :-)  BUt will
probably call it a night without pushing it to the trunk.

jeff


Re: [GCC][PATCH][Aarch64] Added pattern to match zero extended bfxil

2018-09-17 Thread Ramana Radhakrishnan
On Mon, 17 Sep 2018, 23:56 Christophe Lyon, 
wrote:

> On Fri, 14 Sep 2018 at 12:04, Sam Tebbs  wrote:
> >
> >
> >
> > On 08/28/2018 11:54 PM, James Greenhalgh wrote:
> >
> > 
> > >
> > > OK once the other one is approved, with the obvious rebase over the
> renamed
> > > function.
> > >
> > > James
> >
> > Here is the rebased patch. Still OK for me to commit to trunk now that
> > the other patch has been committed?
> >
> > Sam
> >
> > >
> > >> gcc/
> > >> 2018-07-31  Sam Tebbs  
> > >>
> > >>   PR target/85628
> > >>   * config/aarch64/aarch64.md (*aarch64_bfxilsi_uxtw): Define.
> > >>
> > >> gcc/testsuite
> > >> 2018-07-31  Sam Tebbs  
> > >>
> > >>   PR target/85628
> > >>   * gcc.target/aarch64/combine_bfxil.c
> > >> (combine_zero_extended_int, foo6):
> > >>   New functions.
> >
>
> Hi Sam,
>
> This patch causes a regression when using -mabi=ilp32:
> FAIL: gcc.target/aarch64/combine_bfxil.c scan-assembler-not uxtw\\t
>
> How much do we care about ilp32?
>


Lets keep the testsuite clean please.


Ramana

>
> Christophe
>


Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Jeff Law
On 9/14/18 4:11 PM, Martin Sebor wrote:
> On 09/14/2018 03:35 PM, Jeff Law wrote:
>> On 9/12/18 11:46 AM, Martin Sebor wrote:
>>> On 08/31/2018 04:07 AM, Richard Biener wrote:
 On Thu, Aug 30, 2018 at 7:39 PM Martin Sebor  wrote:
>
> On 08/30/2018 11:22 AM, Richard Biener wrote:
>> On August 30, 2018 6:54:21 PM GMT+02:00, Martin Sebor
>>  wrote:
>>> On 08/30/2018 02:35 AM, Richard Biener wrote:
 On Thu, Aug 30, 2018 at 2:12 AM Martin Sebor 
>>> wrote:
>
> The attached patch adds code to work harder to determine whether
> the destination of an assignment involving MEM_REF is the same
> as the destination of a prior strncpy call.  The included test
> case demonstrates when this situation comes up.  During ccp,
> dstbase and lhsbase returned by get_addr_base_and_unit_offset()
> end up looking like this:

 "During CCP" means exactly when?  The CCP lattice tracks copies
 so CCP should already know that _1 == _8.  I suppose during
 substitute_and_fold then?  But that replaces uses before folding
 the stmt.
>>>
>>> Yes, when ccp_finalize() performs the final substitution during
>>> substitute_and_fold().
>>
>> But then you shouldn't need the loop but at most look at the pointer
>> SSA Def to get at the non-invariant ADDR_EXPR.
>
> I don't follow.   Are you suggesting to compare
> SSA_NAME_DEF_STMT (dstbase) to SSA_NAME_DEF_STMT (lhsbase) for
> equality?  They're not equal.

 No.

> The first loop iterates once and retrieves
>
>    1.  _8 = &pb_3(D)->a;
>
> The second loop iterates three times and retrieves:
>
>    1.  _1 = _9
>    2.  _9 = _8
>    3.  _8 = &pb_3(D)->a;
>
> How do I get from _1 to &pb_3(D)->a without iterating?  Or are
> you saying to still iterate but compare the SSA_NAME_DEF_STMT?

 I say you should retrieve _8 = &pb_3(D)->a immediately since the
 copies should be
 propagated out at this stage.
>>>
>>> The warning is issued as the strncpy call is being folded (during
>>> the dom walk in substitute_and_fold_engine::substitute_and_fold)
>>> but before the subsequent statements have been folded (during
>>> the subsequent loop to eliminate statements).  So at the point
>>> of the strncpy folding the three assignments above are still
>>> there.
>>>
>>> I can't think of a good way to solve this problem that's not
>>> overly intrusive.  Unless you have some suggestions for how
>>> to deal with it, is the patch okay as is?
>> In what pass do you see the the naked copies?  In general those should
>> have been propagated away.
> 
> As I said above, this happens during the dom walk in the ccp
> pass:
My bad.  Sigh. CCP doesn't track copies, just constants, so there's not
going to be any data structure you can exploit.  And I don't think
there's a value number you can use to determine the two objects are the
same.

Hmm, let's back up a bit, what is does the relevant part of the IL look
like before CCP?  Is the real problem here that we have unpropagated
copies lying around in the IL?  Hmm, more likely the IL looksl ike:

   _8 = &pb_3(D)->a;
   _9 = _8;
   _1 = _9;
   strncpy (MEM_REF (&pb_3(D)->a), ...);
   MEM[(struct S *)_1].a[n_7] = 0;

If we were to propagate the copies out we'd at best have:

   _8 = &pb_3(D)->a;
   strncpy (MEM_REF (&pb_3(D)->a), ...);
   MEM[(struct S *)_8].a[n_7] = 0;


Is that in a form you can handle?  Or would we also need to forward
propagate the address computation into the use of _8?


jeff



Re: [PATCH] look harder for MEM_REF operand equality to avoid -Wstringop-truncation (PR 84561)

2018-09-17 Thread Jeff Law
On 9/17/18 3:15 PM, Martin Sebor wrote:
> On 09/17/2018 11:35 AM, Richard Biener wrote:
>> On September 17, 2018 7:24:16 PM GMT+02:00, Jeff Law 
>> wrote:
>>> On 9/15/18 2:14 AM, Bernd Edlinger wrote:
 On 9/14/18, Martin Sebor wrote:
> As I said above, this happens during the dom walk in the ccp
> pass:
>
>   substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
>   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>
> The warning is issued during the walker.walk() call as
> strncpy is being folded into memcpy.  The prior assignments are
> only propagated later, when the next statement after the strncpy
> call is reached.  It happens in
> substitute_and_fold_dom_walker::before_dom_children(). So during
> the strncpy folding we see the next statement as:
>
>   MEM[(struct S *)_1].a[n_7] = 0;
>
> After the strncpy call is transformed to memcpy, the assignment
> above is transformed to
>
>   MEM[(struct S *)_8].a[3] = 0;
>
>
>>     If they're only discovered as copies within the pass where
>>> you're trying
>>     to issue the diagnostic, then you'd want to see if the pass has
>>> any
>>     internal structures that tell you about equivalences.
>
>
> I don't know if this is possible.  I don't see any APIs in
> tree-ssa-propagate.h that would let me query the internal data
> somehow to find out during folding (when the warning is issued).


 Well,

 if I see this right, the CCP is doing tree transformations
 while from the folding of strncpy the predicate
>>> maybe_diag_stxncpy_trunc
 is called, and sees inconsistent information, in the tree,
 and therefore it issues a warning.

 I understand that walking the references is fragile at least
 in this state.

 But why not just prevent warnings when this is called from CCP?


 Like this?

 Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
 Is it OK for trunk?
>>> No.  That's just hacking around the real problem.
>>
>> The real problem is emitting diagnostics from folding code.
> 
> Strncpy is a particularly dangerous function that's often
> misunderstood and misused.  IMO, it would be a worthwhile
> tradeoff to move the strncpy to memcpy transformation to
> the strlen pass where these bugs could be detected more
> reliably, and with fewer false positives.  I would not
> expect it to have a noticeable impact on code efficiency.
> 
> I'd be happy to modify the patch to do that if you find it
> acceptable.
That natural question is what is the immediate (ie testsuite) fallout?

Jeff


[PATCHv3][PR 81376] Remove unnecessary float casts in comparisons

2018-09-17 Thread Yuri Gribov
Hi all,

This is a second iteration of patch which gets rid of float casts in
comparisons when all values of casted integral type are exactly
representable by the float type
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81376). The new version
addresses issue spotted by Richard in previous version
https://gcc.gnu.org/ml/gcc-patches/2018-02/msg01119.html

Bootstrapped and regtested on x64_64.

-Y


0001-Add-pattern-to-remove-useless-float-casts-in-compari.patch
Description: Binary data


[PATCH 01/14] Add D front-end (DMD) language implementation and license.

2018-09-17 Thread Iain Buclaw
This patch adds the DMD front-end proper and license (Boost) files,
comprised of a lexer, parser, and semantic analyzer.

ftp://ftp.gdcproject.org/patches/v4/01-v4-d-frontend-dmd.patch

---
 gcc/d/dmd/access.c   |  670 +++
 gcc/d/dmd/aggregate.h|  335 ++
 gcc/d/dmd/aliasthis.c|  169 +
 gcc/d/dmd/aliasthis.h|   30 +
 gcc/d/dmd/apply.c|  150 +
 gcc/d/dmd/argtypes.c |  504 ++
 gcc/d/dmd/arrayop.c  |  638 +++
 gcc/d/dmd/arraytypes.h   |   62 +
 gcc/d/dmd/attrib.c   | 1604 ++
 gcc/d/dmd/attrib.h   |  275 +
 gcc/d/dmd/blockexit.c|  502 ++
 gcc/d/dmd/boostlicense.txt   |   23 +
 gcc/d/dmd/canthrow.c |  317 ++
 gcc/d/dmd/checkedint.c   |  564 ++
 gcc/d/dmd/checkedint.h   |   24 +
 gcc/d/dmd/clone.c| 1233 +
 gcc/d/dmd/compiler.h |   19 +
 gcc/d/dmd/complex_t.h|   71 +
 gcc/d/dmd/cond.c |  376 ++
 gcc/d/dmd/cond.h |  107 +
 gcc/d/dmd/constfold.c| 1932 +++
 gcc/d/dmd/cppmangle.c| 1109 
 gcc/d/dmd/ctfe.h |  267 +
 gcc/d/dmd/ctfeexpr.c | 2109 
 gcc/d/dmd/dcast.c| 3842 ++
 gcc/d/dmd/dclass.c   | 1958 +++
 gcc/d/dmd/declaration.c  | 2572 +
 gcc/d/dmd/declaration.h  |  899 
 gcc/d/dmd/delegatize.c   |  210 +
 gcc/d/dmd/denum.c|  757 +++
 gcc/d/dmd/dimport.c  |  501 ++
 gcc/d/dmd/dinterpret.c   | 7172 +
 gcc/d/dmd/dmacro.c   |  468 ++
 gcc/d/dmd/dmangle.c  |  897 
 gcc/d/dmd/dmodule.c  | 1444 +
 gcc/d/dmd/doc.c  | 2804 ++
 gcc/d/dmd/doc.h  |   14 +
 gcc/d/dmd/dscope.c   |  764 +++
 gcc/d/dmd/dstruct.c  | 1470 ++
 gcc/d/dmd/dsymbol.c  | 1803 +++
 gcc/d/dmd/dsymbol.h  |  406 ++
 gcc/d/dmd/dtemplate.c| 9009 +++
 gcc/d/dmd/dversion.c |  202 +
 gcc/d/dmd/entity.c   | 2392 +
 gcc/d/dmd/enum.h |   95 +
 gcc/d/dmd/errors.h   |   50 +
 gcc/d/dmd/escape.c   | 1234 +
 gcc/d/dmd/expression.c   | 6983 
 gcc/d/dmd/expression.h   | 1559 ++
 gcc/d/dmd/expressionsem.c| 8929 +++
 gcc/d/dmd/func.c | 5739 
 gcc/d/dmd/globals.h  |  315 ++
 gcc/d/dmd/hdrgen.c   | 3419 
 gcc/d/dmd/hdrgen.h   |   52 +
 gcc/d/dmd/iasm.c |   44 +
 gcc/d/dmd/iasmgcc.c  |  358 ++
 gcc/d/dmd/identifier.c   |  190 +
 gcc/d/dmd/identifier.h   |   49 +
 gcc/d/dmd/idgen.c|  503 ++
 gcc/d/dmd/impcnvgen.c|  599 +++
 gcc/d/dmd/imphint.c  |   72 +
 gcc/d/dmd/import.h   |   60 +
 gcc/d/dmd/init.c |  286 +
 gcc/d/dmd/init.h |  119 +
 gcc/d/dmd/initsem.c  |  920 
 gcc/d/dmd/intrange.c | 1109 
 gcc/d/dmd/intrange.h |  152 +
 gcc/d/dmd/json.c |  890 
 gcc/d/dmd/json.h |   17 +
 gcc/d/dmd/lexer.c| 2422 +
 gcc/d/dmd/lexer.h|   75 +
 gcc/d/dmd/macro.h|   45 +
 gcc/d/dmd/mangle.h   |   33 +
 gcc/d/dmd/mars.h |   95 +
 gcc/d/dmd/module.h   |  179 +
 gcc/d/dmd/mtype.c| 9606 ++
 gcc/d/dmd/mtype.h|  934 
 gcc/d/dmd/nogc.c |  241 +
 gcc/d/dmd/nspace.c   |  255 +
 gcc/d/dmd/nspace.h   |   38 +
 gcc/d/dmd/objc.c |   84 +
 gcc/d/dmd/objc.h |   53 +
 gcc/d/dmd/opover.c   | 1966 +++
 gcc/d/dmd/optimize.c | 1274 +
 gcc/d/dmd/parse.c| 8102 
 gcc/d/dmd/parse.h|  188 +
 gcc/d/dmd/readme.txt |   13 +
 gcc/d/dmd/root/aav.c |  192 +
 gcc/d/dmd/root/aav.h |   18 +
 gcc/d/dmd/root/array.h   |  235 +
 gcc/d/dmd/root/ctfloat.h |   47 +
 gcc/d/dmd/root/dcompat.h |   18 +
 gcc/d/dmd/root/file.c|  265 +
 gcc/d/dmd/root/file.h|   54 +
 gcc/d/dmd/root/filename.c|  679 +++
 gcc/d/dmd/root/filename.h|   51 +
 gcc/d/dmd/root/hash.h|   75 +
 gcc/d/dmd/root/object.h  |   60 +
 gcc/d/dmd/root/outbuffer.c   |  401 ++
 gcc/d/dmd/root/outbuffer.h   |   77 +
 gcc/d/dmd/root/port.h|   43 +
 gcc/d/dmd/root/rmem.c|  162 +
 gcc/d/dmd/root/rmem.h|   35 +
 gcc/d/dmd/root/root.h|   19 +
 gcc/d/dmd/root/rootobject.c  |   49 +
 gcc/d/dmd/root/speller.c |  294 ++
 gcc/d/dmd/root/speller.h |   14 +
 gcc/d/dmd/root/stringtable.c |  200 +
 gcc/d/dmd/root/stringtable.h |   57 +
 gcc/d/dmd/safe.c |  168 +
 gcc/d/dmd/sapply.c   |  156 +
 gcc/d/dmd/scope.h|  158 +
 gcc/d/dmd/sideeffect.c   |  439 ++
 gcc/d/dmd/statement.c| 1669 ++
 gcc/

  1   2   >