date:20171102

On Fri, Oct 27, 2017 at 02:27:50PM +0100, Richard Sandiford wrote:
> Later patches turn the number of vector units into a poly_int.
> We deliberately don't support applying GEN_INT to those (except
> in target code that doesn't distinguish between poly_ints and normal
> constants); gen_int_mode needs to be used instead.
> 
> This patch therefore replaces instances of:
> 
>   GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
> 
> with uses of a new endian_lane_rtx function.

OK.

Reviewed-by:  James Greenhalgh 

Thanks,
James

> 
> 
> 2017-10-26  Richard Sandiford  
>   Alan Hayward  
>   David Sherwood  
> 
> gcc/
>   * config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
>   * config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
>   * config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
>   of units rather than the mode.
>   * config/aarch64/iterators.md (nunits): New mode attribute.
>   * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
>   Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
>   * config/aarch64/aarch64-simd.md (aarch64_dup_lane)
>   (aarch64_dup_lane_, *aarch64_mul3_elt)
>   (*aarch64_mul3_elt_): Likewise.
>   (*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt): Likewise.
>   (*aarch64_mla_elt_, *aarch64_mls_elt)
>   (*aarch64_mls_elt_, *aarch64_fma4_elt)
>   (*aarch64_fma4_elt_): Likewise.
>   (*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt): Likewise.
>   (*aarch64_fnma4_elt_): Likewise.
>   (*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_): Likewise.
>   (reduc_plus_scal_v4sf, reduc__scal_): Likewise.
>   (reduc__scal_): Likewise.
>   (*aarch64_get_lane_extend): Likewise.
>   (*aarch64_get_lane_zero_extendsi): Likewise.
>   (aarch64_get_lane, *aarch64_mulx_elt_)
>   (*aarch64_mulx_elt, *aarch64_vgetfmulx): Likewise.
>   (aarch64_sqdmulh_lane, aarch64_sqdmulh_laneq)
>   (aarch64_sqrdmlh_lane): Likewise.
>   (aarch64_sqrdmlh_laneq): Likewise.
>   (aarch64_sqdmll_lane): Likewise.
>   (aarch64_sqdmll_laneq): Likewise.
>   (aarch64_sqdmll2_lane_internal): Likewise.
>   (aarch64_sqdmll2_laneq_internal): Likewise.
>   (aarch64_sqdmull_lane, aarch64_sqdmull_laneq): Likewise.
>   (aarch64_sqdmull2_lane_internal): Likewise.
>   (aarch64_sqdmull2_laneq_internal): Likewise.
>   (aarch64_vec_load_lanesoi_lane): Likewise.
>   (aarch64_vec_store_lanesoi_lane): Likewise.
>   (aarch64_vec_load_lanesci_lane): Likewise.
>   (aarch64_vec_store_lanesci_lane): Likewise.
>   (aarch64_vec_load_lanesxi_lane): Likewise.
>   (aarch64_vec_store_lanesxi_lane): Likewise.
>   (aarch64_simd_vec_set): Update use of ENDIAN_LANE_N.
>   (aarch64_simd_vec_setv2di): Likewise.
> 
> Index: gcc/config/aarch64/aarch64-protos.h
> ===
> --- gcc/config/aarch64/aarch64-protos.h   2017-10-27 14:11:56.993658452 
> +0100
> +++ gcc/config/aarch64/aarch64-protos.h   2017-10-27 14:12:00.601693018 
> +0100
> @@ -437,6 +437,7 @@ void aarch64_simd_emit_reg_reg_move (rtx
>  rtx aarch64_simd_expand_builtin (int, tree, rtx);
>  
>  void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, 
> const_tree);
> +rtx aarch64_endian_lane_rtx (machine_mode, unsigned int);
>  
>  void aarch64_split_128bit_move (rtx, rtx);
>  
> Index: gcc/config/aarch64/aarch64.c
> ===
> --- gcc/config/aarch64/aarch64.c  2017-10-27 14:11:56.995515870 +0100
> +++ gcc/config/aarch64/aarch64.c  2017-10-27 14:12:00.603550436 +0100
> @@ -12083,6 +12083,15 @@ aarch64_simd_lane_bounds (rtx operand, H
>}
>  }
>  
> +/* Peform endian correction on lane number N, which indexes a vector
> +   of mode MODE, and return the result as an SImode rtx.  */
> +
> +rtx
> +aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
> +{
> +  return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
> +}
> +
>  /* Return TRUE if OP is a valid vector addressing mode.  */
>  bool
>  aarch64_simd_mem_operand_p (rtx op)
> Index: gcc/config/aarch64/aarch64.h
> ===
> --- gcc/config/aarch64/aarch64.h  2017-10-27 14:05:38.132936808 +0100
> +++ gcc/config/aarch64/aarch64.h  2017-10-27 14:12:00.603550436 +0100
> @@ -910,8 +910,8 @@ #define AARCH64_VALID_SIMD_QREG_MODE(MOD
> || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
> || (MODE) == V2DFmode)
>  
> -#define ENDIAN_LANE_N(mode, n)  \
> -  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
> +#define ENDIAN_LANE_N(NUNITS, N) \
> +  (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N)
>  
>  /* Support for a configure-time default CPU, etc.  We currently support
> --with-arch and --with-cpu.  Both are ignored if either is specified
> Index: gcc/co

Re: [07/nn] [AArch64] Pass number of units to aarch64_reverse_mask

On Fri, Oct 27, 2017 at 02:28:27PM +0100, Richard Sandiford wrote:
> This patch passes the number of units to aarch64_reverse_mask,
> which avoids a to_constant () once GET_MODE_NUNITS is variable.

OK

Reviewed-by: James Greenhalgh 

Thanks,
James

> 
> 
> 2017-10-26  Richard Sandiford  
>   Alan Hayward  
>   David Sherwood  
> 
> gcc/
>   * config/aarch64/aarch64-protos.h (aarch64_reverse_mask): Take
>   the number of units too.
>   * config/aarch64/aarch64.c (aarch64_reverse_mask): Likewise.
>   * config/aarch64/aarch64-simd.md (vec_load_lanesoi)
>   (vec_store_lanesoi, vec_load_lanesci)
>   (vec_store_lanesci, vec_load_lanesxi)
>   (vec_store_lanesxi): Update accordingly.
> 
> Index: gcc/config/aarch64/aarch64-protos.h
> ===
> --- gcc/config/aarch64/aarch64-protos.h   2017-10-27 14:12:00.601693018 
> +0100
> +++ gcc/config/aarch64/aarch64-protos.h   2017-10-27 14:12:04.192082112 
> +0100
> @@ -365,7 +365,7 @@ bool aarch64_mask_and_shift_for_ubfiz_p
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
>  bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
>  bool aarch64_mov_operand_p (rtx, machine_mode);
> -rtx aarch64_reverse_mask (machine_mode);
> +rtx aarch64_reverse_mask (machine_mode, unsigned int);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
>  char *aarch64_output_simd_mov_immediate (rtx, unsigned,
> Index: gcc/config/aarch64/aarch64.c
> ===
> --- gcc/config/aarch64/aarch64.c  2017-10-27 14:12:00.603550436 +0100
> +++ gcc/config/aarch64/aarch64.c  2017-10-27 14:12:04.193939530 +0100
> @@ -13945,16 +13945,18 @@ aarch64_vectorize_vec_perm_const_ok (mac
>return ret;
>  }
>  
> +/* Generate a byte permute mask for a register of mode MODE,
> +   which has NUNITS units.  */
> +
>  rtx
> -aarch64_reverse_mask (machine_mode mode)
> +aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
>  {
>/* We have to reverse each vector because we dont have
>   a permuted load that can reverse-load according to ABI rules.  */
>rtx mask;
>rtvec v = rtvec_alloc (16);
> -  int i, j;
> -  int nunits = GET_MODE_NUNITS (mode);
> -  int usize = GET_MODE_UNIT_SIZE (mode);
> +  unsigned int i, j;
> +  unsigned int usize = GET_MODE_UNIT_SIZE (mode);
>  
>gcc_assert (BYTES_BIG_ENDIAN);
>gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
> Index: gcc/config/aarch64/aarch64-simd.md
> ===
> --- gcc/config/aarch64/aarch64-simd.md2017-10-27 14:12:00.602621727 
> +0100
> +++ gcc/config/aarch64/aarch64-simd.md2017-10-27 14:12:04.193010821 
> +0100
> @@ -4632,7 +4632,7 @@ (define_expand "vec_load_lanesoi"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (OImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask = aarch64_reverse_mask (mode, );
>emit_insn (gen_aarch64_simd_ld2 (tmp, operands[1]));
>emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
>  }
> @@ -4676,7 +4676,7 @@ (define_expand "vec_store_lanesoi"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (OImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask = aarch64_reverse_mask (mode, );
>emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
>emit_insn (gen_aarch64_simd_st2 (operands[0], tmp));
>  }
> @@ -4730,7 +4730,7 @@ (define_expand "vec_load_lanesci"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (CImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask = aarch64_reverse_mask (mode, );
>emit_insn (gen_aarch64_simd_ld3 (tmp, operands[1]));
>emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
>  }
> @@ -4774,7 +4774,7 @@ (define_expand "vec_store_lanesci"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (CImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask = aarch64_reverse_mask (mode, );
>emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
>emit_insn (gen_aarch64_simd_st3 (operands[0], tmp));
>  }
> @@ -4828,7 +4828,7 @@ (define_expand "vec_load_lanesxi"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (XImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask = aarch64_reverse_mask (mode, );
>emit_insn (gen_aarch64_simd_ld4 (tmp, operands[1]));
>emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
>  }
> @@ -4872,7 +4872,7 @@ (define_expand "vec_store_lanesxi"
>if (BYTES_BIG_ENDIAN)
>  {
>rtx tmp = gen_reg_rtx (XImode);
> -  rtx mask = aarch64_reverse_mask (mode);
> +  rtx mask

Re: [08/nn] [AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half

On Fri, Oct 27, 2017 at 02:28:57PM +0100, Richard Sandiford wrote:
> This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
> which avoids a to_constant () once GET_MODE_NUNITS is variable.

OK.


Reviewed-by: James Greenhalgh 

Thanks,
James

> 2017-10-27  Richard Sandiford  
>   Alan Hayward  
>   David Sherwood  
> 
> gcc/
>   * config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
>   Take the number of units too.
>   * config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
>   (aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
>   but check for a vector mode before rather than after the call.
>   * config/aarch64/aarch64-simd.md (aarch64_split_simd_mov)
>   (move_hi_quad_, vec_unpack_hi_)
>   (vec_unpack_lo_mult_lo_)
>   (vec_widen_mult_hi_, vec_unpacks_lo_)
>   (vec_unpacks_hi_, aarch64_saddl2, aarch64_uaddl2)
>   (aarch64_ssubl2, aarch64_usubl2, widen_ssum3)
>   (widen_usum3, aarch64_saddw2, aarch64_uaddw2)
>   (aarch64_ssubw2, aarch64_usubw2, aarch64_sqdmlal2)
>   (aarch64_sqdmlsl2, aarch64_sqdmlal2_lane)
>   (aarch64_sqdmlal2_laneq, aarch64_sqdmlsl2_lane)
>   (aarch64_sqdmlsl2_laneq, aarch64_sqdmlal2_n)
>   (aarch64_sqdmlsl2_n, aarch64_sqdmull2)
>   (aarch64_sqdmull2_lane, aarch64_sqdmull2_laneq)
>   (aarch64_sqdmull2_n): Update accordingly.
>

Re: [09/nn] [AArch64] Pass number of units to aarch64_expand_vec_perm(_const)

On Fri, Oct 27, 2017 at 02:29:30PM +0100, Richard Sandiford wrote:
> This patch passes the number of units to aarch64_expand_vec_perm
> and aarch64_expand_vec_perm_const, which avoids a to_constant ()
> once GET_MODE_NUNITS is variable.

OK.

Reviewed-by: James Greenhalgh 

Thanks,
James

> 2017-10-27  Richard Sandiford  
>   Alan Hayward  
>   David Sherwood  
> 
> gcc/
>   * config/aarch64/aarch64-protos.h (aarch64_expand_vec_perm)
>   (aarch64_expand_vec_perm_const): Take the number of units too.
>   * config/aarch64/aarch64.c (aarch64_expand_vec_perm)
>   (aarch64_expand_vec_perm_const): Likewise.
>   * config/aarch64/aarch64-simd.md (vec_perm_const)
>   (vec_perm): Update accordingly.
>

Re: [12/nn] [AArch64] Add const_offset field to aarch64_address_info

On Fri, Oct 27, 2017 at 02:31:35PM +0100, Richard Sandiford wrote:
> This patch records the integer value of the address offset in
> aarch64_address_info, so that it doesn't need to be re-extracted
> from the rtx.  The SVE port will make more use of this.  The patch
> also uses poly_int64 routines to manipulate the offset, rather than
> just handling CONST_INTs.

OK.

Reviewed-by: James Greenhalgh 

Thanks,
James

> 2017-10-27  Richard Sandiford  
>   Alan Hayward  
>   David Sherwood  
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_address_info): Add a const_offset
>   field.
>   (aarch64_classify_address): Initialize it.  Track polynomial offsets.
>   (aarch64_print_operand_address): Use it to check for a zero offset.

[PATCH, i386] Enable option -mprefer-avx256 as default for Intel Skylake configuration

2017-11-02 Thread Shalnov, Sergey

Hi,
This patch makes the "prefer-avx256" option the default tuning for "skylake-avx512".
This is due to better performance of 256-bit code for some of the cases. In 
case of 
Skylake Server the Optimization Manual has following "Since port 0 and port 1 
are 256-bits wide, 
Intel AVX-512 operations that will be dispatched to port 0 will execute on both 
port 0 and port 1" 
(chapter 2.1 in 
https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf).

It is platform and case specific and not applicable to every AVX512-available 
platform. 
Option purpose is to make this code generation more performance efficient. It 
provides 
performance gain on SPEC2017int on Skylake Server for -march=skylake-avx512. 
Patch leaves opportunity for some of AVX-512 instructions generated where they 
are the most efficient.

2017-10-31  Sergey Shalnov  
gcc/
* config/i386/x86-tune.def (X86_TUNE_AVX256_OPTIMAL): Add tuning
option prefer-avx256 for skylake-avx512 configuration.
* config/i386/i386.c (ix86_option_override_internal): Ditto.
(get_builtin_code_for_version): Ditto.

Sergey



0003-Make-prefer-avx256-default-for-skylake-avx512.patch
Description: 0003-Make-prefer-avx256-default-for-skylake-avx512.patch

RE: [PATCH] [ARC][ZOL] Account for empty body loops

> > gcc/
> > 2017-09-01  Claudiu Zissulescu 
> >
> > * config/arc/arc.c (hwloop_optimize): Account for empty
> > body loops.
> 
> Looks good to me.
> 
> Thanks,
> Andrew
> 
> 

Pushed. Thank you for your review,
Claudiu

Re: [doc] Remove Tru64 UNIX and IRIX references in install.texi

2017-11-02 Thread Rainer Orth

Hi Jeff,

> On 10/27/2017 05:48 AM, Rainer Orth wrote:
>> I happened to notice that install.texi still contains references to the
>> Tru64 UNIX and IRIX ports I've removed in GCC 4.8.  I believe it's time
>> now to get rid of those completely.
>> 
>> Tested with make doc/gccinstall.info and doc/gccinstall.pdf.  Ok for
>> mainline?  This falls under my prior maintainership, I guess, but
>> think it's best to get a second opinion.
> OK.  There's probably a ton of ancient host/build issues that should
> just get removed.

probably, but it's hard for anyone but the respective maintainers to
know/check.  I've tried to keep things clean for my ports at least when
obsoleting some older OS version or the port as a whole.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

Re: [patch, Fortran] PR 55806 - Inefficient ANY with array constructors

2017-11-02 Thread Bernhard Reutner-Fischer

On Mon, Jan 14, 2013 at 10:51:25PM +0100, Thomas Koenig wrote:
> Am 14.01.2013 14:29, schrieb Mikael Morin:
> > Le 13/01/2013 23:14, Thomas Koenig a écrit :
> 
> > > OK for trunk?
> > > 
> > OK with the changes suggested above. Thanks.
> 
> Committed as rev. 195179 with your changes.

s/intrnisics/intrinsics/; # in a comment

And in r232774 this found its way into a runtime error:
s/intrnisic/intrinsic/g
( gfortran.dg/matmul_bounds_9.f90 needs adjustment)

TIA,

Re: [doc] Remove Tru64 UNIX and IRIX references in install.texi

2017-11-02 Thread Rainer Orth

Hi Sandra,

> On 10/27/2017 05:48 AM, Rainer Orth wrote:
>> I happened to notice that install.texi still contains references to the
>> Tru64 UNIX and IRIX ports I've removed in GCC 4.8.  I believe it's time
>> now to get rid of those completely.
>>
>> Tested with make doc/gccinstall.info and doc/gccinstall.pdf.  Ok for
>> mainline?  This falls under my prior maintainership, I guess, but
>> think it's best to get a second opinion.
>
> Thanks for catching this.  In general I think we should document only GCC's
> current behavior and not mention removed functionality except in release
> notes.  I think this whole document needs review for that sort of thing,
> but every bit helps.

this example shows that you're probably right: my thinking at the time
probably was that it's easier to find the obsoletion info in the
(current) install.texi than checking through various older changes
documents, but obviously I missed the time to finally remove it before now.

> Can you fix this nit while you're in there?
>
>> @@ -3353,8 +3347,7 @@ The workaround is disabled by default if
>>  @anchor{alpha-x-x}
>>  @heading alpha*-*-*
>>  This section contains general configuration information for all
>> -alpha-based platforms using ELF (in particular, ignore this section for
>> -DEC OSF/1, Digital UNIX and Tru64 UNIX)@.  In addition to reading this
>> +alpha-based platforms using ELF@.  In addition to reading this
>>  section, please read all other sections that match your target.
>>
>>  We require binutils 2.11.2 or newer.
>
> s/alpha-based/Alpha-based/

Sure, done.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

[build] Cleanup Solaris linker version checks

2017-11-02 Thread Rainer Orth

There's some potential to cleanup Solaris linker version checks:

* gcc_AC_INITFINI_ARRAY in acinclude.m4 is unused, but still serves as
  documentation for the various variants of ld version strings.  This is
  now moved to configure.ac.

* Solaris 11.4 has (or shortly will have) some changes to the version
  numbering: until now this is something like

  ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.2560

  but will become

  ld: Solaris ELF Utilities: 11.4-1.3123

  like the rest of Solaris binutils (ar, nm, ...) already have.

  The patch below accommodates both formats.

Bootstrapped without regressions on i386-pc-solaris2.11.4,
i386-pc-solaris2.10, to be installed on mainline shortly.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2017-10-26  Rainer Orth  

* acinclude.m4 (gcc_AC_INITFINI_ARRAY): Don't require
gcc_SUN_LD_VERSION.
(gcc_GAS_CHECK_FEATURE): Remove.
* configure.ac (ld_vers) <*-*-solaris2*>: Move comments from
gcc_AC_INITFINI_ARRAY here.  Update for Solaris 11.4 changes.
* configure: Regenerate.

# HG changeset patch
# Parent  d45c8c8a41993c685d1de58840fc675b4cb7b5c0
Cleanup Solaris linker version checks

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -277,8 +277,7 @@ fi
 fi])
 
 AC_DEFUN([gcc_AC_INITFINI_ARRAY],
-[AC_REQUIRE([gcc_SUN_LD_VERSION])dnl
-AC_ARG_ENABLE(initfini-array,
+[AC_ARG_ENABLE(initfini-array,
 	[  --enable-initfini-array	use .init_array/.fini_array sections],
 	[], [
 AC_CACHE_CHECK(for .preinit_array/.init_array/.fini_array support,
@@ -556,43 +555,6 @@ if test $[$2] != yes; then
   $8
 fi])])
 
-dnl gcc_SUN_LD_VERSION
-dnl
-dnl Determines Sun linker version numbers, setting gcc_cv_sun_ld_vers to
-dnl the complete version number and gcc_cv_sun_ld_vers_{major, minor} to
-dnl the corresponding fields.
-dnl
-dnl ld and ld.so.1 are guaranteed to be updated in lockstep, so ld version
-dnl numbers can be used in ld.so.1 feature checks even if a different
-dnl linker is configured.
-dnl
-AC_DEFUN([gcc_SUN_LD_VERSION],
-[changequote(,)dnl
-if test "x${build}" = "x${target}" && test "x${build}" = "x${host}"; then
-  case "${target}" in
-*-*-solaris2*)
-  #
-  # Solaris 2 ld -V output looks like this for a regular version:
-  #
-  # ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1699
-  #
-  # but test versions add stuff at the end:
-  #
-  # ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1701:onnv-ab196087-6931056-03/25/10
-  #
-  gcc_cv_sun_ld_ver=`/usr/ccs/bin/ld -V 2>&1`
-  if echo "$gcc_cv_sun_ld_ver" | grep 'Solaris Link Editors' > /dev/null; then
-	gcc_cv_sun_ld_vers=`echo $gcc_cv_sun_ld_ver | sed -n \
-	  -e 's,^.*: 5\.[0-9][0-9]*-\([0-9]\.[0-9][0-9]*\).*$,\1,p'`
-	gcc_cv_sun_ld_vers_major=`expr "$gcc_cv_sun_ld_vers" : '\([0-9]*\)'`
-	gcc_cv_sun_ld_vers_minor=`expr "$gcc_cv_sun_ld_vers" : '[0-9]*\.\([0-9]*\)'`
-  fi
-  ;;
-  esac
-fi
-changequote([,])dnl
-])
-
 dnl GCC_TARGET_TEMPLATE(KEY)
 dnl 
 dnl Define KEY as a valid configure key on the target machine.
diff --git a/gcc/configure.ac b/gcc/configure.ac
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2587,15 +2587,25 @@ if test $in_tree_ld != yes ; then
   else
 case "${target}" in
   *-*-solaris2*)
-	# See acinclude.m4 (gcc_SUN_LD_VERSION) for the version number
-	# format.
+	# Solaris 2 ld -V output looks like this for a regular version:
 	#
-	# Don't reuse gcc_gv_sun_ld_vers_* in case a linker other than
-	# /usr/ccs/bin/ld has been configured.
+	# ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1699
+	#
+	# but test versions add stuff at the end:
+	#
+	# ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1701:onnv-ab196087-6931056-03/25/10
+	#
+	# In Solaris 11.4, this was changed to
+	#
+	# ld: Solaris ELF Utilities: 11.4-1.3123
+	#
+	# ld and ld.so.1 are guaranteed to be updated in lockstep, so ld version
+	# numbers can be used in ld.so.1 feature checks even if a different
+	# linker is configured.
 	ld_ver=`$gcc_cv_ld -V 2>&1`
-	if echo "$ld_ver" | grep 'Solaris Link Editors' > /dev/null; then
+	if echo "$ld_ver" | $EGREP 'Solaris Link Editors|Solaris ELF Utilities' > /dev/null; then
 	  ld_vers=`echo $ld_ver | sed -n \
-	-e 's,^.*: 5\.[0-9][0-9]*-\([0-9]\.[0-9][0-9]*\).*$,\1,p'`
+	-e 's,^.*: \(5\|1[0-9]\)\.[0-9][0-9]*-\([0-9]\.[0-9][0-9]*\).*$,\2,p'`
 	  ld_vers_major=`expr "$ld_vers" : '\([0-9]*\)'`
 	  ld_vers_minor=`expr "$ld_vers" : '[0-9]*\.\([0-9]*\)'`
 	fi

Re: [PATCH PR79868 ][aarch64] Fix error calls in aarch64 code so they can be translated (version 2)

2017-11-02 Thread Richard Earnshaw (lists)

On 31/10/17 16:53, Steve Ellcey wrote:
> On Tue, 2017-10-31 at 09:57 +, Richard Earnshaw (lists) wrote:
>>  
>> This is looking better...
>>
>> I may have missed some discussion on this topic, but what's the
>> reasoning behind changing the quoting around the 'str' parameter
>> value in
>>
>> -error ("unknown value %qs for 'cpu' target %s", str,
>> pragma_or_attr);
>> +error ("invalid name (\"%s\") in % pragma
>> or
>> attribute", str);
>>
>> And also with the new generic message does the %
>> still make sense?  My feeling is that the original text here is perhaps
>> more appropriate.  Similarly for other messages.
>>
>> R.
> 
> 
> %qs uses single quotes vs. double quotes, changing that was suggested
> by Martin Sebor in this comment:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01569.html
> 
> using '%' was also suggested by Martin in that
> same thread at:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01277.html
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01469.html
> 
> as being more consistent with other usage (mainly config/i386/i386.c).
> 
> Steve Ellcey
> sell...@cavium.com
> 

Thanks.

On that basis, this patch is OK.

R.

Re: [PATCH][AArch64] Define MALLOC_ABI_ALIGNMENT

2017-11-02 Thread Richard Earnshaw

On 01/11/17 17:40, James Greenhalgh wrote:
> On Tue, Oct 31, 2017 at 05:07:54PM +, Wilco Dijkstra wrote:
>> The AArch64 backend currently doesn't set MALLOC_ABI_ALIGNMENT, so
>> add this to enable alignment optimizations on malloc pointers.
>>
>> OK for commit?
> 
> As far as I understand it, because we have 128-bit types, a malloc of
> anything greater than 16 bytes would require 16-byte alignment. So, assuming
> this macro isn't required to describe possibly unaligned smaller allocations
> (for example 1 byte allocations), this is OK.
> 
> Reviewed-By: James Greenhalgh 
> 
> Thanks,
> James
> 
>>
>> 2017-10-31  Wilco Dijkstra  
>>
>>  * config/aarch64/aarch64.h (MALLOC_ABI_ALIGNMENT): New define.
>> --
>> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
>> index 
>> 8e7093f0476fa7fed3ba6d6cb008743106d1ff58..159dde7c7134d4d0e5378951d1d8a1d6dab48ba2
>>  100644
>> --- a/gcc/config/aarch64/aarch64.h
>> +++ b/gcc/config/aarch64/aarch64.h
>> @@ -111,6 +111,9 @@
>>  
>>  #define STRUCTURE_SIZE_BOUNDARY 8
>>  
>> +/* Heap alignment.  */
>> +#define MALLOC_ABI_ALIGNMENT  BIGGEST_ALIGNMENT
>> +
>>  /* Defined by the ABI */
>>  #define WCHAR_TYPE "unsigned int"
>>  #define WCHAR_TYPE_SIZE 32

I wonder if it would be safer to define this explicitly as the current
value of BIGGEST_ALIGNMENT; then if we ever have to change the latter we
won't get silent breakage.

R.

Re: [PATCH] enhance -Warray-bounds to detect out-of-bounds offsets (PR 82455)

On Mon, 30 Oct 2017, Jeff Law wrote:

> On 10/30/2017 05:29 PM, Martin Sebor wrote:
> > On 10/30/2017 03:48 PM, Jeff Law wrote:
> >> On 10/30/2017 09:19 AM, Martin Sebor wrote:
> >>> On 10/30/2017 05:45 AM, Richard Biener wrote:
>  On Sun, 29 Oct 2017, Martin Sebor wrote:
> 
> > In my work on -Wrestrict, to issue meaningful warnings, I found
> > it important to detect both out of bounds array indices as well
> > as offsets in calls to restrict-qualified functions like strcpy.
> > GCC already detects some of these cases but my tests for
> > the enhanced warning exposed a few gaps.
> >
> > The attached patch enhances -Warray-bounds to detect more instances
> > out-of-bounds indices and offsets to member arrays and non-array
> > members.  For example, it detects the out-of-bounds offset in the
> > call to strcpy below.
> >
> > The patch is meant to be applied on top posted here but not yet
> > committed:
> >     https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01304.html
> >
> > Richard, since this also touches tree-vrp.c I look for your comments.
> 
>  You fail to tell what you are changing and why - I have to reverse
>  engineer this from the patch which a) isn't easy in this case, b) feels
>  like a waste of time.  Esp. since the patch does many things.
> 
>  My first question is why do you add a warning from forwprop?  It
>  _feels_ like you're trying to warn about arbitrary out-of-bound
>  addresses at the point they are folded to MEM_REFs.  And it looks
>  like you're warning about pointer arithmetic like &p->a + 6.
>  That doesn't look correct to me.  Pointer arithmetic in GIMPLE
>  is not restricted to operate within fields that are apparently
>  accessed here - the only restriction is with respect to the
>  whole underlying pointed-to-object.
> 
>  By doing the warning from forwprop you'll run into all such cases
>  introduced by GCC itself during quite late optimization passes.
> >>>
> >>> I haven't run into any such cases.  What would a more appropriate
> >>> place to detect out-of-bounds offsets?  I'm having a hard time
> >>> distinguishing what is appropriate and what isn't.  For instance,
> >>> if it's okay to detect some out of bounds offsets/indices in vrp
> >>> why is it wrong to do a better job of it in forwprop?
> >>
> >> I think part of the problem is there isn't a well defined place to do
> >> this kind of warning.  I suspect it's currently in VRP simply because
> >> that is where we had range information in the past.  It's still the
> >> location with the most accurate range information.
> >>
> >> In a world where we have an embedded context sensitive range analysis
> >> engine, we should *really* look at pulling the out of bounds array
> >> warnings out of any optimization pass and have a distinct pass to deal
> >> with them.
> >>
> >> I guess in the immediate term the question I would ask Martin is what is
> >> it about forwprop that makes it interesting?  Is it because of the
> >> lowering issues we touched on last week?  If so I wonder if we could
> >> recreate an array form from a MEM_REF for the purposes of optimization.
> >> Or if we could just handle MEM_REFs better within the existing warning.
> > 
> > I put it in forwprop only because that was the last stage where
> > there's still enough context before the POINTER_PLUS_EXPR is
> > folded into MEM_REF to tell an offset from the beginning of
> > a subobject from the one from the beginning of the bigger object
> > of which the subobject is a member.  I certainly don't mind moving
> > it somewhere else more appropriate if this isn't ideal, just as
> > long it doesn't cripple the detection (e.g., as long as we still
> > have range information).
> Understood.

Well, it's a long-standing issue with how we do these kind of
warnings, likewise for _b_o_s which also "can't stand" component-refs
to be folded into the MEM_REF offset.

I've said in the past that _b_o_s relying on component-refs to stay
and for them to be constrained the same way they are in C is bogus.
We've added an early _b_o_s pass to mitigate that "issue" somewhat.

Now you're trying to "solve" the same issue as _b_o_s -- in the
end it looks like the warning could well reside in that pass
rather than in forwprop.

Richard.

> 
> [ ... ]
> 
> > 
> > I of course don't want to break anything.  I didn't see any fallout
> > in my testing and I normally test all the front ends, including Ada,
> > but let me check to make sure I tested it this time (I had made some
> > temporary changes to my build script and may have disabled it.)  Let
> > me double check it after I get back from my trip.
> No worries.  Hopefully by the time you're back I'll have something
> publishable on the ripping apart tree-vrp front and we can prototype the
> effectiveness of doing this kind of stuff outside tree-vrp.c
> 
> We should also revisit Aldy's work from last year which

Re: [PATCH] enhance -Warray-bounds to detect out-of-bounds offsets (PR 82455)

On Mon, 30 Oct 2017, Martin Sebor wrote:

> On 10/30/2017 02:56 PM, Richard Biener wrote:
> > On October 30, 2017 9:13:04 PM GMT+01:00, Martin Sebor 
> > wrote:
> > > On 10/30/2017 01:53 PM, Richard Biener wrote:
> > > > On October 30, 2017 4:19:25 PM GMT+01:00, Martin Sebor
> > >  wrote:
> > > > > On 10/30/2017 05:45 AM, Richard Biener wrote:
> > > > > > On Sun, 29 Oct 2017, Martin Sebor wrote:
> > > > > > 
> > > > > > > In my work on -Wrestrict, to issue meaningful warnings, I found
> > > > > > > it important to detect both out of bounds array indices as well
> > > > > > > as offsets in calls to restrict-qualified functions like strcpy.
> > > > > > > GCC already detects some of these cases but my tests for
> > > > > > > the enhanced warning exposed a few gaps.
> > > > > > > 
> > > > > > > The attached patch enhances -Warray-bounds to detect more
> > > instances
> > > > > > > out-of-bounds indices and offsets to member arrays and non-array
> > > > > > > members.  For example, it detects the out-of-bounds offset in the
> > > > > > > call to strcpy below.
> > > > > > > 
> > > > > > > The patch is meant to be applied on top posted here but not yet
> > > > > > > committed:
> > > > > > >  https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01304.html
> > > > > > > 
> > > > > > > Richard, since this also touches tree-vrp.c I look for your
> > > > > comments.
> > > > > > 
> > > > > > You fail to tell what you are changing and why - I have to reverse
> > > > > > engineer this from the patch which a) isn't easy in this case, b)
> > > > > feels
> > > > > > like a waste of time.  Esp. since the patch does many things.
> > > > > > 
> > > > > > My first question is why do you add a warning from forwprop?  It
> > > > > > _feels_ like you're trying to warn about arbitrary out-of-bound
> > > > > > addresses at the point they are folded to MEM_REFs.  And it looks
> > > > > > like you're warning about pointer arithmetic like &p->a + 6.
> > > > > > That doesn't look correct to me.  Pointer arithmetic in GIMPLE
> > > > > > > is not restricted to operate within fields that are apparently
> > > > > > accessed here - the only restriction is with respect to the
> > > > > > whole underlying pointed-to-object.
> > > > > > 
> > > > > > By doing the warning from forwprop you'll run into all such cases
> > > > > > introduced by GCC itself during quite late optimization passes.
> > > > > 
> > > > > I haven't run into any such cases.  What would a more appropriate
> > > > > place to detect out-of-bounds offsets?  I'm having a hard time
> > > > > distinguishing what is appropriate and what isn't.  For instance,
> > > > > if it's okay to detect some out of bounds offsets/indices in vrp
> > > > > why is it wrong to do a better job of it in forwprop?
> > > > > 
> > > > > > 
> > > > > > You're trying to re-do __builtin_object_size even when that wasn't
> > > > > > used.
> > > > > 
> > > > > > That's not quite my intent, although it is close.
> > > > > 
> > > > > > 
> > > > > > So it looks like you're on the wrong track.  Yes,
> > > > > > 
> > > > > > strcpy (p->a + 6, "y");
> > > > > > 
> > > > > > _may_ be "invalid" C (I'm not even sure about that!) but it
> > > > > > is certainly not invalid GIMPLE.
> > > > > 
> > > > > Adding (or subtracting) an integer to/from a pointer to an array
> > > > > is defined in both C and C++ only if the result points to an element
> > > > > of the array or just past the last element of the array.  Otherwise
> > > > > it's undefined. (A non-array object is considered an array of one
> > > > > for this purpose.)
> > > > 
> > > > On GIMPLE this is indistinguishable from (short *) (p->a) + 3.
> > > 
> > > Sure, they're both the same:
> > > 
> > >pa_3 = &p_2(D)->a;
> > >_1 = pa_3 + 6;
> > > 
> > > and that's fine because the implementation of the warning sees and
> > > uses the byte offset from the beginning of a, so I don't understand
> > > the problem you are pointing out.  Can you clarify what you mean?
> > 
> > It does not access the array but the underlying object. On GIMPLE it is just
> > an address calculation without constraints.
> 
> But the computation starts with the subobject and so is only
> valid within the bounds of the subobject.  Or are you saying
> that GCC emits such GIMPLE expressions for valid code?  If so,
> can you give an example of such code?

There were elaborate transforms of ptr + CST to ptr->a.b.c[3] in the
past.  We have ripped out _most_ of them because of bad interaction
with dependence analysis and _b_o_s warnings.

But for example PRE might still end up propagating

 _1 = &ptr->a.b.c;
 _2 = (*_1)[i_3];

as

 _2 = ptr->a.b.c[i_3];

But it's not so much GCC building up GIMPLE expressions that would
cause false positives but "valid" C code and "invalid" C code
represented exactly the same in GCC.  Let's say

struct S {
short s[4];
int i;
};

char foo (struct S *p)
{
  *((char *)p->s + 8);
}

for example which I think is perfectly valid but ends up as

foo (struct

Re: [PATCH] PR fortran/82796 -- common entity in equivalence in pure routine

2017-11-02 Thread Paul Richard Thomas

Hi Steve,

I read the correspondence on clf and your earlier posting here. With
those in mind, the patch looks to be OK to commit.

Thanks

Paul

On 2 November 2017 at 01:09, Steve Kargl
 wrote:
> The attached patch fixes a regression where gfortran was
> issuing an error for an entity in a common block within
> a module when it appears in equivalence, and the entity
> *is not* use associated in a pure subprogram.  OK to
> commit?
>
>
> 2017-11-01  Steven G. Kargl  
>
> PR fortran/82796
> * resolve.c (resolve_equivalence): An entity in a common block within
> a module cannot appear in an equivalence statement if the entity is
> with a pure procedure.
>
> 2017-11-01  Steven G. Kargl  
>
> PR fortran/82796
> * gfortran.dg/equiv_pure.f90: New test.
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein

Re: [PATCH][AArch64] Define MALLOC_ABI_ALIGNMENT

2017-11-02 Thread Wilco Dijkstra

Richard Earnshaw wrote:
> On 01/11/17 17:40, James Greenhalgh wrote:

>> As far as I understand it, because we have 128-bit types, a malloc of
>> anything greater than 16 bytes would require 16-byte alignment. So, assuming
>> this macro isn't required to describe possibly unaligned smaller allocations
>> (for example 1 byte allocations), this is OK.

I'm sure one can create structures with 16-byte alignment that are smaller than
16 bytes. For example union of say a char and __int128 empty_array[0] should do 
it.

>> +#define MALLOC_ABI_ALIGNMENT  BIGGEST_ALIGNMENT

> I wonder if it would be safer to define this explicitly as the current
> value of BIGGEST_ALIGNMENT; then if we ever have to change the latter we
> won't get silent breakage.

I'll do that for the commit. I used BIGGEST_ALIGNMENT since that is what the
Arm port does...

Wilco

RE: [patch][x86] GFNI enabling [2/4]

2017-11-02 Thread Koval, Julia

The documentation is right; I was wrong not to add SSE/AVX flags in these
builtin declarations.

> The exceptions are
> MMX, AVX512VL and 64BIT is also special.
> So, shall GFNI be added to that set?  
It turns out only GFNI and VAES (I haven't sent those yet; they are from the
same Icelake pdf) are like this; the others rely on AVX512VL/BW. But what do
you think about adding AVX/SSE flags to this special set instead? It looks like
they are more likely than the GFNI/VAES flags to be used as flags on which new
instructions may depend in the future.

-Julia

> -Original Message-
> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
> ow...@gcc.gnu.org] On Behalf Of Jakub Jelinek
> Sent: Tuesday, October 31, 2017 8:28 PM
> To: Koval, Julia 
> Cc: Kirill Yukhin ; GCC Patches  patc...@gcc.gnu.org>
> Subject: Re: [patch][x86] GFNI enabling [2/4]
> 
> On Mon, Oct 30, 2017 at 07:02:23PM +, Koval, Julia wrote:
> > gcc/testsuite/
> > * gcc.target/i386/avx-1.c: Handle new intrinsics.
> > * gcc.target/i386/avx512-check.h: Check GFNI bit.
> > * gcc.target/i386/avx512f-gf2p8affineinvqb-2.c: Runtime test.
> > * gcc.target/i386/avx512vl-gf2p8affineinvqb-2.c: Runtime test.
> > * gcc.target/i386/gfni-1.c: New.
> > * gcc.target/i386/gfni-2.c: New.
> > * gcc.target/i386/gfni-3.c: New.
> > * gcc.target/i386/gfni-4.c: New.
> 
> The gfni-4.c testcase ICEs on i686-linux (e.g. try
> make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32/-msse,-m32/-
> mno-sse,-m64\} i386.exp=gfni*'
> to see it).
> 
> I must say I'm confused by the CPUIDs, the
> https://software.intel.com/sites/default/files/managed/c5/15/architecture-
> instruction-set-extensions-programming-reference.pdf
> lists GFNI; 2x AVX+GFNI; 2x AVX512VL+GFNI; AVX512F+GFNI CPUIDs for the
> instructions, but i386-builtins.def has:
> BDESC (OPTION_MASK_ISA_GFNI, CODE_FOR_vgf2p8affineinvqb_v64qi,
> "__builtin_ia32_vgf2p8affineinvqb_v64qi",
> IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN
> BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW,
> CODE_FOR_vgf2p8affineinvqb_v64qi_mask,
> "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_
> BDESC (OPTION_MASK_ISA_GFNI, CODE_FOR_vgf2p8affineinvqb_v32qi,
> "__builtin_ia32_vgf2p8affineinvqb_v32qi",
> IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN
> BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW,
> CODE_FOR_vgf2p8affineinvqb_v32qi_mask,
> "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_
> BDESC (OPTION_MASK_ISA_GFNI, CODE_FOR_vgf2p8affineinvqb_v16qi,
> "__builtin_ia32_vgf2p8affineinvqb_v16qi",
> IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN
> BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW,
> CODE_FOR_vgf2p8affineinvqb_v16qi_mask,
> "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_
> and the gfniintrin.h requires just gfni for the first insn,
> and then some combinations of gfni,avx, or gfni,avx512vl, or
> gfni,avx512vl,avx512bw, or gfni,avx512f,avx512bw.
> 
> So, what is right, the paper, i386-builtins.def or gfniintrin.h?
> 
> Obviously even if the GF2P8AFFINEINVQB instruction doesn't list SSE as
> required CPUID, we can't really emit it without at least SSE because
> then the operands can't be emitted.  So, at least in GCC we should
> require both GFNI and SSE for the first instruction.
> 
> Which leads to another issue, as ix86_expand_builtin documents,
> we treat the BDESC ISAs OPTION_MASK_ISA_ISA1 | OPTION_MASK_ISA_ISA2
> as either ISA1 or ISA2, not ISA1 and ISA2.  The exceptions are
> MMX, AVX512VL and 64BIT is also special.
> So, shall GFNI be added to that set?  Do we have other ISAs that
> should be handled the same?  I guess maybe OPTION_MASK_ISA_AES, but
> that is handled weirdly.
> 
>   Jakub

[PATCH] [ARC] Reimplement exception handling support.

This is a patch which solves the ARC issues with exception handling support.

Ok to apply?
Claudiu

2016-06-09  Claudiu Zissulescu  
Andrew Burgess  

* config/arc/arc-protos.h (arc_compute_frame_size): Delete
declaration.
(arc_return_slot_offset): Likewise.
(arc_eh_return_address_location): New declaration.
* config/arc/arc.c (TARGET_BUILTIN_SETJMP_FRAME_VALUE): Define.
(MUST_SAVE_REGISTER): Add exception handler case.
(MUST_SAVE_RETURN_ADDR): Likewise.
(arc_frame_pointer_required): Likewise.
(arc_frame_pointer_needed): New function.
(arc_compute_frame_size): Changed.
(arc_expand_prologue): Likewise.
(arc_expand_epilogue): Likewise.
(arc_initial_elimination_offset): Likewise.
(arc_return_slot_offset): Delete.
(arc_eh_return_address_location): New function.
(arc_builtin_setjmp_frame_value): Likewise.
* config/arc/arc.h (EH_RETURN_DATA_REGNO): Use 2 registers.
(EH_RETURN_STACKADJ_RTX): Define.
(EH_RETURN_HANDLER_RTX): Likewise.
* config/arc/arc.md (eh_return): Delete.
---
 gcc/config/arc/arc-protos.h |   2 +-
 gcc/config/arc/arc.c| 202 +++-
 gcc/config/arc/arc.h|   7 +-
 gcc/config/arc/arc.md   |  33 
 4 files changed, 166 insertions(+), 78 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 1c7031c..6e7239f 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -111,8 +111,8 @@ extern bool arc_epilogue_uses (int regno);
 extern bool arc_eh_uses (int regno);
 /* insn-attrtab.c doesn't include reload.h, which declares regno_clobbered_p. 
*/
 extern int regno_clobbered_p (unsigned int, rtx_insn *, machine_mode, int);
-extern int arc_return_slot_offset (void);
 extern bool arc_legitimize_reload_address (rtx *, machine_mode, int, int);
 extern void arc_secondary_reload_conv (rtx, rtx, rtx, bool);
 extern void arc_cpu_cpp_builtins (cpp_reader *);
 extern bool arc_store_addr_hazard_p (rtx_insn *, rtx_insn *);
+extern rtx arc_eh_return_address_location (void);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index a0b66758..75d35cd 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -597,6 +597,8 @@ static void arc_finalize_pic (void);
 
 #undef TARGET_MODES_TIEABLE_P
 #define TARGET_MODES_TIEABLE_P arc_modes_tieable_p
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE arc_builtin_setjmp_frame_value
 
 /* Try to keep the (mov:DF _, reg) as early as possible so
that the dh-lr insns appear together and can
@@ -2556,8 +2558,7 @@ arc_compute_function_type (struct function *fun)
Addition for pic: The gp register needs to be saved if the current
function changes it to access gotoff variables.
FIXME: This will not be needed if we used some arbitrary register
-   instead of r26.
-*/
+   instead of r26.  */
 
 static bool
 arc_must_save_register (int regno, struct function *func)
@@ -2620,14 +2621,51 @@ arc_must_save_return_addr (struct function *func)
 /* Helper function to wrap FRAME_POINTER_NEEDED.  We do this as
FRAME_POINTER_NEEDED will not be true until the IRA (Integrated
Register Allocator) pass, while we want to get the frame size
-   correct earlier than the IRA pass.  */
+   correct earlier than the IRA pass.
+
+   When a function uses eh_return we must ensure that the fp register
+   is saved and then restored so that the unwinder can restore the
+   correct value for the frame we are going to jump to.
+
+   To do this we force all frames that call eh_return to require a
+   frame pointer (see changes to arc_frame_pointer_required), this
+   will ensure that the previous frame pointer is stored on entry to
+   the function, and will then be reloaded at function exit.
+
+   As the frame pointer is handled as a special case in our prologue
+   and epilogue code it must not be saved and restored using the
+   MUST_SAVE_REGISTER mechanism otherwise we run into issues where GCC
+   believes that the function is not using a frame pointer and that
+   the value in the fp register is the frame pointer, while the
+   prologue and epilogue are busy saving and restoring the fp
+   register.  This issue is fixed in this commit too.
+
+   During compilation of a function the frame size is evaluated
+   multiple times, it is not until the reload pass is complete that the
+   frame size is considered fixed (it is at this point that space for
+   all spills has been allocated).  However the frame_pointer_needed
+   variable is not set true until the register allocation pass, as a
+   result in the early stages the frame size does not include space
+   for the frame pointer to be spilled.
+
+   The problem that this causes, that I have not yet tracked down, is
+   that the rtl generated for EH_RETURN_HANDLER_RTX uses the details
+   of the frame size to comp

Re: Adjust empty class parameter passing ABI (PR c++/60336)

On Wed, 1 Nov 2017, Marek Polacek wrote:

> On Fri, Oct 27, 2017 at 12:46:12PM +0200, Richard Biener wrote:
> > On Fri, 27 Oct 2017, Jakub Jelinek wrote:
> > 
> > > On Fri, Oct 27, 2017 at 12:31:46PM +0200, Richard Biener wrote:
> > > > I fear it doesn't work at all with LTO (you'll always get the old ABI
> > > > if I read the patch correctly).  This is because the function
> > > > computing the size looks at flag_abi_version which isn't saved
> > > > per function / TU.
> > > > 
> > > > Similarly you'll never get the ABI warning with LTO (less of a big
> > > > deal of course) because the langhook doesn't reflect things correctly
> > > > either.
> > > > 
> > > > So...  can we instead compute whether a type is "empty" according
> > > > to the ABI early and store the result in the type (thinking of
> > > > doing this in layout_type?).  Similarly set a flag whether to
> > > > warn.  Why do you warn from backends / code emission and not
> > > > from the FEs?  Is that to avoid warnings for calls that got inlined?
> > > > Maybe the FE could set a flag on the call itself (ok, somewhat
> > > > awkward to funnel through gimple).
> > > 
> > > Warning in the FE is too early both because of the inlining, never
> > > emitted functions and because whether an empty struct is passed 
> > > differently
> > > from the past matters on the backend (whether its psABI says it should be
> > > not passed at all or not).
> > > 
> > > Perhaps if empty types are rare enough it could be an artificial attribute
> > > on the type if we can't get a spare bit for that.  But computing in the FE
> > > or before free_lang_data and saving on the type whether it is empty or not
> > > seems reasonable to me.
> > 
> > There are 18 unused bits in tree_type_common if we don't want to re-use
> > any.  For the warning I first thought of setting TREE_NO_WARNING on it
> > but that bit is used already.  OTOH given the "fit" of TREE_NO_WARNING
> > I'd move TYPE_ARTIFICIAL somewhere else.
> 
> All right, should be done in the below.  I've introduced two new flags,
> TYPE_EMPTY_P (says whether the type is empty according to the psABI), and
> TYPE_WARN_EMPTY_P (whether we should warn).  I've added two new fields to
> type_type_common and moved TYPE_ARTIFICIAL there; TYPE_WARN_EMPTY_P is now
> mapped to nowarning_flag.  So this should work with LTO, as demonstrated
> by g++.dg/lto/pr60336_0.C.  
> 
> Regarding LTO and -Wabi warning, I've added Optimization to c.opt so that
> we get warnings with LTO.  But as pointed out on IRC, this doesn't fully work
> with cross-inlining.  I tried to do some flags merging in inline_call, but
> that didn't help, one of the problems is that warn_abi_version lives in
> c-family only.  Not sure if I'll be able to improve things here though.
> 
> Bootstrapped/regtested on x86_64-linux, ppc64-linux, and aarch64-linux.
> Bootstrap-lto passed on x86_64-linux and ppc64-linux.

To me the tree.c stuff is_empty_type looks awfully ABI dependent
and should thus reside in i386.c near the target hook implementation?

What goes wrong if we do not introduce new int_maybe_empty_type_size
and maybe_empty_type_size but instead change int_size_in_bytes and
size_in_bytes to return 0 if TYPE_EMPTY_P ()?  If the ABI can omit
passing things assuming the size is zero should work as well, no?
Otherwise I'd really prefer seeing explicit TYPE_EMPTY_P checks
which would reduce the number of "indirect" greps one has to do when
looking for effects of TYPE_EMPTY_P.

Otherwise the middle-end/LTO parts look ok.

I'd omit the 'Optimization' change on the Wabi warning flag if it
doesn't fully give us what we want and address this as a followup.

I think 'Optimization' is also used for -help reporting and thus
could be confusing at first.

Still needs FE and target maintainer approval -- the target maintainer
wants to look at the seemingly ABI independent functions in tree.c.

Thanks,
Richard.

> 2017-11-01  Marek Polacek  
>   H.J. Lu  
>   Jason Merrill  
> 
>   PR c++/60336
>   PR middle-end/67239
>   PR target/68355
>   * c.opt (Wabi, Wabi=): Add Optimization.
> 
>   * class.c (layout_class_type): Set TYPE_EMPTY_P and TYPE_WARN_EMPTY_P.
>   * cp-tree.h (array_type_nelts_top): Remove.
>   * tree.c (array_type_nelts_top): Move to tree.c.
> 
>   * lto.c (compare_tree_sccs_1): Compare TYPE_WARN_EMPTY_P and
>   TYPE_EMPTY_P.
> 
>   * calls.c (initialize_argument_information): Call
>   warn_parameter_passing_abi target hook.
>   (store_one_arg): Use 0 for empty record size.  Don't push 0 size
>   argument onto stack.
>   (must_pass_in_stack_var_size_or_pad): Return false for empty types.
>   * common.opt: Update -fabi-version description.
>   * config/i386/i386.c (init_cumulative_args): Set cum->warn_empty.
>   (ix86_function_arg_advance): Skip empty records.
>   (ix86_return_in_memory): Return false for empty types.
>   (ix86_gimplify_va_arg): Call int_maybe_empty_type_size

[PATCH 0/6] [ARC] New baremetal features and fixes

From: claziss 

Hi,

This set of patches adds support for:
 - JLI/SJLI instructions;
 - reduced register file 16 specific for particular ARC configurations;
 - 'uncached' attribute, used to toggle .di flag on variable basis;
 - 'aux' attribute, used to easily access auxiliary registers.

Also, I've reworked the delegitimize address hook in a simpler way.

Ok to apply?
Claudiu


Claudiu Zissulescu (3):
  [ARC] Add JLI support.
  [ARC] Add SJLI support.
  [ARC] Add support for "register file 16" reduced register set
  [ARC] Rework delegitimate_address hook
  [ARC] Add 'uncached' attribute.
  [ARC] Add 'aux' variable attribute.

 gcc/config/arc/arc-arches.def |   8 +-
 gcc/config/arc/arc-c.def  |   1 +
 gcc/config/arc/arc-cpus.def   |   1 +
 gcc/config/arc/arc-options.def|   2 +-
 gcc/config/arc/arc-protos.h   |   4 +
 gcc/config/arc/arc-tables.opt |   3 +
 gcc/config/arc/arc.c  | 645 --
 gcc/config/arc/arc.h  |   2 +-
 gcc/config/arc/arc.md | 138 ++---
 gcc/config/arc/arc.opt|   8 +
 gcc/config/arc/constraints.md |  13 +
 gcc/config/arc/elf.h  |   7 +
 gcc/config/arc/fpx.md |  18 +-
 gcc/config/arc/genmultilib.awk|   2 +
 gcc/config/arc/linux.h|  10 +
 gcc/config/arc/predicates.md  |   7 +-
 gcc/config/arc/simdext.md |   2 +-
 gcc/config/arc/t-multilib |   4 +-
 gcc/doc/extend.texi   |  29 +
 gcc/doc/invoke.texi   |  15 +-
 gcc/testsuite/gcc.dg/builtin-apply2.c |   8 +-
 gcc/testsuite/gcc.target/arc/jli-1.c  |  19 +
 gcc/testsuite/gcc.target/arc/jli-2.c  |  19 +
 gcc/testsuite/gcc.target/arc/taux-1.c |  38 ++
 gcc/testsuite/gcc.target/arc/taux-2.c |  15 +
 gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c |  23 +
 gcc/testsuite/gcc.target/arc/uncached.c   |  18 +
 libgcc/config/arc/lib1funcs.S |  22 +-
 28 files changed, 920 insertions(+), 161 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/jli-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/jli-2.c
 create mode 100644 gcc/testsuite/gcc.target/arc/taux-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/taux-2.c
 create mode 100755 gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c
 create mode 100644 gcc/testsuite/gcc.target/arc/uncached.c

-- 
1.9.1

[PATCH 4/6] [ARC] Rework delegitimate_address hook

From: claziss 

Delegitimize address is used to undo the obfuscating effect of PIC
addresses, returning the address in a way which is understood by the
compiler.

gcc/
2017-04-25  Claudiu Zissulescu  

* config/arc/arc.c (arc_delegitimize_address_0): Refactored to
recognize new pic like addresses.
(arc_delegitimize_address): Clean up.

testsuite/
2017-08-31  Claudiu Zissulescu  

* testsuite/gcc.target/arc/tdelegitimize_addr.c: New test.
---
 gcc/config/arc/arc.c  | 91 ++-
 gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c | 23 ++
 2 files changed, 62 insertions(+), 52 deletions(-)
 create mode 100755 gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index e7194a2..07dd072 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -9506,68 +9506,55 @@ arc_legitimize_address (rtx orig_x, rtx oldx, 
machine_mode mode)
 }
 
 static rtx
-arc_delegitimize_address_0 (rtx x)
+arc_delegitimize_address_0 (rtx op)
 {
-  rtx u, gp, p;
-
-  if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
+  switch (GET_CODE (op))
 {
-  if (XINT (u, 1) == ARC_UNSPEC_GOT
- || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC)
-   return XVECEXP (u, 0, 0);
+case CONST:
+  return arc_delegitimize_address_0 (XEXP (op, 0));
+
+case UNSPEC:
+  switch (XINT (op, 1))
+   {
+   case ARC_UNSPEC_GOT:
+   case ARC_UNSPEC_GOTOFFPC:
+ return XVECEXP (op, 0, 0);
+   default:
+ break;
+   }
+  break;
+
+case PLUS:
+  {
+   rtx t1 = arc_delegitimize_address_0 (XEXP (op, 0));
+   rtx t2 = XEXP (op, 1);
+
+   if (t1 && t2)
+ return gen_rtx_PLUS (GET_MODE (op), t1, t2);
+   break;
+  }
+
+default:
+  break;
 }
-  else if (GET_CODE (x) == CONST && GET_CODE (p = XEXP (x, 0)) == PLUS
-  && GET_CODE (u = XEXP (p, 0)) == UNSPEC
-  && (XINT (u, 1) == ARC_UNSPEC_GOT
-  || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC))
-return gen_rtx_CONST
-   (GET_MODE (x),
-gen_rtx_PLUS (GET_MODE (p), XVECEXP (u, 0, 0), XEXP (p, 1)));
-  else if (GET_CODE (x) == PLUS
-  && ((REG_P (gp = XEXP (x, 0))
-   && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
-  || (GET_CODE (gp) == CONST
-  && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
-  && XINT (u, 1) == ARC_UNSPEC_GOT
-  && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
-  && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
-  && GET_CODE (XEXP (x, 1)) == CONST
-  && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
-  && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
-return XVECEXP (u, 0, 0);
-  else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
-  && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
-   && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
-  || (GET_CODE (gp) == CONST
-  && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
-  && XINT (u, 1) == ARC_UNSPEC_GOT
-  && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
-  && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
-  && GET_CODE (XEXP (x, 1)) == CONST
-  && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
-  && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
-return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
-XVECEXP (u, 0, 0));
-  else if (GET_CODE (x) == PLUS
-  && (u = arc_delegitimize_address_0 (XEXP (x, 1
-return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
   return NULL_RTX;
 }
 
 static rtx
-arc_delegitimize_address (rtx x)
+arc_delegitimize_address (rtx orig_x)
 {
-  rtx orig_x = x = delegitimize_mem_from_attrs (x);
-  if (GET_CODE (x) == MEM)
+  rtx x = orig_x;
+
+  if (MEM_P (x))
 x = XEXP (x, 0);
+
   x = arc_delegitimize_address_0 (x);
-  if (x)
-{
-  if (MEM_P (orig_x))
-   x = replace_equiv_address_nv (orig_x, x);
-  return x;
-}
-  return orig_x;
+  if (!x)
+return orig_x;
+
+  if (MEM_P (orig_x))
+x = replace_equiv_address_nv (orig_x, x);
+  return x;
 }
 
 /* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
diff --git a/gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c 
b/gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c
new file mode 100755
index 000..0d010ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/tdelegitimize_addr.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { ! { clmcpu } } } */
+/* { dg-options "-mcpu=archs -g -O1 -fpic -mlra" } */
+
+/* Check if delegitimize address returns correctly the un-obfuscated
+   address.  */
+
+typedef struct {
+  long long tv_usec;
+} t_a;
+
+static t_a a;
+
+int b;
+extern void fn2 (t_a);
+
+void fn1 (void)
+{
+ again:
+  fn2(a);
+  if (b)
+goto again;
+}
-- 
1.9.1

[PATCH 3/6] [ARC] Add support for "register file 16" reduced register set

gcc/
2017-03-20  Claudiu Zissulescu  

* config/arc/arc-arches.def: Option mrf16 valid for all
architectures.
* config/arc/arc-c.def (__ARC_RF16__): New predefined macro.
* config/arc/arc-cpus.def (em_mini): New cpu with rf16 on.
* config/arc/arc-options.def (FL_RF16): Add mrf16 option.
* config/arc/arc-tables.opt: Regenerate.
* config/arc/arc.c (arc_conditional_register_usage): Handle
reduced register file case.
(arc_file_start): Set must have build attributes.
* config/arc/arc.h (MAX_ARC_PARM_REGS): Conditional define using
mrf16 option value.
* config/arc/arc.opt (mrf16): Add new option.
* config/arc/elf.h (ATTRIBUTE_PCS): Define.
* config/arc/genmultilib.awk: Handle new mrf16 option.
* config/arc/linux.h (ATTRIBUTE_PCS): Define.
* config/arc/t-multilib: Regenerate.
* doc/invoke.texi (ARC Options): Document mrf16 option.

gcc/testsuite/
2017-03-20  Claudiu Zissulescu  

* gcc.dg/builtin-apply2.c: Change for the ARC's reduced register
set file case.

libgcc/
2017-09-18  Claudiu Zissulescu  

* config/arc/lib1funcs.S (__udivmodsi4): Use safe version for RF16
option.
(__divsi3): Use RF16 safe registers.
(__modsi3): Likewise.
---
 gcc/config/arc/arc-arches.def |  8 
 gcc/config/arc/arc-c.def  |  1 +
 gcc/config/arc/arc-cpus.def   |  1 +
 gcc/config/arc/arc-options.def|  2 +-
 gcc/config/arc/arc-tables.opt |  3 +++
 gcc/config/arc/arc.c  | 27 +++
 gcc/config/arc/arc.h  |  2 +-
 gcc/config/arc/arc.opt|  4 
 gcc/config/arc/elf.h  |  4 
 gcc/config/arc/genmultilib.awk|  2 ++
 gcc/config/arc/linux.h|  9 +
 gcc/config/arc/t-multilib |  4 ++--
 gcc/doc/invoke.texi   |  8 +++-
 gcc/testsuite/gcc.dg/builtin-apply2.c |  8 +++-
 libgcc/config/arc/lib1funcs.S | 22 +++---
 15 files changed, 84 insertions(+), 21 deletions(-)

diff --git a/gcc/config/arc/arc-arches.def b/gcc/config/arc/arc-arches.def
index 29cb9c4..a0d585b 100644
--- a/gcc/config/arc/arc-arches.def
+++ b/gcc/config/arc/arc-arches.def
@@ -40,15 +40,15 @@
 
 ARC_ARCH ("arcem", em, FL_MPYOPT_1_6 | FL_DIVREM | FL_CD | FL_NORM \
  | FL_BS | FL_SWAP | FL_FPUS | FL_SPFP | FL_DPFP   \
- | FL_SIMD | FL_FPUDA | FL_QUARK, 0)
+ | FL_SIMD | FL_FPUDA | FL_QUARK | FL_RF16, 0)
 ARC_ARCH ("archs", hs, FL_MPYOPT_7_9 | FL_DIVREM | FL_NORM | FL_CD \
  | FL_ATOMIC | FL_LL64 | FL_BS | FL_SWAP   \
- | FL_FPUS | FL_FPUD,  \
+ | FL_FPUS | FL_FPUD | FL_RF16,\
  FL_CD | FL_ATOMIC | FL_BS | FL_NORM | FL_SWAP)
 ARC_ARCH ("arc6xx", 6xx, FL_BS | FL_NORM | FL_SWAP | FL_MUL64 | FL_MUL32x16 \
- | FL_SPFP | FL_ARGONAUT | FL_DPFP, 0)
+ | FL_SPFP | FL_ARGONAUT | FL_DPFP | FL_RF16, 0)
 ARC_ARCH ("arc700", 700, FL_ATOMIC | FL_BS | FL_NORM | FL_SWAP | FL_EA \
- | FL_SIMD | FL_SPFP | FL_ARGONAUT | FL_DPFP, \
+ | FL_SIMD | FL_SPFP | FL_ARGONAUT | FL_DPFP | FL_RF16,   \
  FL_BS | FL_NORM | FL_SWAP)
 
 /* Local Variables: */
diff --git a/gcc/config/arc/arc-c.def b/gcc/config/arc/arc-c.def
index 8c5097e..c9443c9 100644
--- a/gcc/config/arc/arc-c.def
+++ b/gcc/config/arc/arc-c.def
@@ -28,6 +28,7 @@ ARC_C_DEF ("__ARC_NORM__",TARGET_NORM)
 ARC_C_DEF ("__ARC_MUL64__",TARGET_MUL64_SET)
 ARC_C_DEF ("__ARC_MUL32BY16__", TARGET_MULMAC_32BY16_SET)
 ARC_C_DEF ("__ARC_SIMD__", TARGET_SIMD_SET)
+ARC_C_DEF ("__ARC_RF16__", TARGET_RF16)
 
 ARC_C_DEF ("__ARC_BARREL_SHIFTER__", TARGET_BARREL_SHIFTER)
 
diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
index 60b4045..c2b0062 100644
--- a/gcc/config/arc/arc-cpus.def
+++ b/gcc/config/arc/arc-cpus.def
@@ -46,6 +46,7 @@
TUNE  Tune value for the given configuration, otherwise NONE.  */
 
 ARC_CPU (em,   em, 0, NONE)
+ARC_CPU (em_mini,   em, FL_RF16, NONE)
 ARC_CPU (arcem,em, FL_MPYOPT_2|FL_CD|FL_BS, NONE)
 ARC_CPU (em4,  em, FL_CD, NONE)
 ARC_CPU (em4_dmips, em, FL_MPYOPT_2|FL_CD|FL_DIVREM|FL_NORM|FL_SWAP|FL_BS, 
NONE)
diff --git a/gcc/config/arc/arc-options.def b/gcc/config/arc/arc-options.def
index be51614..8fc7b50 100644
--- a/gcc/config/arc/arc-options.def
+++ b/gcc/config/arc/arc-options.def
@@ -60,7 +60,7 @@
 ARC_OPT (FL_CD,  (1ULL << 0), MASK_CODE_DENSITY,  "code 
density")
 ARC_OPT (FL_DIVREM,   (1ULL << 1), MASK_DIVREM,   "div/rem")
 ARC_OPT (FL_NORM, (1ULL << 2), MASK_NORM_SET, "norm")
-
+ARC_OPT (FL_RF16, (1ULL << 3), MASK_RF16,  "rf16")
 ARC_OPT (FL_ATOMIC,   (1ULL << 4), MASK_ATOMIC,

[PATCH 5/6] [ARC] Add 'uncached' attribute.

From: claziss 

The _Uncached type qualifier can be used to bypass the cache without
resorting to declaring variables as volatile.

gcc/
2017-07-12  Claudiu Zissulescu  

* config/arc/arc-protos.h (arc_is_uncached_mem_p): Function proto.
* config/arc/arc.c (arc_handle_uncached_attribute): New function.
(arc_attribute_table): Add 'uncached' attribute.
(arc_print_operand): Print '.di' flag for uncached memory
accesses.
(arc_in_small_data_p): Do not consider for small data the uncached
types.
(arc_is_uncached_mem_p): New function.
* config/arc/predicates.md (compact_store_memory_operand): Check
for uncached memory accesses.
(nonvol_nonimm_operand): Likewise.

gcc/testsuite
2017-07-12  Claudiu Zissulescu  

* gcc.target/arc/uncached.c: New test.
---
 gcc/config/arc/arc-protos.h |  1 +
 gcc/config/arc/arc.c| 65 -
 gcc/config/arc/predicates.md|  7 +++-
 gcc/testsuite/gcc.target/arc/uncached.c | 18 +
 4 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/uncached.c

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index f8e7937..cc00730 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -47,6 +47,7 @@ extern void arc_expand_compare_and_swap (rtx *);
 extern bool compact_memory_operand_p (rtx, machine_mode, bool, bool);
 extern int arc_return_address_register (unsigned int);
 extern unsigned int arc_compute_function_type (struct function *);
+extern bool arc_is_uncached_mem_p (rtx);
 #endif /* RTX_CODE */
 
 extern unsigned int arc_compute_frame_size (int);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 07dd072..a397cbd 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -222,7 +222,7 @@ static tree arc_handle_interrupt_attribute (tree *, tree, 
tree, int, bool *);
 static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
-
+static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
 
 /* Initialized arc_attribute_table to NULL since arc doesnot have any
machine specific supported attributes.  */
@@ -254,6 +254,9 @@ const struct attribute_spec arc_attribute_table[] =
   /* Call a function using secure-mode.  */
   { "secure_call",  1, 1, false, true, true, arc_handle_secure_attribute,
 false },
+  /* Bypass caches using .di flag.  */
+  { "uncached", 0, 0, false, true, false, arc_handle_uncached_attribute,
+false },
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 static int arc_comp_type_attributes (const_tree, const_tree);
@@ -4135,7 +4138,8 @@ arc_print_operand (FILE *file, rtx x, int code)
 refs are defined to use the cache bypass mechanism.  */
   if (GET_CODE (x) == MEM)
{
- if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
+ if ((MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
+ || arc_is_uncached_mem_p (x))
fputs (".di", file);
}
   else
@@ -8038,6 +8042,7 @@ static bool
 arc_in_small_data_p (const_tree decl)
 {
   HOST_WIDE_INT size;
+  tree attr;
 
   /* Only variables are going into small data area.  */
   if (TREE_CODE (decl) != VAR_DECL)
@@ -8061,6 +8066,11 @@ arc_in_small_data_p (const_tree decl)
   && TREE_THIS_VOLATILE (decl))
 return false;
 
+  /* Likewise for uncached data.  */
+  attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+  if (lookup_attribute ("uncached", attr))
+return false;
+
   if (DECL_SECTION_NAME (decl) != 0)
 {
   const char *name = DECL_SECTION_NAME (decl);
@@ -11066,6 +11076,57 @@ arc_is_secure_call_p (rtx pat)
   return false;
 }
 
+/* Handle "uncached" qualifier.  */
+
+static tree
+arc_handle_uncached_attribute (tree *node,
+  tree name, tree args,
+  int flags ATTRIBUTE_UNUSED,
+  bool *no_add_attrs)
+{
+  if (DECL_P (*node) && TREE_CODE (*node) != TYPE_DECL)
+{
+  error ("%qE attribute only applies to types",
+name);
+  *no_add_attrs = true;
+}
+  else if (args)
+{
+  warning (OPT_Wattributes, "argument of %qE attribute ignored", name);
+}
+  return NULL_TREE;
+}
+
+/* Return TRUE if PAT is a memory addressing an uncached data.  */
+
+bool
+arc_is_uncached_mem_p (rtx pat)
+{
+  tree attrs;
+  tree ttype;
+  struct mem_attrs *refattrs;
+
+  if (!MEM_P (pat))
+return false;
+
+  /* Get the memory attributes.  */
+  refattrs = MEM_ATTRS (pat);
+  if (!refattrs
+  || !refattrs->expr)
+return false;
+
+  /* Get the type declaration.  */
+  ttype = TREE_TYPE (refattrs->expr);
+  if (!ttype)
+return false;
+
+  /* Get the type attributes.  */
+  attrs

[PATCH 2/6] [ARC] Add SJLI support.

gcc/
2017-02-20  Claudiu Zissulescu  

* config/arc/arc-protos.h: Add arc_is_secure_call_p proto.
* config/arc/arc.c (arc_handle_secure_attribute): New function.
(arc_attribute_table): Add 'secure_call' attribute.
(arc_print_operand): Print secure call operand.
(arc_function_ok_for_sibcall): Don't optimize tail calls when
secure.
(arc_is_secure_call_p): New function.
* config/arc/arc.md (call_i): Add support for sjli instruction.
(call_value_i): Likewise.
* config/arc/constraints.md (Csc): New constraint.
---
 gcc/config/arc/arc-protos.h   |   1 +
 gcc/config/arc/arc.c  | 164 +++---
 gcc/config/arc/arc.md |  32 +
 gcc/config/arc/constraints.md |   7 ++
 gcc/doc/extend.texi   |   6 ++
 5 files changed, 155 insertions(+), 55 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 1557293..f8e7937 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -118,3 +118,4 @@ extern bool arc_store_addr_hazard_p (rtx_insn *, rtx_insn 
*);
 extern rtx arc_eh_return_address_location (void);
 extern bool arc_is_jli_call_p (rtx);
 extern void arc_file_end (void);
+extern bool arc_is_secure_call_p (rtx);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 489cb91..9867e6e 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -221,6 +221,7 @@ static int get_arc_condition_code (rtx);
 static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
 
 
 /* Initialized arc_attribute_table to NULL since arc doesnot have any
@@ -250,6 +251,9 @@ const struct attribute_spec arc_attribute_table[] =
  table is given as input parameter.  */
   { "jli_fixed",1, 1, false, true,  true,  arc_handle_jli_attribute,
 false },
+  /* Call a function using secure-mode.  */
+  { "secure_call",  1, 1, false, true, true, arc_handle_secure_attribute,
+false },
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 static int arc_comp_type_attributes (const_tree, const_tree);
@@ -3687,6 +3691,46 @@ arc_trampoline_adjust_address (rtx addr)
   return plus_constant (Pmode, addr, 2);
 }
 
+/* Add the given function declaration to emit code in JLI section.  */
+
+static void
+arc_add_jli_section (rtx pat)
+{
+  const char *name;
+  tree attrs;
+  arc_jli_section *sec = arc_jli_sections, *new_section;
+  tree decl = SYMBOL_REF_DECL (pat);
+
+  if (!pat)
+return;
+
+  if (decl)
+{
+  /* For fixed locations do not generate the jli table entry.  It
+should be provided by the user as an asm file.  */
+  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+  if (lookup_attribute ("jli_fixed", attrs))
+   return;
+}
+
+  name = XSTR (pat, 0);
+
+  /* Don't insert the same symbol twice.  */
+  while (sec != NULL)
+{
+  if(strcmp (name, sec->name) == 0)
+   return;
+  sec = sec->next;
+}
+
+  /* New name, insert it.  */
+  new_section = (arc_jli_section *) xmalloc (sizeof (arc_jli_section));
+  gcc_assert (new_section != NULL);
+  new_section->name = name;
+  new_section->next = arc_jli_sections;
+  arc_jli_sections = new_section;
+}
+
 /* This is set briefly to 1 when we output a ".as" address modifer, and then
reset when we output the scaled address.  */
 static int output_scaled = 0;
@@ -3714,6 +3758,7 @@ static int output_scaled = 0;
 'D'
 'R': Second word
 'S': JLI instruction
+'j': used by mov instruction to properly emit jli related labels.
 'B': Branch comparison operand - suppress sda reference
 'H': Most significant word
 'L': Least significant word
@@ -3928,6 +3973,7 @@ arc_print_operand (FILE *file, rtx x, int code)
   else
output_operand_lossage ("invalid operand to %%R code");
   return;
+case 'j':
 case 'S' :
   if (GET_CODE (x) == SYMBOL_REF
  && arc_is_jli_call_p (x))
@@ -3939,6 +3985,9 @@ arc_print_operand (FILE *file, rtx x, int code)
: NULL_TREE);
  if (lookup_attribute ("jli_fixed", attrs))
{
+ /* No special treatment for jli_fixed functions.  */
+ if (code == 'j' )
+   break;
  fprintf (file, "%ld\t; @",
   TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
  assemble_name (file, XSTR (x, 0));
@@ -3947,6 +3996,22 @@ arc_print_operand (FILE *file, rtx x, int code)
}
  fprintf (file, "@__jli.");
  assemble_name (file, XSTR (x, 0));
+ if (code == 'j')
+   arc_add_jli_section (x);
+ return;
+   }
+  if (GET_CODE (x) == SYMBOL_REF
+

[PATCH 1/6] [ARC] Add JLI support.

The ARCv2 ISA provides the JLI instruction, a two-byte instruction
that can be used to reduce code size in an application. To make use of it,
we provide two new function attributes, 'jli_always' and 'jli_fixed', which
force the compiler to call the indicated function using a jli_s
instruction. The compiler also generates the entries in the JLI table
when the 'jli_always' attribute is used. In the case of 'jli_fixed',
the compiler assumes a fixed position of the function in the JLI
table. Thus, the user needs to provide an assembly file with the JLI table
for the final link. This is useful when we want to have one table in ROM
and a second table in RAM.

The use of the jli instruction can also be forced, without annotating
the source code, via the '-mjli-always' command-line option.

gcc/
2017-02-10  Claudiu Zissulescu  
John Eric Martin 

* config/arc/arc-protos.h: Add arc_is_jli_call_p proto.
* config/arc/arc.c (_arc_jli_section): New struct.
(arc_jli_section): New type.
(arc_jli_sections): New static variable.
(arc_handle_jli_attribute): New function.
(arc_attribute_table): Add jli_always and jli_fixed attribute.
(arc_file_end): New function.
(TARGET_ASM_FILE_END): Define.
(arc_print_operand): Reuse 'S' letter for JLI output instruction.
(arc_add_jli_section): New function.
(jli_call_scan): Likewise.
(arc_reorg): Call jli_call_scan.
(arc_output_addsi): Remove 'S' from printing asm operand.
(arc_is_jli_call_p): New function.
* config/arc/arc.md (movqi_insn): Remove 'S' from printing asm
operand.
(movhi_insn): Likewise.
(movsi_insn): Likewise.
(movsi_set_cc_insn): Likewise.
(loadqi_update): Likewise.
(load_zeroextendqisi_update): Likewise.
(load_signextendqisi_update): Likewise.
(loadhi_update): Likewise.
(load_zeroextendhisi_update): Likewise.
(load_signextendhisi_update): Likewise.
(loadsi_update): Likewise.
(loadsf_update): Likewise.
(movsicc_insn): Likewise.
(bset_insn): Likewise.
(bxor_insn): Likewise.
(bclr_insn): Likewise.
(bmsk_insn): Likewise.
(bicsi3_insn): Likewise.
(cmpsi_cc_c_insn): Likewise.
(movsi_ne): Likewise.
(movsi_cond_exec): Likewise.
(clrsbsi2): Likewise.
(norm_f): Likewise.
(normw): Likewise.
(swap): Likewise.
(divaw): Likewise.
(flag): Likewise.
(sr): Likewise.
(kflag): Likewise.
(ffs): Likewise.
(ffs_f): Likewise.
(fls): Likewise.
(call_i): Remove 'S' asm letter, add jli instruction.
(call_value_i): Likewise.
* config/arc/arc.opt (mjli-always): New option.
* config/arc/constraints.md (Cji): New constraint.
* config/arc/fpx.md (addsf3_fpx): Remove 'S' from printing asm
operand.
(subsf3_fpx): Likewise.
(mulsf3_fpx): Likewise.
* config/arc/simdext.md (vendrec_insn): Remove 'S' from printing
asm operand.
* doc/extend.texi (ARC): Document 'jli_always' and 'jli_fixed'
function attributes.
* doc/invoke.texi (ARC): Document mjli-always option.

gcc/testsuite
2017-02-10  Claudiu Zissulescu  

* gcc.target/arc/jli-1.c: New file.
* gcc.target/arc/jli-2.c: Likewise.
---
 gcc/config/arc/arc-protos.h  |   2 +
 gcc/config/arc/arc.c | 220 ++-
 gcc/config/arc/arc.md| 128 ++--
 gcc/config/arc/arc.opt   |   4 +
 gcc/config/arc/constraints.md|   6 +
 gcc/config/arc/elf.h |   3 +
 gcc/config/arc/fpx.md|  18 +--
 gcc/config/arc/linux.h   |   1 +
 gcc/config/arc/simdext.md|   2 +-
 gcc/doc/extend.texi  |  10 ++
 gcc/doc/invoke.texi  |   7 +-
 gcc/testsuite/gcc.target/arc/jli-1.c |  19 +++
 gcc/testsuite/gcc.target/arc/jli-2.c |  19 +++
 13 files changed, 358 insertions(+), 81 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/jli-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/jli-2.c

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 6e7239f..1557293 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -116,3 +116,5 @@ extern void arc_secondary_reload_conv (rtx, rtx, rtx, bool);
 extern void arc_cpu_cpp_builtins (cpp_reader *);
 extern bool arc_store_addr_hazard_p (rtx_insn *, rtx_insn *);
 extern rtx arc_eh_return_address_location (void);
+extern bool arc_is_jli_call_p (rtx);
+extern void arc_file_end (void);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index e35d198..489cb91 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -71,6 +71,14 @@ along with GCC; see the file COPYING3.  If not see
 static char arc_c

[PATCH 6/6] [ARC] Add 'aux' variable attribute.

From: claziss 

The 'aux' variable attribute is used to directly access the auxiliary
register space from C.

gcc/
2017-07-25  Claudiu Zissulescu  

* config/arc/arc.c (arc_handle_aux_attribute): New function.
(arc_attribute_table): Add 'aux' attribute.
(arc_in_small_data_p): Consider aux like variables.
(arc_is_aux_reg_p): New function.
(arc_asm_output_aligned_decl_local): Ignore 'aux' like variables.
(arc_get_aux_arg): New function.
(prepare_move_operands): Handle aux-register access.
(arc_handle_aux_attribute): New function.
* doc/extend.texi (ARC Variable attributes): Add subsection.

testsuite/
2017-07-25  Claudiu Zissulescu  

* gcc.target/arc/taux-1.c: New test.
* gcc.target/arc/taux-2.c: Likewise.
---
 gcc/config/arc/arc.c  | 160 +-
 gcc/doc/extend.texi   |  13 +++
 gcc/testsuite/gcc.target/arc/taux-1.c |  38 
 gcc/testsuite/gcc.target/arc/taux-2.c |  15 
 4 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/taux-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/taux-2.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index a397cbd..33f68ef 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -223,6 +223,7 @@ static tree arc_handle_fndecl_attribute (tree *, tree, 
tree, int, bool *);
 static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
 static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *);
 
 /* Initialized arc_attribute_table to NULL since arc doesnot have any
machine specific supported attributes.  */
@@ -257,6 +258,8 @@ const struct attribute_spec arc_attribute_table[] =
   /* Bypass caches using .di flag.  */
   { "uncached", 0, 0, false, true, false, arc_handle_uncached_attribute,
 false },
+  /* Declare a variable as aux.  */
+  { "aux", 0, 1, true, false, false, arc_handle_aux_attribute, false },
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 static int arc_comp_type_attributes (const_tree, const_tree);
@@ -8071,6 +8074,11 @@ arc_in_small_data_p (const_tree decl)
   if (lookup_attribute ("uncached", attr))
 return false;
 
+  /* and for aux regs.  */
+  attr = DECL_ATTRIBUTES (decl);
+  if (lookup_attribute ("aux", attr))
+return false;
+
   if (DECL_SECTION_NAME (decl) != 0)
 {
   const char *name = DECL_SECTION_NAME (decl);
@@ -8238,6 +8246,35 @@ compact_sda_memory_operand (rtx op, machine_mode mode, 
bool short_p)
   return false;
 }
 
+/* Return TRUE if PAT is accessing an aux-reg.  */
+
+static bool
+arc_is_aux_reg_p (rtx pat)
+{
+  tree attrs = NULL_TREE;
+  tree addr;
+
+  if (!MEM_P (pat))
+return false;
+
+  /* Get the memory attributes.  */
+  addr = MEM_EXPR (pat);
+  if (!addr)
+return false;
+
+  /* Get the attributes.  */
+  if (TREE_CODE (addr) == VAR_DECL)
+attrs = DECL_ATTRIBUTES (addr);
+  else if (TREE_CODE (addr) == MEM_REF)
+attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
+  else
+return false;
+
+  if (lookup_attribute ("aux", attrs))
+return true;
+  return false;
+}
+
 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
 
 void
@@ -8246,7 +8283,14 @@ arc_asm_output_aligned_decl_local (FILE * stream, tree 
decl, const char * name,
   unsigned HOST_WIDE_INT align,
   unsigned HOST_WIDE_INT globalize_p)
 {
-  int in_small_data =   arc_in_small_data_p (decl);
+  int in_small_data = arc_in_small_data_p (decl);
+  rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl);
+
+  /* Don't output aux-reg symbols.  */
+  if (mem != NULL_RTX && MEM_P (mem)
+  && SYMBOL_REF_P (XEXP (mem, 0))
+  && arc_is_aux_reg_p (mem))
+return;
 
   if (in_small_data)
 switch_to_section (get_named_section (NULL, ".sbss", 0));
@@ -8586,12 +8630,80 @@ arc_expand_movmem (rtx *operands)
   return true;
 }
 
+static bool
+arc_get_aux_arg (rtx pat, int *auxr)
+{
+  tree attr, addr = MEM_EXPR (pat);
+  if (TREE_CODE (addr) != VAR_DECL)
+return false;
+
+  attr = DECL_ATTRIBUTES (addr);
+  if (lookup_attribute ("aux", attr))
+{
+  tree arg = TREE_VALUE (attr);
+  if (arg)
+   {
+ *auxr = TREE_INT_CST_LOW (TREE_VALUE (arg));
+ return true;
+   }
+}
+
+  return false;
+}
+
 /* Prepare operands for move in MODE.  Return true iff the move has
been emitted.  */
 
 bool
 prepare_move_operands (rtx *operands, machine_mode mode)
 {
+  /* First handle aux attribute.  */
+  if (mode == SImode
+  && (MEM_P (operands[0]) || MEM_P (operands[1])))
+{
+  rtx tmp;
+  int auxr = 0;
+  if (MEM_P (operands[0]) && arc_is_aux_reg_p (operands[0]))
+   {
+ /* Save operation.  */
+

Re: Generalize -(-X) a little

On Wed, Nov 1, 2017 at 12:47 PM, Marc Glisse  wrote:
> Hello,
>
> just a little tweak to that transformation. There is some overlap between
> the 2 versions, but it seemed easier to handle the NOP case (including the
> case without convert and the vector case) separately from the narrowing /
> sign-extending scalar integer case.
>
> At some point it would be good to have fold_negate_expr call
> generic_simplify so we could remove some transformations from fold-const.c.
>
> Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

+  (negate (convert (negate @1)))
+  (if (INTEGRAL_TYPE_P (type)
+   && (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@1))
+  || (!TYPE_UNSIGNED (TREE_TYPE (@1))
+  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1))))
+   && !TYPE_OVERFLOW_SANITIZED (type)
+   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))

so I don't understand this fully -- a widening conversion is ok only for
signed types with undefined overflow?  How does overflow come into play
here at all?

The testcase doesn't tell ...

For floats eliding any conversion should be ok if not flag_rounding_math
and flag_unsafe_math_optimizations (we remove a rounding step)?

Richard.

> 2017-11-01  Marc Glisse  
>
> gcc/
> * match.pd (-(-A)): Rewrite.
>
> gcc/testsuite/
> * gcc.dg/negneg.c: New file.
>
> --
> Marc Glisse

Re: [PATCH 6/9] [LVU] Allow final_start_function to skip initial insns

On Wed, Nov 1, 2017 at 7:18 PM, Alexandre Oliva  wrote:
> On Oct 31, 2017, Jeff Law  wrote:
>
>> On 09/30/2017 03:08 AM, Alexandre Oliva wrote:
>>> This API change will enable final_start_function() to "consume"
>>> initial insns, and choose the first insn to be passed to final().
>>>
>>> Many ports call final_start_function() and final() when creating
>>> thunks and whatnot, so they needed adjusting.
>> So I haven't really followed the discussion until now.  What's driving
>> the need to have some insns "consumed" and have more control over what
>> the first insn passed to final will be?
>
> We want to build debug notes that bind arguments into the initial view
> in a function.  That initial view (first .loc note) is emitted in
> final_start_function.  So we don't want to process the initial debug
> bind insns in final_start_function, and not process them again in final.
>
> In response to richi's objections, I reverted the API exposed by final.c
> so that we process the loc notes in final_start_function, and just skip
> them in final, so that no changes are required to the various backends,
> at a very slight performance penalty as the leading debug insns will be
> looked at twice instead of just once, when final is so used by the
> backends.

That works for me - we can still improve things with some refactoring, but
at least this doesn't introduce any ugliness along the way.

Richard.

> As for uses within final.c, those benefit from an API change internal to
> that file, that allows the leading debug insns to be processed just
> once.  Here are the relevant snippets from the updated patchset (yet to
> be posted):
>
>
> +/* We want to emit param bindings (before the first begin_stmt) in the
> +   initial view, if we are emitting views.  To that end, we may
> +   consume initial notes in the function, processing them in
> +   final_start_function, before signaling the beginning of the
> +   prologue, rather than in final.
> +
> +   We don't test whether the DECLs are PARM_DECLs: the assumption is
> +   that there will be a NOTE_INSN_BEGIN_STMT marker before any
> +   non-parameter NOTE_INSN_VAR_LOCATION.  It's ok if the marker is not
> +   there, we'll just have more variable locations bound in the initial
> +   view, which is consistent with their being bound without any code
> +   that would give them a value.  */
> +
> +static inline bool
> +in_initial_view_p (rtx_insn *insn)
> +{
> +  return !DECL_IGNORED_P (current_function_decl)
> +&& debug_variable_location_views
> +&& insn && GET_CODE (insn) == NOTE
> +&& NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION;
> +}
> +
>  /* Output assembler code for the start of a function,
> and initialize some of the variables in this file
> for the new function.  The label for the function and associated
> @@ -1757,12 +1819,15 @@ get_some_local_dynamic_name ()
>
> FIRST is the first insn of the rtl for the function being compiled.
> FILE is the file to write assembler code to.
> +   SEEN should be initially set to zero, and it may be updated to
> +   indicate we have references to the next location view, that would
> +   require us to emit it at the current PC.
> OPTIMIZE_P is nonzero if we should eliminate redundant
>   test and compare insns.  */
>
> -void
> -final_start_function (rtx_insn *first, FILE *file,
> - int optimize_p ATTRIBUTE_UNUSED)
> +static void
> +final_start_function_1 (rtx_insn **firstp, FILE *file, int *seen,
> +   int optimize_p ATTRIBUTE_UNUSED)
>  {
>block_depth = 0;
>
> @@ -1780,8 +1845,21 @@ final_start_function (rtx_insn *first, FILE *file,
>if (flag_sanitize & SANITIZE_ADDRESS)
>  asan_function_start ();
>
> +  rtx_insn *first = *firstp;
> +  if (in_initial_view_p (first))
> +{
> +  do
> +   {
> + final_scan_insn (first, file, 0, 0, seen);
> + first = NEXT_INSN (first);
> +   }
> +  while (in_initial_view_p (first));
> +  *firstp = first;
> +}
> +
>if (!DECL_IGNORED_P (current_function_decl))
> @@ -1856,6 +1934,17 @@ final_start_function (rtx_insn *first, FILE *file,
>  profile_after_prologue (file);
>  }
>
> +/* This is an exported final_start_function_1, callable without SEEN.  */
> +
> +void
> +final_start_function (rtx_insn *first, FILE *file,
> + int optimize_p ATTRIBUTE_UNUSED)
> +{
> +  int seen = 0;
> +  final_start_function_1 (&first, file, &seen, optimize_p);
> +  gcc_assert (seen == 0);
> +}
> +
>  static void
>  profile_after_prologue (FILE *file ATTRIBUTE_UNUSED)
>  {
> @@ -1987,11 +2076,10 @@ dump_basic_block_info (FILE *file, rtx_insn *insn, 
> basic_block *start_to_bb,
>  /* Output assembler code for some insns: all or part of a function.
> For description of args, see `final_start_function', above.  */
>
> -void
> -final (rtx_insn *first, FILE *file, int optimize_p)
> +static void
> +final_1 (rtx_insn *first, FILE *file, int seen, int optimize_p)
>  {
>rtx_insn *insn, *next;
> -  int s

Re: Generalize -(-X) a little

2017-11-02 Thread Marc Glisse


On Thu, 2 Nov 2017, Richard Biener wrote:


On Wed, Nov 1, 2017 at 12:47 PM, Marc Glisse  wrote:

Hello,

just a little tweak to that transformation. There is some overlap between
the 2 versions, but it seemed easier to handle the NOP case (including the
case without convert and the vector case) separately from the narrowing /
sign-extending scalar integer case.

At some point it would be good to have fold_negate_expr call
generic_simplify so we could remove some transformations from fold-const.c.

Bootstrap+regtest on powerpc64le-unknown-linux-gnu.


+  (negate (convert (negate @1)))
+  (if (INTEGRAL_TYPE_P (type)
+   && (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@1))
+  || (!TYPE_UNSIGNED (TREE_TYPE (@1))
+  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1))))
+   && !TYPE_OVERFLOW_SANITIZED (type)
+   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))

so I don't understand this fully -- a widening conversion is ok only for
signed types with undefined overflow?  How does overflow come into play
here at all?


Sign-extension is ok, as long as negate does what one expects. For 
INT_MIN, that's not the case.


Consider -(long)(-INT_MIN). -INT_MIN is INT_MIN, cast to long it remains 
negative, and the final value is positive (assuming long is larger than 
int), while (long)INT_MIN is negative.


Undefined overflow allows us to assume that X is not INT_MIN. I could
instead query VRP (and teach VRP that NEGATE_EXPR of VARYING has range
[-INT_MAX,INT_MAX] with undefined overflow), but that seems a bit
overkill.


The testcase doesn't tell ...


Probably I should add a second testcase with cases that must not be 
simplified. Maybe even a runtime test for INT_MIN...



For floats eliding any conversion should be ok if not flag_rounding_math
and flag_unsafe_math_optimizations (we remove a rounding step)?


I was leaving that for another time (currently this is handled differently 
in fold-const.c), but ok.


For true floats (not fixed point), any extension seems fine, and narrowing 
indeed requires !HONOR_SIGN_DEPENDENT_ROUNDING (on the smaller type). It 
isn't clear to me that we need flag_unsafe_math_optimizations, with 
round-to-nearest -(float)(-dbl) does look equivalent to (float)dbl.


I'll post a new patch with floats and more testcases when I get the time.

--
Marc Glisse

[PATCH] Fix PR82795


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-11-02  Richard Biener  

PR tree-optimization/82795
* tree-if-conv.c (predicate_mem_writes): Remove bogus assert.

* gcc.target/i386/pr82795.c: New testcase.

Index: gcc/tree-if-conv.c
===
--- gcc/tree-if-conv.c  (revision 254211)
+++ gcc/tree-if-conv.c  (working copy)
@@ -2247,10 +2247,7 @@ predicate_mem_writes (loop_p loop)
 TREE_OPERAND (cond, 0),
 TREE_OPERAND (cond, 1));
  else
-   {
- gcc_assert (TREE_CODE (cond) == SSA_NAME);
- mask = cond;
-   }
+   mask = cond;
 
  if (swap)
{
Index: gcc/testsuite/gcc.target/i386/pr82795.c
===
--- gcc/testsuite/gcc.target/i386/pr82795.c (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr82795.c (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2" } */
+
+void
+sj (int qh, int rn, int *by)
+{
+  for (;;)
+if (qh != 0)
+  {
+   int dc;
+
+   for (dc = 0; dc < 17; ++dc)
+ {
+   int nn;
+
+   nn = (rn != 0) ? qh : dc;
+   if (nn != 0)
+ qh = nn;
+   else
+ qh = (qh != 0) ? *by : dc;
+ }
+  }
+}

Re: [PATCH] Add offset_int to guard HOST_WIDE_INT overflow (PR tree-optimization/82042).

On 10/19/2017 01:58 PM, Richard Biener wrote:
> In the end I don't think we want to slow down code just for the sake of
> UBSAN.  IMHO for code invoking undefined behavior (object too large)
> it's reasonable for the compiler to invoke undefined behavior 

Agree that it's very artificial test-case. That said, I'm leaving that.

Martin

Re: [PATCH] Fix UBSAN errors in dse.c (PR rtl-optimization/82044).

PING^1

On 10/19/2017 01:36 PM, Martin Liška wrote:
> On 09/20/2017 10:15 AM, Jakub Jelinek wrote:
>> On Wed, Sep 20, 2017 at 09:50:32AM +0200, Martin Liška wrote:
>>> Hello.
>>>
>>> Following patch handles UBSAN (overflow) in dce.c.
>>
>> dse.c ;)
>>
>>> --- a/gcc/dse.c
>>> +++ b/gcc/dse.c
>>> @@ -929,7 +929,9 @@ set_usage_bits (group_info *group, HOST_WIDE_INT 
>>> offset, HOST_WIDE_INT width,
>>>  {
>>>HOST_WIDE_INT i;
>>>bool expr_escapes = can_escape (expr);
>>> -  if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
>>> +  if (offset > -MAX_OFFSET
>>> +  && offset < MAX_OFFSET
>>> +  && offset + width < MAX_OFFSET)
>>
>> This can still overflow if width is close to HOST_WIDE_INT_MAX.
>> Anyway, I don't remember this code too much, but wonder if either offset or
>> width or their sum is outside of the -MAX_OFFSET, MAX_OFFSET range if we
>> still don't want to record usage bits at least in the intersection of
>> -MAX_OFFSET, MAX_OFFSET and offset, offset + width (the latter performed
>> with infinite precision; though, if record_store is changed as suggested
>> below, offset + width shouldn't overflow).
>>
>>>  for (i=offset; i<offset+width; i++)
>>>{
>>> bitmap store1;
>>> @@ -1536,7 +1538,11 @@ record_store (rtx body, bb_info_t bb_info)
>>>  }
>>>store_info->group_id = group_id;
>>>store_info->begin = offset;
>>> -  store_info->end = offset + width;
>>> +  if (offset > HOST_WIDE_INT_MAX - width)
>>> +store_info->end = HOST_WIDE_INT_MAX;
>>> +  else
>>> +store_info->end = offset + width;
>>
>> If offset + width overflows, I think we risk wrong-code by doing this, plus
>> there are 3 other offset + width computations earlier in record_store
>> before we reach this.  I think instead we should treat such cases as wild
>> stores early, i.e.:
>>if (!canon_address (mem, &group_id, &offset, &base))
>>  {
>>clear_rhs_from_active_local_stores ();
>>return 0;
>>  }
>>  
>>if (GET_MODE (mem) == BLKmode)
>>  width = MEM_SIZE (mem);
>>else
>>  width = GET_MODE_SIZE (GET_MODE (mem));
>>
>> +  if (offset > HOST_WIDE_INT_MAX - width)
>> +{
>> +  clear_rhs_from_active_local_stores ();
>> +  return 0;
>> +}
>>
>> or so.
>>
>>> +
>>>store_info->is_set = GET_CODE (body) == SET;
>>>store_info->rhs = rhs;
>>>store_info->const_rhs = const_rhs;
>>> @@ -1976,6 +1982,14 @@ check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
>>>return;
>>>  }
>>>  
>>> +  if (offset > MAX_OFFSET)
>>> +{
>>> +  if (dump_file && (dump_flags & TDF_DETAILS))
>>> +   fprintf (dump_file, " reaches MAX_OFFSET.\n");
>>> +  add_wild_read (bb_info);
>>> +  return;
>>> +}
>>> +
> 
> Hi.
> 
> The later one works for me. I'm going to regtest that.
> 
> Ready after it survives regression tests?
> 
> Thanks,
> Martin
> 
>>
>> Is offset > MAX_OFFSET really problematic (and not just the width != -1 &&
>> offset + width overflowing case)?
>>
>>>if (GET_MODE (mem) == BLKmode)
>>>  width = -1;
>>>else
>>>
>>
>>
>>  Jakub
>>
>

Re: Adjust empty class parameter passing ABI (PR c++/60336)

On Thu, Nov 2, 2017 at 8:21 AM, Richard Biener  wrote:
> On Wed, 1 Nov 2017, Marek Polacek wrote:
>
>> On Fri, Oct 27, 2017 at 12:46:12PM +0200, Richard Biener wrote:
>> > On Fri, 27 Oct 2017, Jakub Jelinek wrote:
>> >
>> > > On Fri, Oct 27, 2017 at 12:31:46PM +0200, Richard Biener wrote:
>> > > > I fear it doesn't work at all with LTO (you'll always get the old ABI
>> > > > if I read the patch correctly).  This is because the function
>> > > > computing the size looks at flag_abi_version which isn't saved
>> > > > per function / TU.
>> > > >
>> > > > Similarly you'll never get the ABI warning with LTO (less of a big
>> > > > deal of course) because the langhook doesn't reflect things correctly
>> > > > either.
>> > > >
>> > > > So...  can we instead compute whether a type is "empty" according
>> > > > to the ABI early and store the result in the type (thinking of
>> > > > doing this in layout_type?).  Similarly set a flag whether to
>> > > > warn.  Why do you warn from backends / code emission and not
>> > > > from the FEs?  Is that to avoid warnings for calls that got inlined?
>> > > > Maybe the FE could set a flag on the call itself (ok, somewhat
>> > > > awkward to funnel through gimple).
>> > >
>> > > Warning in the FE is too early both because of the inlining, never
>> > > emitted functions and because whether an empty struct is passed 
>> > > differently
>> > > from the past matters on the backend (whether its psABI says it should be
>> > > not passed at all or not).
>> > >
>> > > Perhaps if empty types are rare enough it could be an artificial 
>> > > attribute
>> > > on the type if we can't get a spare bit for that.  But computing in the 
>> > > FE
>> > > or before free_lang_data and saving on the type whether it is empty or 
>> > > not
>> > > seems reasonable to me.
>> >
>> > There are 18 unused bits in tree_type_common if we don't want to re-use
>> > any.  For the warning I first thought of setting TREE_NO_WARNING on it
>> > but that bit is used already.  OTOH given the "fit" of TREE_NO_WARNING
>> > I'd move TYPE_ARTIFICIAL somewhere else.
>>
>> All right, should be done in the below.  I've introduced two new flags,
>> TYPE_EMPTY_P (says whether the type is empty according to the psABI), and
>> TYPE_WARN_EMPTY_P (whether we should warn).  I've added two new fields to
>> tree_type_common and moved TYPE_ARTIFICIAL there; TYPE_WARN_EMPTY_P is now
>> mapped to nowarning_flag.  So this should work with LTO, as demonstrated
>> by g++.dg/lto/pr60336_0.C.
>>
>> Regarding LTO and -Wabi warning, I've added Optimization to c.opt so that
>> we get warnings with LTO.  But as pointed out on IRC, this doesn't fully work
>> with cross-inlining.  I tried to do some flags merging in inline_call, but
>> that didn't help, one of the problems is that warn_abi_version lives in
>> c-family only.  Not sure if I'll be able to improve things here though.
>>
>> Bootstrapped/regtested on x86_64-linux, ppc64-linux, and aarch64-linux.
>> Bootstrap-lto passed on x86_64-linux and ppc64-linux.
>
> To me the tree.c stuff is_empty_type looks awfully ABI dependent
> and should thus reside in i386.c near the target hook implementation?

I think there should be a default version in common code, to hopefully
be shared by all targets that want this behavior.

> What goes wrong if we do not introduce new int_maybe_empty_type_size
> and maybe_empty_type_size but instead change int_size_in_bytes and
> size_in_bytes to return 0 if TYPE_EMPTY_P ()?  If the ABI can omit
> passing things assuming the size is zero should work as well, no?

We need to distinguish between size in general and size for calling
convention purposes, but the function names should mention the calling
convention rather than "maybe_empty".  Maybe something like
"arg_size_in_bytes"?

> Otherwise I'd really prefer seeing explicit TYPE_EMPTY_P checks
> which would reduce the number of "indirect" greps one has to do when
> looking for effects of TYPE_EMPTY_P.

Hmm, yes, I was hoping we could encapsulate this in target code, but
needing these flags for LTO messes that up; if we can't have full
encapsulation, maybe we want less?

> Still needs FE and target maintainer approval -- the target maintainer
> wants to look at the seemingly ABI independent functions in tree.c.

Instead of moving array_type_nelts_top to tree.c, you can use
integer_minus_onep (array_type_nelts (ftype)).

I'm still not sure why you want to consider a type with a flexible
array member non-empty.  Is this to avoid changing the C ABI?  I'm
surprised it's even allowed to pass/return by value a struct with a
flexible array member, since that doesn't copy the contents of the
array.

Jason

Enable inc/dec generation on Haswell+

2017-11-02 Thread Jan Hubicka

Hi,
core2 used to have quite a large penalty for the partial flags register
store done by inc/dec.  This was improved on Sandybridge, where an extra
merging uop is produced, and further on Haswell, where there is no extra
uop unless there is an instruction accessing both.  For this reason we can
use inc/dec again on modern variants of core.

Bootstrapped/regtested x86_64-linux and tested on Haswell spec2k/spec2k6
with no measurable performance impact.

Honza

* x86-tune.def (X86_TUNE_USE_INCDEC): Enable for Haswell+.

Index: config/i386/x86-tune.def
===
--- config/i386/x86-tune.def(revision 254199)
+++ config/i386/x86-tune.def(working copy)
@@ -220,10 +220,15 @@ DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall
as "add mem, reg".  */
 DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
 
-/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions.   */
+/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions.
+
+   Core2 and nehalem has stall of 7 cycles for partial flag register stalls.
+   Sandy bridge and Ivy bridge generate extra uop.  On Haswell this extra uop
+   is output only when the values needs to be really merged, which is not
+   done by GCC generated code.  */
 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
-  ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
-  |  m_KNL | m_KNM | m_GENERIC))
+  ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE
+   | m_BONNELL | m_SILVERMONT | m_INTEL |  m_KNL | m_KNM | m_GENERIC))
 
 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */

Re: [PATCH] fix fdump-lang-raw ICE


On 11/01/2017 03:40 PM, Jakub Jelinek wrote:

On Wed, Nov 01, 2017 at 03:24:56PM -0400, Nathan Sidwell wrote:

Thomas Koenig noticed -fdump-lang-raw gives an immediate ICE.  I broke it
with the DECL_ASSEMBLER_NAME changes last month.  Fixed thusly, applied as
obvious.


Shouldn't there be a testcase with that option?


Oh, very well then :)

nathan
--
Nathan Sidwell
2017-11-02  Nathan Sidwell  

	* g++.dg/lang-dump.C: New.

Index: g++.dg/lang-dump.C
===
--- g++.dg/lang-dump.C	(revision 0)
+++ g++.dg/lang-dump.C	(working copy)
@@ -0,0 +1,21 @@
+// { dg-additional-options "-fdump-lang-all" }
+// Just check we don't explode when asking for language dumps.  Does
+// not necessarily mean any particular language dump is useful.
+
+struct X 
+{
+  int m;
+  virtual ~X ();
+};
+
+X::~X () {}
+
+struct Y : X
+{
+};
+
+int frob (int a)
+{
+  return 2 * a;
+}
+

[PATCH] Improve store merging to handle load+store or bitwise logicals (PR tree-optimization/78821)

2017-11-02 Thread Jakub Jelinek

Hi!

The following patch improves store merging, so that it doesn't handle
just constant stores into adjacent memory, but also adjacent memory copying
and simple bitwise logical ops where at least one argument is a load
from adjacent memory and the other argument as well or a constant.
The loads are limited to be either all using the same vuse, or each using
vuse of the corresponding stores.  So examples of what can be handled are:
  s.a = 1; s.b = 2; // we could handle this before this patch already
  _1 = t.a; _2 = t.b; s.a = _1; s.b = _2; // copying with the same vuse
  _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; // copying with vuse of the store
  _1 = s.a; _2 = _1 | 23; _3 = s.b; _4 = _3 | 12345; s.a = _2; s.b = _4; // | with one load and one constant
etc.
What the patch doesn't handle yet because
terminate_all_aliasing_chains uses:
  /* We can't use the base object here as that does not reliably exist.
 Build a ao_ref from the base object address (if we know the
 minimum and maximum offset and the maximum size we could improve
 things here).  */
  ao_ref chain_ref;
  ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
is e.g.
void
f3 (struct S *__restrict p, struct S *__restrict q)
{
  p->a |= q->a; p->b |= q->b; p->c |= q->c; p->d |= q->d;
  p->e |= q->e; p->f |= q->f; p->g |= q->g; p->h |= q->h;
}
I'll try to improve that incrementally by preserving the underlying original
reference and tracking minimum/maximum offsets from that.

The patch also doesn't hook in the bswap infrastructure to recognize say
struct S { char a, b, c, d; } u, v;
void foo (void) { u.a = v.d; u.b = v.c; u.c = v.b; u.d = v.a; }
though I wonder if it is worth it (whether there is any real-world code like
that at all, or whether it is common enough to be worth the work on it).

Bootstrapped/regtested on {x86_64,i686,powerpc64,powerpc64le}-linux, ok for
trunk?

I'm now doing another x86_64/i686 bootstrap/regtest to gather some
statistics, both are still regtesting now, the current numbers show:
rhs_code      split_stores.length ()  orig_num_stmts
integer_cst   135533                  275746
mem_ref       13289                   27852
bit_*_expr    36                      81
so the first row shows that already before this patch when we decided to
optimize constant stores we decreased the number to 50% on average, for
memory copying around 10% cases of the constant stores and the reason
why the bitwise logical don't trigger much is probably related to the
above mentioned ao_ref_init* missed-opt as well as such constructs being
far less common.  In theory we could handle also mixed rhs codes, but not
sure it is worth the effort - e.g. if somebody does:
  s.a = 5; s.b |= 4; s.c &= 2; s.d ^= 5;
we could load the memory and do some |/&/^ on it.

2017-11-02  Jakub Jelinek  

PR tree-optimization/78821
* gimple-ssa-store-merging.c (struct store_operand_info): New type.
(store_operand_info::store_operand_info): New constructor.
(struct store_immediate_info): Add rhs_code and ops data members.
(store_immediate_info::store_immediate_info): Add rhscode, op0r
and op1r arguments to the ctor, initialize corresponding data members.
(struct merged_store_group): Add load_align_base and load_align
data members.
(merged_store_group::merged_store_group): Initialize them.
(merged_store_group::do_merge): Update them.
(merged_store_group::apply_stores): Pick the constant for
encode_tree_to_bitpos from one of the two operands, or skip
encode_tree_to_bitpos if neither operand is a constant.
(class pass_store_merging): Add process_store method decl.  Remove
bool argument from terminate_all_aliasing_chains method decl.
(pass_store_merging::terminate_all_aliasing_chains): Remove
var_offset_p argument and corresponding handling.
(stmts_may_clobber_ref_p): New function.
(compatible_load_p): New function.
(imm_store_chain_info::coalesce_immediate_stores): Terminate group
if there is overlap and rhs_code is not INTEGER_CST.  For
non-overlapping stores terminate group if rhs is not mergeable.
(get_alias_type_for_stmts): Change first argument from
auto_vec & to vec &.  Add IS_LOAD, CLIQUEP and
BASEP arguments.  If IS_LOAD is true, look at rhs1 of the stmts
instead of lhs.  Compute *CLIQUEP and *BASEP in addition to the
alias type.
(get_location_for_stmts): Change first argument from
auto_vec & to vec &.
(struct split_store): Remove orig_stmts data member, add orig_stores.
(split_store::split_store): Create orig_stores rather than orig_stmts.
(find_constituent_stmts): Renamed to ...
(find_constituent_stores): ... this.  Change second argument from
vec * to vec *, push pointers
to info structures rather than the stateme

Re: [C++ PATCH] overloaded operator fns [8/N]


On 11/01/2017 04:08 PM, Jason Merrill wrote:

On Wed, Nov 1, 2017 at 2:29 PM, Nathan Sidwell  wrote:



-&& IDENTIFIER_NEWDEL_OP_P (unqualified_id)))
+&& IDENTIFIER_OVL_OP_P (unqualified_id)
+&& (IDENTIFIER_OVL_OP_FLAGS (unqualified_id) & OVL_OP_FLAG_ALLOC)))


Why not keep the name IDENTIFIER_NEWDEL_OP_P, which expands to this?


Hm, I guess that would be better.  This patch does that, and introduces 
IDENTIFIER_NEW_OP_P, for when we just want the new operators.


nathan

--
Nathan Sidwell
2017-11-02  Nathan Sidwell  

	* cp-tree.h (IDENTIFIER_NEWDEL_OP_P): Restore, adjust.
	(IDENTIFIER_NEW_OP_P): New.
	* decl.c (grokdeclarator): Restore IDENTIFIER_NEWDEL_OP_P use.
	* pt.c (push_template_decl_real): Likewise.
	* typeck.c (check_return_expr): Use IDENTIFIER_NEW_OP_P.

Index: cp/cp-tree.h
===
--- cp/cp-tree.h	(revision 254344)
+++ cp/cp-tree.h	(working copy)
@@ -1070,6 +1070,17 @@ enum cp_identifier_kind {
& IDENTIFIER_KIND_BIT_1 (NODE)		\
& (!IDENTIFIER_KIND_BIT_0 (NODE)))
 
+/* True if this identifier is a new or delete operator.  */
+#define IDENTIFIER_NEWDEL_OP_P(NODE)		\
+  (IDENTIFIER_OVL_OP_P (NODE)			\
+   && IDENTIFIER_OVL_OP_FLAGS (NODE) & OVL_OP_FLAG_ALLOC)
+
+/* True if this identifier is a new operator.  */
+#define IDENTIFIER_NEW_OP_P(NODE)	\
+  (IDENTIFIER_OVL_OP_P (NODE)		\
+   && (IDENTIFIER_OVL_OP_FLAGS (NODE)	\
+   & (OVL_OP_FLAG_ALLOC | OVL_OP_FLAG_DELETE)) == OVL_OP_FLAG_ALLOC)
+
 /* Access a C++-specific index for identifier NODE.
Used to optimize operator mappings etc.  */
 #define IDENTIFIER_CP_INDEX(NODE)		\
Index: cp/decl.c
===
--- cp/decl.c	(revision 254344)
+++ cp/decl.c	(working copy)
@@ -11744,8 +11744,7 @@ grokdeclarator (const cp_declarator *dec
 
   if (ctype && TREE_CODE (type) == FUNCTION_TYPE && staticp < 2
   && !(identifier_p (unqualified_id)
-	   && IDENTIFIER_OVL_OP_P (unqualified_id)
-	   && (IDENTIFIER_OVL_OP_FLAGS (unqualified_id) & OVL_OP_FLAG_ALLOC)))
+	   && IDENTIFIER_NEWDEL_OP_P (unqualified_id)))
 {
   cp_cv_quals real_quals = memfn_quals;
   if (cxx_dialect < cxx14 && constexpr_p
@@ -11858,9 +11857,7 @@ grokdeclarator (const cp_declarator *dec
 
 		if (virtualp
 		&& identifier_p (unqualified_id)
-		&& IDENTIFIER_OVL_OP_P (unqualified_id)
-		&& (IDENTIFIER_OVL_OP_FLAGS (unqualified_id)
-			& OVL_OP_FLAG_ALLOC))
+		&& IDENTIFIER_NEWDEL_OP_P (unqualified_id))
 		  {
 		error ("%qD cannot be declared %, since it "
 			   "is always static", unqualified_id);
Index: cp/pt.c
===
--- cp/pt.c	(revision 254344)
+++ cp/pt.c	(working copy)
@@ -5329,9 +5329,7 @@ push_template_decl_real (tree decl, bool
 	  error ("destructor %qD declared as member template", decl);
 	  return error_mark_node;
 	}
-	  if (IDENTIFIER_OVL_OP_P (DECL_NAME (decl))
-	  && (IDENTIFIER_OVL_OP_FLAGS (DECL_NAME (decl))
-		  & OVL_OP_FLAG_ALLOC)
+	  if (IDENTIFIER_NEWDEL_OP_P (DECL_NAME (decl))
 	  && (!prototype_p (TREE_TYPE (decl))
 		  || TYPE_ARG_TYPES (TREE_TYPE (decl)) == void_list_node
 		  || !TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (decl)))
Index: cp/typeck.c
===
--- cp/typeck.c	(revision 254344)
+++ cp/typeck.c	(working copy)
@@ -9073,9 +9073,7 @@ check_return_expr (tree retval, bool *no
 }
 
   /* Only operator new(...) throw(), can return NULL [expr.new/13].  */
-  if (IDENTIFIER_OVL_OP_P (DECL_NAME (current_function_decl))
-  && ((IDENTIFIER_OVL_OP_FLAGS (DECL_NAME (current_function_decl))
-	   & (OVL_OP_FLAG_ALLOC | OVL_OP_FLAG_DELETE)) == OVL_OP_FLAG_ALLOC)
+  if (IDENTIFIER_NEW_OP_P (DECL_NAME (current_function_decl))
   && !TYPE_NOTHROW_P (TREE_TYPE (current_function_decl))
   && ! flag_check_new
   && retval && null_ptr_cst_p (retval))

Re: [PATCH] PR fortran/82796 -- common entity in equivalence in pure routine

2017-11-02 Thread Steve Kargl

On Thu, Nov 02, 2017 at 11:48:36AM +, Paul Richard Thomas wrote:
> Hi Steve,
> 
> I read the correspondence on clf and your earlier posting here. With
> those in mind, the patch looks to be OK to commit.
> 
> Thanks
> 

Thanks.  The c.l.f posting is addressing a different issue.
This patch simply fixes a regression that was introduced
ages ago.  Here, one needs the combination of COMMON,
EQUIVALENCE, and a PURE subprogram.  From F2003, it comes
from C1272.  The problem is that gfortran was issuing an
error when it should not have.  The patch suppresses that
error.

-- 
Steve

Re: [C++ Patch] PR 80955 (Macros expanded in definition of user-defined literals)

On Wed, Nov 1, 2017 at 4:45 PM, Mukesh Kapoor  wrote:
> On 11/1/2017 1:02 PM, Jason Merrill wrote:
>>
>> On Tue, Oct 31, 2017 at 12:17 PM, Mukesh Kapoor
>>  wrote:
>>>
>>> On 10/25/2017 6:44 PM, Mukesh Kapoor wrote:

 On 10/25/2017 4:20 AM, Nathan Sidwell wrote:
>
> On 10/25/2017 12:03 AM, Mukesh Kapoor wrote:
>
>> Thanks for pointing this out. Checking in the front end will be
>> difficult because the front end gets tokens after macro expansion. I
>> think
>> the difficulty of fixing this bug comes because of the requirement to
>> maintain backward compatibility with the option -Wliteral-suffix for
>> -std=c++11.
>
>
> IIUC the warning's intent is to catch cases of:
> printf ("some format"PRIx64 ..., ...);
> where there's no space between the string literals and the PRIx64
> macro.
> I suspect it's very common for there to be a following string-literal,
> so
> perhaps the preprocessor could detect:
>
> NON-FN-MACRO
>
> and warn on that sequence?


 Yes, this can be done easily and this is also the usage mentioned in the
 man page. I made this change in the compiler, bootstrapped it and ran
 the
 tests. The following two tests fail after the fix:

 g++.dg/cpp0x/Wliteral-suffix.C
 g++.dg/cpp0x/warn_cxx0x4.C

 Both tests have code similar to the following (from Wliteral-suffix.C):

 #define BAR "bar"
 #define PLUS_ONE + 1

char c = '3'PLUS_ONE;   // { dg-warning "invalid suffix on literal" }
char s[] = "foo"BAR;// { dg-warning "invalid suffix on literal" }

 Other compilers don't accept this code. Maybe I should just modify these
 tests to have error messages instead of warnings and submit my revised
 fix?
>>>
>>>
>>> Actually, according to the man page for -Wliteral-suffix, only macro
>>> names
>>> that don't start with an underscore should be considered when issuing a
>>> warning:
>>>
>>> -Wliteral-suffix (C++ and Objective-C++ only)
>>> Warn when a string or character literal is followed by a
>>> ud-suffix
>>> which does not begin with an underscore...
>>>
>>> So the fix is simply to check if the macro name in is_macro() starts with
>>> an
>>> underscore. The function is_macro() is called only at three places. At
>>> two
>>> places it's used to check for the warning related to -Wliteral-suffix and
>>> the check for underscore should be made for these two cases; at one place
>>> it
>>> is used to check for the warning related to -Wc++11-compat and there is
>>> no
>>> need to check for underscore for this case.
>>>
>>> The fix is simply to pass a bool flag as an additional argument to
>>> is_macro() to decide whether the macro name starts with an underscore or
>>> not. I have tested the attached patch on x86_64-linux. Thanks.
>>
>> Rather than add a mysterious parameter to is_macro, how about checking
>> *cur != '_' before we call it?
>
> This is a good suggestion. I have attached the revised patch. Thanks.

OK, thanks!

Jason

Re: [PATCH] RFC: Preserving locations for variable-uses and constants (PR 43486)

On Tue, Oct 31, 2017 at 5:09 PM, David Malcolm  wrote:
> On Tue, 2017-10-24 at 09:53 -0400, Jason Merrill wrote:
>> On Fri, Oct 20, 2017 at 5:53 PM, David Malcolm 
>> wrote:
>> > Design questions:
>> >
>> > * The patch introduces a new kind of tree node, currently called
>> >   DECL_WRAPPER_EXPR (although it's used for wrapping constants as
>> > well
>> >   as decls).  Should wrappers be a new kind of tree node, or should
>> > they
>> >   reuse an existing TREE_CODE? (e.g. NOP_EXPR, CONVERT_EXPR, etc).
>> > * NOP_EXPR: seems to be for use as an rvalue
>> > * CONVERT_EXPR: for type conversions
>> > * NON_LVALUE_EXPR: "Value is same as argument, but guaranteed
>> > not an
>> >   lvalue"
>> >   * but we *do* want to support lvalues here
>>
>> I think using NON_LVALUE_EXPR for constants would be appropriate.
>>
>> > * VIEW_CONVERT_EXPR: viewing one thing as of a different type
>> >   * can it support lvalues?
>>
>> Yes, the purpose of VIEW_CONVERT_EXPR is to support lvalues, it seems
>> like the right choice.
>>
>> Jason
>
> Thanks.  I've been working on a new version of the patch using those
> tree codes, but have run into an issue.
>
> In g++.dg/conversion/reinterpret1.C:
>
>   // PR c++/15076
>
>   struct Y { Y(int &); };
>
>   int v;
>   Y y1(reinterpret_cast<int>(v));  // { dg-error "" }
>
> With trunk, this successfully generates an error:
>
>   reinterpret1.C:6:6: error: cannot bind non-const lvalue reference of type 
> ‘int&’ to an rvalue of type ‘int’
>Y y1(reinterpret_cast<int>(v));  // { dg-error "" }
> ^~~~
>   reinterpret1.C:3:12: note:   initializing argument 1 of ‘Y::Y(int&)’
>struct Y { Y(int &); };
>   ^
>
> where internally there's a NON_LVALUE_EXPR around a VAR_DECL, where
> both have the same type:
>
> (gdb) call debug_tree (expr)
>   type  size 
> unit-size 
> align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
> 0x7132e5e8 precision:32 min  max 
> 
> pointer_to_this  reference_to_this 
> >
>
> arg:0 
> used public static tree_1 read SI 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C:5:5
>  size  unit-size 
> align:32 warn_if_not_align:0 context  0x7131e168 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C>
> chain 
> public decl_2 VOID 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C:3:8
> align:8 warn_if_not_align:0 context  0x7131e168 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C>
>  chain >>
> 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C:6:6
>  start: 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C:6:6
>  finish: 
> /home/david/coding-3/gcc-git-expr-vs-decl/src/gcc/testsuite/g++.dg/conversion/reinterpret1.C:6:29>
>
> The problem is that this reinterpret cast "looks" just like one of my
> location wrappers.

Your code shouldn't strip a NON_LVALUE_EXPR around a VAR_DECL.

> I see a similar issue with constants, where with:
>
>   struct Y { Y(int &); };
>   Y y1(reinterpret_cast<int>(42));
>
> trunk generates an error like the above, but my code handles the
>   NON_LVALUE_EXPR(INTEGER_CST(42))
> as if it were a location wrapper around the INTEGER_CST, and thus
> doesn't emit the error.

Why doesn't it emit the error?  We should get the same error whether
or not we strip the wrapper.

Jason

Re: [PATCH] PR debug/81570: dwarf2cfi.c: Update cfa.offset in create_pseudo_cfg

On Tue, Oct 31, 2017 at 1:57 PM, H.J. Lu  wrote:
> On Tue, Oct 24, 2017 at 8:26 PM, Jason Merrill  wrote:
>> On Thu, Jul 27, 2017 at 3:50 PM, H.J. Lu  wrote:
>>> execute_dwarf2_frame is called for each function.  But create_cie_data
>>> is called only once to initialize cie_cfi_row for all functions.  Since
>>> INCOMING_FRAME_SP_OFFSET may be different for each function, we can't
>>> use the same INCOMING_FRAME_SP_OFFSET in cie_cfi_row for all functions.
>>> This patch sets cie_cfi_row->cfa.offset to INCOMING_FRAME_SP_OFFSET in
>>> create_pseudo_cfg which is called for each function.
>>>
>>> Tested on x86-64.  OK for trunk?
>>
>> This looks wrong.  cie_cfi_row is the state produced by the
>> instructions in the CIE, which don't vary between functions.  If
>
> /* The state of the first row of the FDE table, which includes the
>state provided by the CIE.  */
> static GTY(()) dw_cfi_row *cie_cfi_row;
>
> cie_cfi_row is created by
>
>   cie_cfi_row = cur_row = new_cfi_row ();
>
>   /* On entry, the Canonical Frame Address is at SP.  */
>   memset (&loc, 0, sizeof (loc));
>   loc.reg = dw_stack_pointer_regnum;
>   loc.offset = INCOMING_FRAME_SP_OFFSET;
>   def_cfa_1 (&loc);
>
> and used by create_pseudo_cfg
>
>   ti.beg_row = cie_cfi_row;
>   ti.cfa_store = cie_cfi_row->cfa;
>
> The problem is that the offset field in cie_cfi_row->cfa may not be the same 
> for
> all functions.

Sure, the desired value of the offset field may not be the same.  But
cie_cfi_row->cfa reflects what the actual DWARF instructions emitted
in the CIE tell the consumer.  If what those instructions tell the
consumer is wrong for some functions, then we need to add instructions
to the FDE for such functions in order to correct the information.
Pretending that the CIE means different things to different functions
will just mean that unwinding fails.

> cie_cfi_row does change in this case.  My patch simply corrects
> the offset in CFA of the first row of the FDE table.
>
>> INCOMING_FRAME_SP_OFFSET varies, we need to add actual FDE
>> instructions to reflect that, not just clobber our current model of
>> what the CIE means.
>>
>> Jason
>
>
>
> --
> H.J.

Re: Adjust empty class parameter passing ABI (PR c++/60336)

2017-11-02 Thread Marek Polacek

On Thu, Nov 02, 2017 at 01:21:17PM +0100, Richard Biener wrote:
> On Wed, 1 Nov 2017, Marek Polacek wrote:
> 
> > On Fri, Oct 27, 2017 at 12:46:12PM +0200, Richard Biener wrote:
> > > On Fri, 27 Oct 2017, Jakub Jelinek wrote:
> > > 
> > > > On Fri, Oct 27, 2017 at 12:31:46PM +0200, Richard Biener wrote:
> > > > > I fear it doesn't work at all with LTO (you'll always get the old ABI
> > > > > if I read the patch correctly).  This is because the function
> > > > > computing the size looks at flag_abi_version which isn't saved
> > > > > per function / TU.
> > > > > 
> > > > > Similarly you'll never get the ABI warning with LTO (less of a big
> > > > > deal of course) because the langhook doesn't reflect things correctly
> > > > > either.
> > > > > 
> > > > > So...  can we instead compute whether a type is "empty" according
> > > > > to the ABI early and store the result in the type (thinking of
> > > > > doing this in layout_type?).  Similarly set a flag whether to
> > > > > warn.  Why do you warn from backends / code emission and not
> > > > > from the FEs?  Is that to avoid warnings for calls that got inlined?
> > > > > Maybe the FE could set a flag on the call itself (ok, somewhat
> > > > > awkward to funnel through gimple).
> > > > 
> > > > Warning in the FE is too early both because of the inlining, never
> > > > emitted functions and because whether an empty struct is passed 
> > > > differently
> > > > from the past matters on the backend (whether its psABI says it should 
> > > > be
> > > > not passed at all or not).
> > > > 
> > > > Perhaps if empty types are rare enough it could be an artificial 
> > > > attribute
> > > > on the type if we can't get a spare bit for that.  But computing in the 
> > > > FE
> > > > or before free_lang_data and saving on the type whether it is empty or 
> > > > not
> > > > seems reasonable to me.
> > > 
> > > There are 18 unused bits in tree_type_common if we don't want to re-use
> > > any.  For the warning I first thought of setting TREE_NO_WARNING on it
> > > but that bit is used already.  OTOH given the "fit" of TREE_NO_WARNING
> > > I'd move TYPE_ARTIFICIAL somewhere else.
> > 
> > All right, should be done in the below.  I've introduced two new flags,
> > TYPE_EMPTY_P (says whether the type is empty according to the psABI), and
> > TYPE_WARN_EMPTY_P (whether we should warn).  I've added two new fields to
> > tree_type_common and moved TYPE_ARTIFICIAL there; TYPE_WARN_EMPTY_P is now
> > mapped to nowarning_flag.  So this should work with LTO, as demonstrated
> > by g++.dg/lto/pr60336_0.C.  
> > 
> > Regarding LTO and -Wabi warning, I've added Optimization to c.opt so that
> > we get warnings with LTO.  But as pointed out on IRC, this doesn't fully work
> > with cross-inlining.  I tried to do some flags merging in inline_call, but
> > that didn't help, one of the problems is that warn_abi_version lives in
> > c-family only.  Not sure if I'll be able to improve things here though.
> > 
> > Bootstrapped/regtested on x86_64-linux, ppc64-linux, and aarch64-linux.
> > Bootstrap-lto passed on x86_64-linux and ppc64-linux.
> 
> To me the tree.c stuff is_empty_type looks awfully ABI dependent
> and should thus reside in i386.c near the target hook implementation?
> 
> What goes wrong if we do not introduce new int_maybe_empty_type_size
> and maybe_empty_type_size but instead change int_size_in_bytes and
> size_in_bytes to return 0 if TYPE_EMPTY_P ()?  If the ABI can omit
> passing things assuming the size is zero should work as well, no?
> Otherwise I'd really prefer seeing explicit TYPE_EMPTY_P checks
> which would reduce the number of "indirect" greps one has to do when
> looking for effects of TYPE_EMPTY_P.
 
More on this in another mail.

> Otherwise the middle-end/LTO parts look ok.

Thanks.

> I'd omit the 'Optimization' change on the Wabi warning flag if it
> doesn't fully give us what we want and address this as a followup.
>
> I think 'Optimization' is also used for -help reporting and thus
> could be confusing at first.

Done.  Alternatively we could change lto_handle_option to handle 
even OPT_Wabi_ case.  The current code looks dubious anyway: in
the OPT_Wabi case we change warn_psabi instead of warn_abi...

Marek

[committed][PATCH] Trivial cleanups to new classes



As has been discussed on-list, this patch adds a virtual destructor to 
the new classes in tree-ssa-propagate.h per our coding conventions and 
what are considered best practices.  It doesn't matter for any code I'm 
aware of today -- it's a defensive measure.


This also drops the "virtual" keyword on the FINAL OVERRIDE member 
functions in gimple-ssa-sprintf's sprintf_dom_walker class.  Opinions 
here are more mixed.  It's agreed that the keyword is redundant in this 
context.  The question is whether or not it adds confusion or reduces 
confusion.


The virtual keyword intuitively implies to me the member can be 
overridden by a derived class, but that's in direct conflict with the 
FINAL keyword.


Others focus more on the fact that the virtual keyword implies that the 
calls are typically indirect.   But in the case of a FINAL, one of the 
hopes is that devirt can use the information to change the indirect call 
into a direct call.


In the end the arguments for dropping the "virtual" seemed stronger to me.

Bootstrapped and regression tested on x86.  Installing on the trunk.

Jeff

ps. I suspect there are similar cleanups we ought to be doing on other 
classes used within GCC.


* gimple-ssa-sprintf.c (sprintf_dom_walker): Remove
virtual keyword on FINAL OVERRIDE members.

* tree-ssa-propagate.h (ssa_propagation_engine): Group
virtuals together.  Add virtual destructor.
(substitute_and_fold_engine): Similarly.

diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index 7415413..35ceb2c 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -120,7 +120,7 @@ class sprintf_dom_walker : public dom_walker
   sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
   ~sprintf_dom_walker () {}
 
-  virtual edge before_dom_children (basic_block) FINAL OVERRIDE;
+  edge before_dom_children (basic_block) FINAL OVERRIDE;
   bool handle_gimple_call (gimple_stmt_iterator *);
 
   struct call_info;
diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
index 629ae77..be4500b 100644
--- a/gcc/tree-ssa-propagate.h
+++ b/gcc/tree-ssa-propagate.h
@@ -81,14 +81,16 @@ class ssa_propagation_engine
 {
  public:
 
-  /* Main interface into the propagation engine.  */
-  void ssa_propagate (void);
+  virtual ~ssa_propagation_engine (void) { }
 
   /* Virtual functions the clients must provide to visit statements
  and phi nodes respectively.  */
   virtual enum ssa_prop_result visit_stmt (gimple *, edge *, tree *) = 0;
   virtual enum ssa_prop_result visit_phi (gphi *) = 0;
 
+  /* Main interface into the propagation engine.  */
+  void ssa_propagate (void);
+
  private:
   /* Internal implementation details.  */
   void simulate_stmt (gimple *stmt);
@@ -100,10 +102,12 @@ class ssa_propagation_engine
 class substitute_and_fold_engine
 {
  public:
-  bool substitute_and_fold (void);
-  bool replace_uses_in (gimple *);
+  virtual ~substitute_and_fold_engine (void) { }
   virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
   virtual tree get_value (tree) { return NULL_TREE; }
+
+  bool substitute_and_fold (void);
+  bool replace_uses_in (gimple *);
   bool replace_phi_args_in (gphi *);
 };

Re: Generalize -(-X) a little

On Thu, Nov 2, 2017 at 2:11 PM, Marc Glisse  wrote:
> On Thu, 2 Nov 2017, Richard Biener wrote:
>
>> On Wed, Nov 1, 2017 at 12:47 PM, Marc Glisse  wrote:
>>>
>>> Hello,
>>>
>>> just a little tweak to that transformation. There is some overlap between
>>> the 2 versions, but it seemed easier to handle the NOP case (including
>>> the
>>> case without convert and the vector case) separately from the narrowing /
>>> sign-extending scalar integer case.
>>>
>>> At some point it would be good to have fold_negate_expr call
>>> generic_simplify so we could remove some transformations from
>>> fold-const.c.
>>>
>>> Bootstrap+regtest on powerpc64le-unknown-linux-gnu.
>>
>>
>> +  (negate (convert (negate @1)))
>> +  (if (INTEGRAL_TYPE_P (type)
>> +   && (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@1))
>> +  || (!TYPE_UNSIGNED (TREE_TYPE (@1))
>> +  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1
>> +   && !TYPE_OVERFLOW_SANITIZED (type)
>> +   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
>>
>> so I don't understand this fully -- a widening conversion is ok only for
>> signed types with undefined overflow?  How does overflow come into play
>> here at all?
>
>
> Sign-extension is ok, as long as negate does what one expects. For INT_MIN,
> that's not the case.
>
> Consider -(long)(-INT_MIN). -INT_MIN is INT_MIN, cast to long it remains
> negative, and the final value is positive (assuming long is larger than
> int), while (long)INT_MIN is negative.
>
> Undefined overflow allows us to assume that X is not INT_MIN. I could
> instead query VRP (and teach VRP that NEGATE_EXPR of VARYING has range
> [-INT_MAX,INT_MAX] with undefined overflow), but that seems a bit
> overkill.
>
>> The testcase doesn't tell ...
>
>
> Probably I should add a second testcase with cases that must not be
> simplified. Maybe even a runtime test for INT_MIN...
>
>> For floats eliding any conversion should be ok if not flag_rounding_math
>> and flag_unsafe_math_optimizations (we remove a rounding step)?
>
>
> I was leaving that for another time (currently this is handled differently
> in fold-const.c), but ok.
>
> For true floats (not fixed point), any extension seems fine, and narrowing
> indeed requires !HONOR_SIGN_DEPENDENT_ROUNDING (on the smaller type). It
> isn't clear to me that we need flag_unsafe_math_optimizations, with
> round-to-nearest -(float)(-dbl) does look equivalent to (float)dbl.
>
> I'll post a new patch with floats and more testcases when I get the time.

You can handle floats as followup but some testcases that shouldn't be optimized
for the INT_MIN / unsigned case would be nice.

Richard.

> --
> Marc Glisse

Re: [committed][PATCH] Trivial cleanups to new classes

On Thu, Nov 2, 2017 at 3:55 PM, Jeff Law  wrote:
>
> As has been discussed on-list.  This patch adds a virtual destructor to the
> new classes in tree-ssa-propagate.h per our coding conventions and what are
> considered best practices.  It doesn't matter for any code I'm aware of
> today -- it's a defensive measure.
>
> This also drops the "virtual" keyword on the FINAL OVERRIDE member functions
> in gimple-ssa-sprintf's sprintf_dom_walker class.  Opinions here are more
> mixed.  It's agreed that the keyword is redundant in this context.  The
> question is whether or not it adds confusion or reduces confusion.
>
> The virtual keyword intuitively implies to me the member can be overridden
> by a derived class, but that's in direct conflict with the FINAL keyword.
>
> Others focus more on the fact that the virtual keyword implies that the
> calls are typically indirect.   But in the case of a FINAL, one of the hopes
> is that devirt can use the information to change the indirect call into a
> direct call.

Does omitting 'virtual' have semantic meaning in C++?  I don't see
code-generation
differences for

struct X {
virtual void foo (void);
void bar();
};
struct Y : public X {
void foo (void);
void baz();
};

void X::bar()
{
  foo ();
}

void Y::baz()
{
  foo ();
}

when looking at bar vs. baz.  Even deriving from Y and overriding foo is
valid again.

Richard.

> In the end the arguments for dropping the "virtual" seemed stronger to me.
>
> Bootstrapped and regression tested on x86.  Installing on the trunk.
>
> Jeff
>
> ps. I suspect there's similar cleanups we ought to be doing on other classes
> used within GCC.
>
>
> * gimple-ssa-sprintf.c (sprintf_dom_walker): Remove
> virtual keyword on FINAL OVERRIDE members.
>
> * tree-ssa-propagate.h (ssa_propagation_engine): Group
> virtuals together.  Add virtual destructor.
> (substitute_and_fold_engine): Similarly.
>
> diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
> index 7415413..35ceb2c 100644
> --- a/gcc/gimple-ssa-sprintf.c
> +++ b/gcc/gimple-ssa-sprintf.c
> @@ -120,7 +120,7 @@ class sprintf_dom_walker : public dom_walker
>sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
>~sprintf_dom_walker () {}
>
> -  virtual edge before_dom_children (basic_block) FINAL OVERRIDE;
> +  edge before_dom_children (basic_block) FINAL OVERRIDE;
>bool handle_gimple_call (gimple_stmt_iterator *);
>
>struct call_info;
> diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
> index 629ae77..be4500b 100644
> --- a/gcc/tree-ssa-propagate.h
> +++ b/gcc/tree-ssa-propagate.h
> @@ -81,14 +81,16 @@ class ssa_propagation_engine
>  {
>   public:
>
> -  /* Main interface into the propagation engine.  */
> -  void ssa_propagate (void);
> +  virtual ~ssa_propagation_engine (void) { }
>
>/* Virtual functions the clients must provide to visit statements
>   and phi nodes respectively.  */
>virtual enum ssa_prop_result visit_stmt (gimple *, edge *, tree *) = 0;
>virtual enum ssa_prop_result visit_phi (gphi *) = 0;
>
> +  /* Main interface into the propagation engine.  */
> +  void ssa_propagate (void);
> +
>   private:
>/* Internal implementation details.  */
>void simulate_stmt (gimple *stmt);
> @@ -100,10 +102,12 @@ class ssa_propagation_engine
>  class substitute_and_fold_engine
>  {
>   public:
> -  bool substitute_and_fold (void);
> -  bool replace_uses_in (gimple *);
> +  virtual ~substitute_and_fold_engine (void) { }
>virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
>virtual tree get_value (tree) { return NULL_TREE; }
> +
> +  bool substitute_and_fold (void);
> +  bool replace_uses_in (gimple *);
>bool replace_phi_args_in (gphi *);
>  };
>
>

Re: [PATCH 3/7] GCOV: add support for lines with an unexecuted lines.

2017-11-02 Thread Eric Botcazou

> It's possible to have a line of code that has a non-zero coverage.
> However, it can contain unexecuted blocks and I hope adding a
> notification can be useful. LLVM also does that:
> 
> -:0:Source:ternary.c
> -:0:Graph:ternary.gcno
> -:0:Data:ternary.gcda
> -:0:Runs:1
> -:0:Programs:1
> -:1:int b, c, d, e;
> -:2:
> 1:3:int main()
> -:4:{
>1*:5:  int a = b < 1 ? (c < 3 ? d : c) : e;
> 1:6:return a;
> -:7:}

That can be annoying for languages more expressive than C's family though.

For example Ada has built-in overflow checking for addition, which means that:

  function Add (I1, I2 : Integer) return Integer is
  begin
return I1 + I2;
  end;

will now have the '*' symbol:

1:3:  function Add (I1, I2 : Integer) return Integer is
-:4:  begin
   1*:5:return I1 + I2;
-:6:  end;

which doesn't really make sense from the user's viewpoint.  How does LLVM deal 
with that (assuming it does)?

Testcase attached, compile with gnatmake p -fprofile-arcs -ftest-coverage.

-- 
Eric Botcazou

procedure P is

  function Add (I1, I2 : Integer) return Integer is
  begin
return I1 + I2;
  end;

begin
  if Add (1, 2) /= 3 then
raise Program_Error;
  end if;
end;

Re: [committed][PATCH] Trivial cleanups to new classes

2017-11-02 Thread Markus Trippelsdorf

On 2017.11.02 at 08:55 -0600, Jeff Law wrote:
> 
> As has been discussed on-list.  This patch adds a virtual destructor to 
> the new classes in tree-ssa-propagate.h per our coding conventions and 
> what are considered best practices.  It doesn't matter for any code I'm 
> aware of today -- it's a defensive measure.
> 
> This also drops the "virtual" keyword on the FINAL OVERRIDE member 
> functions in gimple-ssa-sprintf's sprintf_dom_walker class.  Opinions 
> here are more mixed.  It's agreed that the keyword is redundant in this 
> context.  The question is whether or not it adds confusion or reduces 
> confusion.
> 
> The virtual keyword intuitively implies to me the member can be 
> overridden by a derived class, but that's in direct conflict with the 
> FINAL keyword.
> 
> Others focus more on the fact that the virtual keyword implies that the 
> calls are typically indirect.   But in the case of a FINAL, one of the 
> hopes is that devirt can use the information to change the indirect call 
> into a direct call.
> 
> In the end the arguments for dropping the "virtual" seemed stronger to me.
> 
> Bootstrapped and regression tested on x86.  Installing on the trunk.

Even specifying both override and final is normally frowned upon, see:
https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#c128-virtual-functions-should-specify-exactly-one-of-virtual-override-or-final

-- 
Markus

Re: [committed][PATCH] Trivial cleanups to new classes


On 11/02/2017 09:31 AM, Richard Biener wrote:

On Thu, Nov 2, 2017 at 3:55 PM, Jeff Law  wrote:


As has been discussed on-list.  This patch adds a virtual destructor to the
new classes in tree-ssa-propagate.h per our coding conventions and what are
considered best practices.  It doesn't matter for any code I'm aware of
today -- it's a defensive measure.

This also drops the "virtual" keyword on the FINAL OVERRIDE member functions
in gimple-ssa-sprintf's sprintf_dom_walker class.  Opinions here are more
mixed.  It's agreed that the keyword is redundant in this context.  The
question is whether or not it adds confusion or reduces confusion.

The virtual keyword intuitively implies to me the member can be overridden
by a derived class, but that's in direct conflict with the FINAL keyword.

Others focus more on the fact that the virtual keyword implies that the
calls are typically indirect.   But in the case of a FINAL, one of the hopes
is that devirt can use the information to change the indirect call into a
direct call.


Does omitting 'virtual' have semantic meaning in C++?  I don't see
code-generation
differences for

In the cases we're dealing with it has no semantic meaning.

jeff

Re: [PATCH] Improve store merging to handle load+store or bitwise logicals (PR tree-optimization/78821)

2017-11-02 Thread Kyrill Tkachov


Hi Jakub,

On 02/11/17 14:10, Jakub Jelinek wrote:

Hi!

The following patch improves store merging, so that it doesn't handle
just constant stores into adjacent memory, but also adjacent memory copying
and simple bitwise logical ops where at least one argument is a load
from adjacent memory and the other argument as well or a constant.
The loads are limited to be either all using the same vuse, or each using
vuse of the corresponding stores.  So examples of what can be handled are:
   s.a = 1; s.b = 2; // we could handle this before this patch already
   _1 = t.a; _2 = t.b; s.a = _1; s.b = _2; // copying with the same vuse
   _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; // copying with vuse of the store
   _1 = s.a; _2 = _1 | 23; _3 = s.b; _4 = _3 | 12345; s.a = _2; s.b = _4; // | 
with one load and one constant
etc.
What the patch doesn't handle yet because
terminate_all_aliasing_chains uses:
   /* We can't use the base object here as that does not reliably exist.
  Build a ao_ref from the base object address (if we know the
  minimum and maximum offset and the maximum size we could improve
  things here).  */
   ao_ref chain_ref;
   ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
is e.g.
void
f3 (struct S *__restrict p, struct S *__restrict q)
{
   p->a |= q->a; p->b |= q->b; p->c |= q->c; p->d |= q->d;
   p->e |= q->e; p->f |= q->f; p->g |= q->g; p->h |= q->h;
}
I'll try to improve that incrementally by preserving the underlying original
reference and tracking minimum/maximum offsets from that.

The patch also doesn't hook in the bswap infrastructure to recognize say
struct S { char a, b, c, d; } u, v;
void foo (void) { u.a = v.d; u.b = v.c; u.c = v.b; u.d = v.a; }
though wonder if it is worth it (whether there is any real-world code like
that at all or common enough to worth the work on it).

Bootstrapped/regtested on {x86_64,i686,powerpc64,powerpc64le}-linux, ok for
trunk?

I'm now doing another x86_64/i686 bootstrap/regtest to gather some
statistics, both are still regtesting now, the current numbers show:
rhs_code        split_stores.length ()  orig_num_stmts
integer_cst 135533  275746
mem_ref 13289   27852
bit_*_expr  36  81
so the first row shows that already before this patch when we decided to
optimize constant stores we decreased the number to 50% on average, for
memory copying around 10% cases of the constant stores and the reason
why the bitwise logical don't trigger much is probably related to the
above mentioned ao_ref_init* missed-opt as well as such constructs being
far less common.  In theory we could handle also mixed rhs codes, but not
sure it is worth the effort - e.g. if somebody does:
   s.a = 5; s.b |= 4; s.c &= 2; s.d ^= 5;
we could load the memory and do some |/&/^ on it.


this looks great! I have a couple of comments.
* Can you please extend file comments for gimple-ssa-store-merging.c ?
Currently it mostly describes how we merge constants together. Once we 
start accepting non-constant members

we should mention it in there.

* Can we also handle BIT_NOT_EXPRESSIONS? i.e. Copying memory locations
but with a unary op applied on top.
Don't know how often that comes up though. Maybe it will complicate 
store_operand_info and its handling too much

to be worth it...

2017-11-02  Jakub Jelinek  

PR tree-optimization/78821
* gimple-ssa-store-merging.c (struct store_operand_info): New type.
(store_operand_info::store_operand_info): New constructor.
(struct store_immediate_info): Add rhs_code and ops data members.
(store_immediate_info::store_immediate_info): Add rhscode, op0r
and op1r arguments to the ctor, initialize corresponding data members.
(struct merged_store_group): Add load_align_base and load_align
data members.
(merged_store_group::merged_store_group): Initialize them.
(merged_store_group::do_merge): Update them.
(merged_store_group::apply_stores): Pick the constant for
encode_tree_to_bitpos from one of the two operands, or skip
encode_tree_to_bitpos if neither operand is a constant.
(class pass_store_merging): Add process_store method decl.  Remove
bool argument from terminate_all_aliasing_chains method decl.
(pass_store_merging::terminate_all_aliasing_chains): Remove
var_offset_p argument and corresponding handling.
(stmts_may_clobber_ref_p): New function.
(compatible_load_p): New function.
(imm_store_chain_info::coalesce_immediate_stores): Terminate group
if there is overlap and rhs_code is not INTEGER_CST.  For
non-overlapping stores terminate group if rhs is not mergeable.
(get_alias_type_for_stmts): Change first argument from
auto_vec<gimple *> & to vec<gimple *> &.  Add IS_LOAD, CLIQUEP and
BASEP arguments.  If IS_LOAD is true, lo

Re: [committed][PATCH] Trivial cleanups to new classes

On 11/02/2017 09:33 AM, Markus Trippelsdorf wrote:

On 2017.11.02 at 08:55 -0600, Jeff Law wrote:

As has been discussed on-list. This patch adds a virtual destructor to
the new classes in tree-ssa-propagate.h per our coding conventions and
what are considered best practices. It doesn't matter for any code I'm
aware of today -- it's a defensive measure.

This also drops the "virtual" keyword on the FINAL OVERRIDE member
functions in gimple-ssa-sprintf's sprintf_dom_walker class. Opinions
here are more mixed. It's agreed that the keyword is redundant in this
context. The question is whether or not it adds confusion or reduces
confusion.

The virtual keyword intuitively implies to me the member can be
overridden by a derived class, but that's in direct conflict with the
FINAL keyword.

Others focus more on the fact that the virtual keyword implies that the
calls are typically indirect. But in the case of a FINAL, one of the
hopes is that devirt can use the information to change the indirect call
into a direct call.

In the end the arguments for dropping the "virtual" seemed stronger to me.

Bootstrapped and regression tested on x86. Installing on the trunk.

Even specifying both override and final is normally frowned upon, see:
https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#c128-virtual-functions-should-specify-exactly-one-of-virtual-override-or-final
Yea, I hadn't researched that aspect as thoroughly. ISTM we should
probably pull some of this into our own guidelines.

Jeff

Re: [PATCH] Improve store merging to handle load+store or bitwise logicals (PR tree-optimization/78821)

2017-11-02 Thread Jakub Jelinek

On Thu, Nov 02, 2017 at 03:38:45PM +0000, Kyrill Tkachov wrote:
> this looks great! I have a couple of comments.
> * Can you please extend file comments for gimple-ssa-store-merging.c ?
> Currently it mostly describes how we merge constants together. Once we start
> accepting non-constant members
> we should mention it in there.

If you mean the file comment, yeah, I can try to adjust it/extend it.

> * Can we also handle BIT_NOT_EXPRESSIONS? i.e. Copying memory locations
> but with a unary op applied on top.
> Don't know how often that comes up though. Maybe it will complicate
> store_operand_info and its handling too much
> to be worth it...

I guess we can.  If we only supported s.a = ~t.a; s.b = ~t.b;, it could be
handled very easily by adding support for BIT_NOT_EXPR as rhs_code with
a single load operand.  But, perhaps we want to also handle
s.a = t.a & ~u.a; s.b = t.b & ~u.b; and similar, in which case it would be
better to have a bool flag for BIT_NOT_EXPR on the load in
store_operand_info.  In any case, I'd prefer to handle this incrementally,
as I wrote, I want to also handle the aliasing issue.
> > +bool
> > +stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
> > +{
> > +  ao_ref r;
> > +  ao_ref_init (&r, ref);
> > +  unsigned int count = 0;
> > +  tree vop = gimple_vdef (last);
> > +  gimple *stmt;
> > +
> > +  gcc_checking_assert (gimple_bb (first) == gimple_bb (last));
> > +  do
> > +{
> > +  stmt = SSA_NAME_DEF_STMT (vop);
> > +  if (stmt_may_clobber_ref_p_1 (stmt, &r))
> > +   return true;
> > +  if (++count > 64)
> > +   return true;
> 
> Magic number 64? Don't know if it's worth having a PARAM for it, but at
> least a comment saying we're bailing out
> for compile time considerations would be good.

I guess I could add a #define MAX_STORE_ALIAS_CHECKS 64 next to the other
two defines in the file.  Or go for a param.
> > + if (!integer_zerop (mask))
> > +   TREE_NO_WARNING (ops[j]) = 1;
> 
> Please add a comment here as to why we need the TREE_NO_WARNING here.

There is a comment on the other TREE_NO_WARNING, but this one comes
first, I'm not against copying that comment here.

Jakub

Re: [RFA] Implement __VA_OPT__

2017-11-02 Thread Tom Tromey

Tom> [ __VA_OPT__ ]
Tom> Here's v3.

Tom> Ping.

Tom> Ping #2.

Ping #3.

Tom

[PATCH] RISC-V: Handle non-legitimate address in riscv_legitimize_move

2017-11-02 Thread Palmer Dabbelt

From: Kito Cheng 

GCC may generate a non-legitimate address because we allow some
loads/stores with non-legitimate addresses in pic.md.

gcc/ChangeLog

2017-11-02  Kito Cheng  

* config/riscv/riscv.c (riscv_legitimize_move): Handle
non-legitimate address.
---
 gcc/config/riscv/riscv.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index c34468e018d6..b81a2d29fbfd 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -1332,6 +1332,22 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
   return true;
 }
 
+  /* RISC-V GCC may generate non-legitimate address due to we provide some
+ pattern for optimize access PIC local symbol and it's make GCC generate
+ unrecognizable instruction during optmizing.  */
+
+  if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
+  reload_completed))
+{
+  XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
+}
+
+  if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
+ reload_completed))
+{
+  XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
+}
+
   return false;
 }
 
-- 
2.13.6

Re: [C++ Patch] PR 81957 ("ICE decltype")

On Tue, Oct 31, 2017 at 5:45 AM, Paolo Carlini  wrote:
> this ICE on valid seems rather easy to fix, one of those bugs where we
> aren't propagating the tsubst_flags_t argument. In this case, we aren't
> propagating from tsubst_pack_expansion to make_pack_expansion. Doing it,
> fixes the ICE and we actually accept the code as we should. In general,
> make_pack_expansion is also called from many other places, eg, the parser,
> thus I'm using a default tf_warning_or_error for it. Similarly to other past
> fixes, I ended up adding tsubst_flags_t parameters to a few other functions
> in pt.c, eg template_parms_to_args, which eventually use make_pack_expansion
> without propagating.
>
> There are a couple of places where I'm proposing passing a /tf_none/ to
> template_parms_to_args: is_compatible_template_arg, which is currently
> calling template_parms_to_args and passing the result together with a
> tf_none to tsubst_constraint_info; convert_generic_types_to_packs, which is
> currently calling template_parms_to_args and passing the result (via
> add_to_template_args) together with tf_none to tsubst. Of course the
> "conservative" choice would be instead passing tf_warning_or_error.

Do we really need to add tsubst_flags_t to template_parm_to_arg and
friends?  It should never produce an error.

Jason

Re: Adjust empty class parameter passing ABI (PR c++/60336)

2017-11-02 Thread Marek Polacek

On Thu, Nov 02, 2017 at 09:53:33AM -0400, Jason Merrill wrote:
> On Thu, Nov 2, 2017 at 8:21 AM, Richard Biener  wrote:
> > On Wed, 1 Nov 2017, Marek Polacek wrote:
> >
> >> On Fri, Oct 27, 2017 at 12:46:12PM +0200, Richard Biener wrote:
> >> > On Fri, 27 Oct 2017, Jakub Jelinek wrote:
> >> >
> >> > > On Fri, Oct 27, 2017 at 12:31:46PM +0200, Richard Biener wrote:
> >> > > > I fear it doesn't work at all with LTO (you'll always get the old ABI
> >> > > > if I read the patch correctly).  This is because the function
> >> > > > computing the size looks at flag_abi_version which isn't saved
> >> > > > per function / TU.
> >> > > >
> >> > > > Similarly you'll never get the ABI warning with LTO (less of a big
> >> > > > deal of course) because the langhook doesn't reflect things correctly
> >> > > > either.
> >> > > >
> >> > > > So...  can we instead compute whether a type is "empty" according
> >> > > > to the ABI early and store the result in the type (thinking of
> >> > > > doing this in layout_type?).  Similarly set a flag whether to
> >> > > > warn.  Why do you warn from backends / code emission and not
> >> > > > from the FEs?  Is that to avoid warnings for calls that got inlined?
> >> > > > Maybe the FE could set a flag on the call itself (ok, somewhat
> >> > > > awkward to funnel through gimple).
> >> > >
> >> > > Warning in the FE is too early both because of the inlining, never
> >> > > emitted functions and because whether an empty struct is passed 
> >> > > differently
> >> > > from the past matters on the backend (whether its psABI says it should 
> >> > > be
> >> > > not passed at all or not).
> >> > >
> >> > > Perhaps if empty types are rare enough it could be an artificial 
> >> > > attribute
> >> > > on the type if we can't get a spare bit for that.  But computing in 
> >> > > the FE
> >> > > or before free_lang_data and saving on the type whether it is empty or 
> >> > > not
> >> > > seems reasonable to me.
> >> >
> >> > There are 18 unused bits in tree_type_common if we don't want to re-use
> >> > any.  For the warning I first thought of setting TREE_NO_WARNING on it
> >> > but that bit is used already.  OTOH given the "fit" of TREE_NO_WARNING
> >> > I'd move TYPE_ARTIFICIAL somewhere else.
> >>
> >> All right, should be done in the below.  I've introduced two new flags,
> >> TYPE_EMPTY_P (says whether the type is empty according to the psABI), and
> >> TYPE_WARN_EMPTY_P (whether we should warn).  I've added two new fields to
> >> tree_type_common and moved TYPE_ARTIFICIAL there; TYPE_WARN_EMPTY_P is now
> >> mapped to nowarning_flag.  So this should work with LTO, as demonstrated
> >> by g++.dg/lto/pr60336_0.C.
> >>
> >> Regarding LTO and -Wabi warning, I've added Optimization to c.opt so that
> >> we get warnings with LTO.  But as pointed out on IRC, this doesn't fully work
> >> with cross-inlining.  I tried to do some flags merging in inline_call, but
> >> that didn't help, one of the problems is that warn_abi_version lives in
> >> c-family only.  Not sure if I'll be able to improve things here though.
> >>
> >> Bootstrapped/regtested on x86_64-linux, ppc64-linux, and aarch64-linux.
> >> Bootstrap-lto passed on x86_64-linux and ppc64-linux.
> >
> > To me the tree.c stuff is_empty_type looks awfully ABI dependent
> > and should thus reside in i386.c near the target hook implementation?
> 
> I think there should be a default version in common code, to hopefully
> be shared by all targets that want this behavior.

That was my thinking too.  Also, I don't see anything target-specific in
is_empty_type.

> > What goes wrong if we do not introduce new int_maybe_empty_type_size
> > and maybe_empty_type_size but instead change int_size_in_bytes and
> > size_in_bytes to return 0 if TYPE_EMPTY_P ()?  If the ABI can omit
> > passing things assuming the size is zero should work as well, no?
> 
> We need to distinguish between size in general and size for calling
> convention purposes, but the function names should mention the calling
> convention rather than "maybe_empty".  Maybe something like
> "arg_size_in_bytes"?

Sure, that works for me.  Changed.

> > Otherwise I'd really prefer seeing explicit TYPE_EMPTY_P checks
> > which would reduce the number of "indirect" greps one has to do when
> > looking for effects of TYPE_EMPTY_P.
> 
> Hmm, yes, I was hoping we could encapsulate this in target code, but
> needing these flags for LTO messes that up; if we can't have full
> encapsulation, maybe we want less?

So I don't know what to do here, I could go either way.  The explicit
checks with ?: struck me as ugly but I can go back on that.

> > Still needs FE and target maintainer approval -- the target maintainer
> > wants to look at the seemingly ABI independent functions in tree.c.
> 
> Instead of moving array_type_nelts_top to tree.c, you can use
> integer_minus_onep (array_type_nelts (ftype)).

Okay, done.

> I'm still not sure why you want to consider a type with a flexible
> array member non-empty.  I

Re: Drop frequencies from basic blocks


Hi Honza.

Thanks for the huge patch. I'm willing to help you with testing, but I can't
apply the patch on top of r254348:

../../gcc/profile.c: In function ‘void compute_branch_probabilities(unsigned 
int, unsigned int)’:
../../gcc/profile.c:794:11: error: ‘flag_guess_branch_probability’ was not 
declared in this scope
   || !flag_guess_branch_probability)
   ^
../../gcc/profile.c:794:11: note: suggested alternative: 
‘OPT_fguess_branch_probability’
   || !flag_guess_branch_probability)
   ^
   OPT_fguess_branch_probability
../../gcc/profile.c:801:21: error: no match for ‘operator!=’ (operand types are 
‘profile_count’ and ‘profile_count’)
   if (bb->count != profile_count::zero ())
   ~~^
In file included from ../../gcc/coretypes.h:397:0,
 from ../../gcc/profile.c:52:
../../gcc/wide-int.h:3158:19: note: candidate: template typename 
wi::binary_traits::predicate_result operator!=(const T1&, const T2&)
 BINARY_PREDICATE (operator !=, ne_p)
   ^
../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’
   OP (const T1 &x, const T2 &y) \
   ^~
../../gcc/wide-int.h:3158:19: note:   template argument deduction/substitution 
failed:
 BINARY_PREDICATE (operator !=, ne_p)
   ^
../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’
   OP (const T1 &x, const T2 &y) \
   ^~
../../gcc/wide-int.h: In substitution of ‘template typename 
wi::binary_traits::predicate_result operator!=(const T1&, const T2&) [with T1 
= profile_count; T2 = profile_count]’:
../../gcc/profile.c:801:45:   required from here
../../gcc/wide-int.h:3158:19: error: incomplete type 
‘wi::int_traits’ used in nested name specifier
 BINARY_PREDICATE (operator !=, ne_p)
   ^
../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’
   OP (const T1 &x, const T2 &y) \
   ^~

Can you please check that?

Thanks,
Martin

Re: [PATCH] New option saphira for Qualcomm server part

2017-11-02 Thread Siddhesh Poyarekar

Ping!

Siddhesh

On 27 October 2017 at 18:13, Siddhesh Poyarekar  wrote:
> From: Siddhesh Poyarekar 
>
> This patch adds an mcpu option for the Qualcomm saphira server part.
> Tested on aarch64 and did not find any regressions resulting from this
> patch.
>
> Siddhesh
>
> 2017-10-27  Siddhesh Poyarekar  
> Jim Wilson  
>
> gcc/
> * config/aarch64/aarch64-cores.def (saphira): New.
> * config/aarch64/aarch64-tune.md: Regenerated.
> * doc/invoke.texi (AArch64 Options/-mtune): Add "saphira".
> * gcc/config/aarch64/aarch64.c (saphira_tunings): New.
>
> Change-Id: I23c4a1ab74e4376c3800cb1481c508bc27418508
> ---
>  gcc/config/aarch64/aarch64-cores.def |  5 +
>  gcc/config/aarch64/aarch64-tune.md   |  2 +-
>  gcc/config/aarch64/aarch64.c | 28 
>  gcc/doc/invoke.texi  |  2 +-
>  4 files changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def 
> b/gcc/config/aarch64/aarch64-cores.def
> index 16e4485..cdf047c 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -86,6 +86,11 @@ AARCH64_CORE("thunderx2t99",  thunderx2t99,  thunderx2t99, 
> 8_1A,  AARCH64_FL_FOR
>  AARCH64_CORE("cortex-a55",  cortexa55, cortexa53, 8_2A,  
> AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 
> 0x41, 0xd05, -1)
>  AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  
> AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 
> 0x41, 0xd0a, -1)
>
> +/* ARMv8.3-A Architecture Processors.  */
> +
> +/* Qualcomm ('Q') cores. */
> +AARCH64_CORE("saphira", saphira,falkor,8_3A,  
> AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   
> 0x51, 0xC01, -1)
> +
>  /* ARMv8-A big.LITTLE implementations.  */
>
>  AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  
> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE 
> (0xd07, 0xd03), -1)
> diff --git a/gcc/config/aarch64/aarch64-tune.md 
> b/gcc/config/aarch64/aarch64-tune.md
> index 7fcd6cb..7b3a746 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> -   
> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
> +   
> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
> (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index d1aaf19..f554ffb 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -822,6 +822,34 @@ static const struct tune_params qdf24xx_tunings =
>&qdf24xx_prefetch_tune
>  };
>
> +/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
> +   for now.  */
> +static const struct tune_params saphira_tunings =
> +{
> +  &generic_extra_costs,
> +  &generic_addrcost_table,
> +  &generic_regmove_cost,
> +  &generic_vector_cost,
> +  &generic_branch_cost,
> +  &generic_approx_modes,
> +  4, /* memmov_cost  */
> +  4, /* issue_rate  */
> +  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
> +   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
> +  16,  /* function_align.  */
> +  8,   /* jump_align.  */
> +  16,  /* loop_align.  */
> +  2,   /* int_reassoc_width.  */
> +  4,   /* fp_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2,   /* min_div_recip_mul_df.  */
> +  0,   /* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_WEAK,/* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
> +  &generic_prefetch_tune
> +};
> +
>  static const struct tune_params thunderx2t99_tunings =
>  {
>&thunderx2t99_extra_costs,
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 71b2445..bc480ad 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -14326,7 +14326,7 @@ Specify the name of the target processor for which 
> GCC should tune the
>  performance of the code.  Permissible values for this option are:
>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
> -@samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx},
> +@samp{exynos-m1}, @samp{falkor},

[PATCH] Improve store merging to handle load+store or bitwise logicals (PR tree-optimization/78821, take 2)

2017-11-02 Thread Jakub Jelinek

On Thu, Nov 02, 2017 at 03:38:45PM +0000, Kyrill Tkachov wrote:
> this looks great! I have a couple of comments.
> * Can you please extend file comments for gimple-ssa-store-merging.c ?
> Currently it mostly describes how we merge constants together. Once we start
> accepting non-constant members
> we should mention it in there.

The following updated patch introduces the #define and updates comments.
I'll do the BIT_NOT_EXPR work incrementally.

BTW, finished the statistics gathering from combined x86_64 and i686-linux
bootstraps.  With my recent gimple-ssa-store-merging.c (the bitfield
handling etc.) changes reverted, the split_stores.length () and orig_num_stmts
counts at the end of successful output_merged_store was (sum from all
cases):
integer_cst 199245  413294
with the recent change in plus this patch:
integer_cst 215274  442134
mem_ref 16943   35369
bit_and_expr    37      88
bit_ior_expr    19      46
bit_xor_expr    27      58
I think the integer_cst numbers without/with this patch should be roughly
the same.

2017-11-02  Jakub Jelinek  

PR tree-optimization/78821
* gimple-ssa-store-merging.c: Update the file comment.
(MAX_STORE_ALIAS_CHECKS): Define.
(struct store_operand_info): New type.
(store_operand_info::store_operand_info): New constructor.
(struct store_immediate_info): Add rhs_code and ops data members.
(store_immediate_info::store_immediate_info): Add rhscode, op0r
and op1r arguments to the ctor, initialize corresponding data members.
(struct merged_store_group): Add load_align_base and load_align
data members.
(merged_store_group::merged_store_group): Initialize them.
(merged_store_group::do_merge): Update them.
(merged_store_group::apply_stores): Pick the constant for
encode_tree_to_bitpos from one of the two operands, or skip
encode_tree_to_bitpos if neither operand is a constant.
(class pass_store_merging): Add process_store method decl.  Remove
bool argument from terminate_all_aliasing_chains method decl.
(pass_store_merging::terminate_all_aliasing_chains): Remove
var_offset_p argument and corresponding handling.
(stmts_may_clobber_ref_p): New function.
(compatible_load_p): New function.
(imm_store_chain_info::coalesce_immediate_stores): Terminate group
if there is overlap and rhs_code is not INTEGER_CST.  For
non-overlapping stores terminate group if rhs is not mergeable.
(get_alias_type_for_stmts): Change first argument from
auto_vec & to vec &.  Add IS_LOAD, CLIQUEP and
BASEP arguments.  If IS_LOAD is true, look at rhs1 of the stmts
instead of lhs.  Compute *CLIQUEP and *BASEP in addition to the
alias type.
(get_location_for_stmts): Change first argument from
auto_vec & to vec &.
(struct split_store): Remove orig_stmts data member, add orig_stores.
(split_store::split_store): Create orig_stores rather than orig_stmts.
(find_constituent_stmts): Renamed to ...
(find_constituent_stores): ... this.  Change second argument from
vec * to vec *, push pointers
to info structures rather than the statements.
(split_group): Rename ALLOW_UNALIGNED argument to
ALLOW_UNALIGNED_STORE, add ALLOW_UNALIGNED_LOAD argument and handle
it.  Adjust find_constituent_stores caller.
(imm_store_chain_info::output_merged_store): Handle rhs_code other
than INTEGER_CST, adjust split_group, get_alias_type_for_stmts and
get_location_for_stmts callers.  Set MR_DEPENDENCE_CLIQUE and
MR_DEPENDENCE_BASE on the MEM_REFs if they are the same in all stores.
(mem_valid_for_store_merging): New function.
(handled_load): New function.
(pass_store_merging::process_store): New method.
(pass_store_merging::execute): Use process_store method.  Adjust
terminate_all_aliasing_chains caller.

* gcc.dg/store_merging_13.c: New test.
* gcc.dg/store_merging_14.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2017-11-01 22:49:18.123965696 +0100
+++ gcc/gimple-ssa-store-merging.c  2017-11-02 17:24:04.236317245 +0100
@@ -19,7 +19,8 @@
.  */

 /* The purpose of this pass is to combine multiple memory stores of
-   constant values to consecutive memory locations into fewer wider stores.
+   constant values, values loaded from memory or bitwise operations
+   on those to consecutive memory locations into fewer wider stores.
For example, if we have a sequence peforming four byte stores to
consecutive memory locations:
[p ] := imm1;
@@ -29,21 +30,49 @@
we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.

+   Or:
+   [p ] := [q ];
+   [p + 1B] := [q

Re: [RFA] Implement __VA_OPT__


On 09/17/2017 11:44 AM, Tom Tromey wrote:

+@code{@w{__VA_OPT__}} is also available in GNU C and GNU C++.



+{ /*  c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep 
trig u8chlit vaopt */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,0, 0, 
0,   0,  0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,0, 0, 
0,   0,  0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,0, 0, 
0,   0,  0 },

[...]

Do we want 1s for vaopt in the GNU rows, then?  It seems to only be used 
for controlling the pedwarn about needing at least one argument for the 
variadic parameter.



+maybe_va_opt_error (cpp_reader *pfile, cpp_hashnode *node)


Do we also want to look at the va_opt option in this function, to 
complain if pedantic and it isn't set?


Jason

[PR c++/82710] false positive paren warning


This patch fixes pr82710, where we erroneously warn on something like:
   friend class_X (::other::name (...));
the parens are needed, otherwise the '::other' is taken to be a 
qualified lookup inside the class_X.


Unfortunately, at the point we can check, we've lost information that 
'::' was used.  So I back off when we see a qualified name there.


nathan
--
Nathan Sidwell
2017-11-02  Nathan Sidwell  

	PR c++/82710
	* decl.c (grokdeclarator): Don't warn when parens protect a return
	type from a qualified name.

	PR c++/82710
	* g++.dg/warn/pr82710.C: New.

Index: cp/decl.c
===
--- cp/decl.c	(revision 254344)
+++ cp/decl.c	(working copy)
@@ -10788,18 +10788,25 @@ grokdeclarator (const cp_declarator *dec
 	attr_flags);
 	}
 
+  inner_declarator = declarator->declarator;
+
   /* We don't want to warn in parmeter context because we don't
 	 yet know if the parse will succeed, and this might turn out
 	 to be a constructor call.  */
   if (decl_context != PARM
-	  && declarator->parenthesized != UNKNOWN_LOCATION)
+	  && declarator->parenthesized != UNKNOWN_LOCATION
+	  /* If the type is a class and the inner name used a global
+	 namespace qualifier, we need the parens.  Unfortunately
+	 all we can tell is that a qualified name was used.  */
+	  && !(CLASS_TYPE_P (type)
+	   && inner_declarator
+	   && inner_declarator->kind == cdk_id
+	   && inner_declarator->u.id.qualifying_scope))
 	warning_at (declarator->parenthesized, OPT_Wparentheses,
 		"unnecessary parentheses in declaration of %qs", name);
   if (declarator->kind == cdk_id || declarator->kind == cdk_decomp)
 	break;
 
-  inner_declarator = declarator->declarator;
-
   switch (declarator->kind)
 	{
 	case cdk_array:
Index: testsuite/g++.dg/warn/pr82710.C
===
--- testsuite/g++.dg/warn/pr82710.C	(revision 0)
+++ testsuite/g++.dg/warn/pr82710.C	(working copy)
@@ -0,0 +1,32 @@
+// { dg-additional-options -Wparentheses }
+
+// the MVP warning triggered on a friend decl.  */
+class X;
+
+namespace here 
+{
+  // these friends
+  X friendFunc1();
+  X *friendFunc2 ();
+  int friendFunc3 ();
+}
+
+namespace nm
+{
+  namespace here 
+  {
+// Not these friends
+void friendFunc1 ();
+void friendFunc2 ();
+void friendFunc3 ();
+  }
+
+  class TestClass
+  {
+friend X (::here::friendFunc1 ()); // parens are needed
+friend X *(::here::friendFunc2 ()); // { dg-warning "" }
+friend X *::here::friendFunc2 ();
+friend int (::here::friendFunc3 ()); // { dg-warning "" }
+  };
+}
+

[PATCH][AArch64] Set default sched pressure algorithm

2017-11-02 Thread Wilco Dijkstra

The Arm backend sets the default sched-pressure algorithm to
SCHED_PRESSURE_MODEL.  Benchmarking on AArch64 shows this 
speeds up floating point performance on SPEC - eg. CactusBSSN improves
by ~16%.  The gains are mostly due to less spilling, so enable this on AArch64
by default.

OK for commit?

2017-11-02  Wilco Dijkstra  

* config/aarch64/aarch64.c (aarch64_override_options_internal):
Set PARAM_SCHED_PRESSURE_ALGORITHM to SCHED_PRESSURE_MODEL.

--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
34456e96497ac7b6d2f9931187ff05619e1934a4..750b0bc29c0963742d5d7bb4ae4619d93bec3e4a
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9276,6 +9276,11 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
   opts->x_param_values,
   global_options_set.x_param_values);
 
+  /* Use the alternative scheduling-pressure algorithm by default.  */
+  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
+opts->x_param_values,
+global_options_set.x_param_values);
+
   /* Enable sw prefetching at specified optimization level for
  CPUS that have prefetch.  Lower optimization level threshold by 1
  when profiling is enabled.  */

Re: Adjust empty class parameter passing ABI (PR c++/60336)

On Thu, Nov 2, 2017 at 1:08 PM, Marek Polacek  wrote:
> On Thu, Nov 02, 2017 at 09:53:33AM -0400, Jason Merrill wrote:
>> On Thu, Nov 2, 2017 at 8:21 AM, Richard Biener  wrote:
>> > On Wed, 1 Nov 2017, Marek Polacek wrote:
>> >
>> >> On Fri, Oct 27, 2017 at 12:46:12PM +0200, Richard Biener wrote:
>> >> > On Fri, 27 Oct 2017, Jakub Jelinek wrote:
>> >> >
>> >> > > On Fri, Oct 27, 2017 at 12:31:46PM +0200, Richard Biener wrote:
>> >> > > > I fear it doesn't work at all with LTO (you'll always get the old 
>> >> > > > ABI
>> >> > > > if I read the patch correctly).  This is because the function
>> >> > > > computing the size looks at flag_abi_version which isn't saved
>> >> > > > per function / TU.
>> >> > > >
>> >> > > > Similarly you'll never get the ABI warning with LTO (less of a big
>> >> > > > deal of course) because the langhook doesn't reflect things 
>> >> > > > correctly
>> >> > > > either.
>> >> > > >
>> >> > > > So...  can we instead compute whether a type is "empty" according
>> >> > > > to the ABI early and store the result in the type (thinking of
>> >> > > > doing this in layout_type?).  Similarly set a flag whether to
>> >> > > > warn.  Why do you warn from backends / code emission and not
>> >> > > > from the FEs?  Is that to avoid warnings for calls that got inlined?
>> >> > > > Maybe the FE could set a flag on the call itself (ok, somewhat
>> >> > > > awkward to funnel through gimple).
>> >> > >
>> >> > > Warning in the FE is too early both because of the inlining, never
>> >> > > emitted functions and because whether an empty struct is passed 
>> >> > > differently
>> >> > > from the past matters on the backend (whether its psABI says it 
>> >> > > should be
>> >> > > not passed at all or not).
>> >> > >
>> >> > > Perhaps if empty types are rare enough it could be an artificial 
>> >> > > attribute
>> >> > > on the type if we can't get a spare bit for that.  But computing in 
>> >> > > the FE
>> >> > > or before free_lang_data and saving on the type whether it is empty 
>> >> > > or not
>> >> > > seems reasonable to me.
>> >> >
>> >> > There are 18 unused bits in tree_type_common if we don't want to re-use
>> >> > any.  For the warning I first thought of setting TREE_NO_WARNING on it
>> >> > but that bit is used already.  OTOH given the "fit" of TREE_NO_WARNING
>> >> > I'd move TYPE_ARTIFICIAL somewhere else.
>> >>
>> >> All right, should be done in the below.  I've introduced two new flags,
>> >> TYPE_EMPTY_P (says whether the type is empty according to the psABI), and
>> >> TYPE_WARN_EMPTY_P (whether we should warn).  I've added two new fields to
>> >> tree_type_common and moved TYPE_ARTIFICIAL there; TYPE_WARN_EMPTY_P is now
>> >> mapped to nowarning_flag.  So this should work with LTO, as demonstrated
>> >> by g++.dg/lto/pr60336_0.C.
>> >>
>> >> Regarding LTO and -Wabi warning, I've added Optimization to c.opt so that
>> >> we get warnings with LTO.  But as pointed out on IRC, this doesn't fully work
>> >> with cross-inlining.  I tried to do some flags merging in inline_call, but
>> >> that didn't help, one of the problems is that warn_abi_version lives in
>> >> c-family only.  Not sure if I'll be able to improve things here though.
>> >>
>> >> Bootstrapped/regtested on x86_64-linux, ppc64-linux, and aarch64-linux.
>> >> Bootstrap-lto passed on x86_64-linux and ppc64-linux.
>> >
>> > To me the tree.c stuff is_empty_type looks awfully ABI dependent
>> > and should thus reside in i386.c near the target hook implementation?
>>
>> I think there should be a default version in common code, to hopefully
>> be shared by all targets that want this behavior.
>
> That was my thinking too.  Also, I don't see anything target-specific in
> is_empty_type.

We probably want to call them something like default_is_empty_type and
default_is_empty_record, though.

>> > What goes wrong if we do not introduce new int_maybe_empty_type_size
>> > and maybe_empty_type_size but instead change int_size_in_bytes and
>> > size_in_bytes to return 0 if TYPE_EMPTY_P ()?  If the ABI can omit
>> > passing things assuming the size is zero should work as well, no?
>>
>> We need to distinguish between size in general and size for calling
>> convention purposes, but the function names should mention the calling
>> convention rather than "maybe_empty".  Maybe something like
>> "arg_size_in_bytes"?
>
> Sure, that works for me.  Changed.
>
>> > Otherwise I'd really prefer seeing explicit TYPE_EMPTY_P checks
>> > which would reduce the number of "indirect" greps one has to do when
>> > looking for effects of TYPE_EMPTY_P.
>>
>> Hmm, yes, I was hoping we could encapsulate this in target code, but
>> needing these flags for LTO messes that up; if we can't have full
>> encapsulation, maybe we want less?
>
> So I don't know what to do here, I could go either way.  The explicit
> checks with ?: struck me as ugly but I can go back on that.
>
>> > Still needs FE and target maintainer approval -- the target maintainer
>> > wants to look

Re: [PING][patch] PR81794: have "would be stringified in traditional C" warning in libcpp/macro.c be controlled by -Wtraditional

2017-11-02 Thread Eric Gallager

Ping: https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01834.html

On 10/25/17, Eric Gallager  wrote:
> On Sat, Sep 30, 2017 at 8:05 PM, Eric Gallager 
> wrote:
>> On Fri, Sep 29, 2017 at 11:15 AM, David Malcolm 
>> wrote:
>>> On Sun, 2017-09-17 at 20:00 -0400, Eric Gallager wrote:
 Attached is a version of
 https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00481.html that
 contains
 a combination of both the fix and the testcase update, as requested
 in
 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81794#c2

 I had to use a different computer than I usually use to send this
 email, as the hard drive that originally had this patch is currently
 unresponsive. Since it's also the one with my ssh keys on it, I can't
 commit with it. Sorry if the ChangeLogs get mangled.
>>>
>>> Thanks for putting this together; sorry about the delay in reviewing
>>> it.
>>>
>>> The patch mostly looks good.
>>>
>>> Did you perform a full bootstrap and run of the testsuite with this
>>> patch?  If so, it's best to state this in the email, so that we know
>>> that the patch has survived this level of testing.
>>
>> Yes, I bootstrapped with it, but I haven't done a full run of the
>> testsuite with it yet; just the one testcase I updated.
>
> Update: I've now run the testsuite with it; test results are here:
> https://gcc.gnu.org/ml/gcc-testresults/2017-10/msg01751.html
> I'm pretty sure all the FAILs are unrelated to this patch.
>
>>
>>>
>>> Some nits below:
>>>
 libcpp/ChangeLog:

 2017-03-24  Eric Gallager  

  * macro.c (check_trad_stringification): Have warning be
 controlled by
  -Wtraditional.

 gcc/testsuite/ChangeLog:

 2017-09-17  Eric Gallager  

 PR preprocessor/81794
 * gcc.dg/pragma-diag-7.c: Update to include check for
 stringification.

 On Sat, May 6, 2017 at 11:33 AM, Eric Gallager 
 wrote:
 > Pinging this: https://gcc.gnu.org/ml/gcc-patches/2017-03/msg01325.h
 > tml
 >
 > On 3/24/17, Eric Gallager  wrote:
 > > It seemed odd to me that gcc was issuing a warning about
 > > compatibility
 > > with traditional C that I couldn't turn off by pushing/popping
 > > -Wtraditional over the problem area, so I made the attached
 > > (minor)
 > > patch to fix it. Survives bootstrap, but the only testing I've
 > > done
 > > with it has been compiling the one file that was giving me issues
 > > previously, which I'd need to reduce further to turn it into a
 > > proper
 > > test case.
 > >
 > > Thanks,
 > > Eric Gallager
 > >
 > > libcpp/ChangeLog:
 > >
 > > 2017-03-24  Eric Gallager  
 > >
 > >   * macro.c (check_trad_stringification): Have warning be
 > > controlled by
 > >   -Wtraditional.
 > >
 >
 > So I did the reducing I mentioned above and now have a testcase for
 > it; it was pretty similar to the one from here:
 > https://gcc.gnu.org/ml/gcc-patches/2017-03/msg01319.html
 > so I combined them into a single testcase and have attached the
 > combined version. I can confirm that the testcase passes with my
 > patch
 > applied.
>>>
>>> [...]
>>>
 diff --git a/gcc/testsuite/gcc.dg/pragma-diag-7.c
 b/gcc/testsuite/gcc.dg/pragma-diag-7.c
 index 402ee56..e06c410 100644
 --- a/gcc/testsuite/gcc.dg/pragma-diag-7.c
 +++ b/gcc/testsuite/gcc.dg/pragma-diag-7.c
 @@ -7,3 +7,16 @@ unsigned long bad = 1UL; /* { dg-warning "suffix" } */
  /* Note the extra space before the pragma on this next line: */
   #pragma GCC diagnostic pop
  unsigned long ok_again = 2UL; /* { dg-bogus "suffix" } */
 +
 +/* Redundant with the previous pop, but just shows that it fails to
 stop the
 + * following warning with an unpatched GCC: */
 +#pragma GCC diagnostic ignored "-Wtraditional"
 +
 +/* { dg-bogus "would be stringified" .+1 } */
>>>
>>> As far as I can tell, this dg-bogus line doesn't actually get matched;
>>> when I run the testsuite without the libcpp fix, I get:
>>>
>>>   FAIL: gcc.dg/pragma-diag-7.c (test for excess errors)
>>>
>>> If I update the dg-bogus line to read:
>>>
>>>   /* { dg-bogus "would be stringified" "" { target *-*-* } .+1 } */
>>>
>>> then it's matched, and I get:
>>>
>>>   FAIL: gcc.dg/pragma-diag-7.c  (test for bogus messages, line 16)
>>>
>>> I believe that as written the ".+1" 2nd argument is interpreted as a
>>> human-readable description of the problem, rather than as a line
>>> offset; I believe you would need to add positional args for the
>>> description and filter so that the line offset is argument 4.
>>>
>>> That said, I think the dg-bogus here is unnecessary: if the warning is
>>> erroneously emitted, we get:
>>>
>>>   FAIL: gcc.dg/pragma-diag-7.c (test for excess errors)
>>>
>>> (where "errors" really means "excess errors, warnings and extraneous
>>> gunk that isn't a note").
>>>

[patch committed FT32] Add FT32B support

2017-11-02 Thread James Bowman

FT32B is a new FT32 architecture type. FT32B has a code compression
scheme which uses linker relaxations. It also has a security option to
prevent reads from program memory.

[gcc]

2017-11-02  James Bowman  

* config/ft32/ft32.c (ft32_addr_space_legitimate_address_p): increase
offset range for FT32B.
* config/ft32/ft32.h: option "mcompress" enables relaxation.
* config/ft32/ft32.md: Add TARGET_NOPM.
* config/ft32/ft32.opt: Add mft32b, mcompress, mnopm.
* gcc/doc/invoke.texi: Add mft32b, mcompress, mnopm.

Index: gcc/config/ft32/ft32.c
===
--- gcc/config/ft32/ft32.c  (revision 254348)
+++ gcc/config/ft32/ft32.c  (working copy)
@@ -866,6 +866,8 @@
 ft32_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
   addr_space_t as ATTRIBUTE_UNUSED)
 {
+  int max_offset = TARGET_FT32B ? 16384 : 128;
+
   if (mode != BLKmode)
 {
   if (GET_CODE (x) == PLUS)
@@ -875,8 +877,9 @@
   op2 = XEXP (x, 1);
   if (GET_CODE (op1) == REG
   && CONST_INT_P (op2)
-  && INTVAL (op2) >= -128
-  && INTVAL (op2) < 128 && reg_ok_for_base_p (op1, strict))
+  && (-max_offset <= INTVAL (op2))
+  && (INTVAL (op2) < max_offset)
+  && reg_ok_for_base_p (op1, strict))
 goto yes;
   if (GET_CODE (op1) == SYMBOL_REF && CONST_INT_P (op2))
 goto yes;
Index: gcc/config/ft32/ft32.h
===
--- gcc/config/ft32/ft32.h  (revision 254348)
+++ gcc/config/ft32/ft32.h  (working copy)
@@ -39,6 +39,7 @@
 
 #undef LIB_SPEC
 #define LIB_SPEC "%{!shared:%{!symbolic:-lc}} \
+   %{mcompress:--relax} \
%{msim:-Tsim.ld}"
 
 #undef  LINK_SPEC
@@ -199,12 +200,12 @@
 
 #define GLOBAL_ASM_OP "\t.global\t"
 
-#define JUMP_TABLES_IN_TEXT_SECTION 1
+#define JUMP_TABLES_IN_TEXT_SECTION (TARGET_NOPM ? 0 : 1)
 
 /* This is how to output an element of a case-vector that is absolute.  */
 
 #define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE)  \
-fprintf (FILE, "\tjmp\t.L%d\n", VALUE);\
+fprintf (FILE, "\t.long\t.L%d\n", VALUE);  \
 
 /* Passing Arguments in Registers */
 
@@ -469,7 +470,7 @@
 #define ADDR_SPACE_PM 1
 
 #define REGISTER_TARGET_PRAGMAS() do { \
-  c_register_addr_space ("__flash__", ADDR_SPACE_PM); \
+  c_register_addr_space ("__flash__", TARGET_NOPM ? 0 : ADDR_SPACE_PM); \
 } while (0);
 
 extern int ft32_is_mem_pm(rtx o);
Index: gcc/config/ft32/ft32.md
===
--- gcc/config/ft32/ft32.md (revision 254348)
+++ gcc/config/ft32/ft32.md (working copy)
@@ -777,8 +777,12 @@
(clobber (match_scratch:SI 2 "=&r"))
   ]
   ""
-  "ldk.l\t$cc,%l1\;ashl.l\t%2,%0,2\;add.l\t%2,%2,$cc\;jmpi\t%2"
-  )
+  {
+if (TARGET_NOPM)
+  return 
\"ldk.l\t$cc,%l1\;ashl.l\t%2,%0,2\;add.l\t%2,%2,$cc\;ldi.l\t%2,%2,0\;jmpi\t%2\";
+else
+  return 
\"ldk.l\t$cc,%l1\;ashl.l\t%2,%0,2\;add.l\t%2,%2,$cc\;lpmi.l\t%2,%2,0\;jmpi\t%2\";
+  })
 
 ;; -
 ;; Atomic exchange instruction
Index: gcc/config/ft32/ft32.opt
===
--- gcc/config/ft32/ft32.opt(revision 254348)
+++ gcc/config/ft32/ft32.opt(working copy)
@@ -29,3 +29,15 @@
 mnodiv
 Target Report Mask(NODIV)
 Avoid use of the DIV and MOD instructions
+
+mft32b
+Target Report Mask(FT32B)
+target the FT32B architecture
+
+mcompress
+Target Report Mask(COMPRESS)
+enable FT32B code compression
+
+mnopm
+Target Report Mask(NOPM)
+Avoid placing any readable data in program memory
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi (revision 254348)
+++ gcc/doc/invoke.texi (working copy)
@@ -743,7 +743,7 @@
 @gccoptlist{-msmall-model  -mno-lsim}
 
 @emph{FT32 Options}
-@gccoptlist{-msim  -mlra  -mnodiv}
+@gccoptlist{-msim  -mlra  -mnodiv  -mft32b  -mcompress  -mnopm}
 
 @emph{FRV Options}
 @gccoptlist{-mgpr-32  -mgpr-64  -mfpr-32  -mfpr-64 @gol
@@ -17768,6 +17768,18 @@
 @opindex mnodiv
 Do not use div and mod instructions.
 
+@item -mft32b
+@opindex mft32b
+Enable use of the extended instructions of the FT32B processor.
+
+@item -mcompress
+@opindex mcompress
+Compress all code using the Ft32B code compression scheme.
+
+@item -mnopm
+@opindex  mnopm
+Do not generate code that reads program memory.
+
 @end table
 
 @node FRV Options

Re: [C++ Patch] PR 81957 ("ICE decltype")

2017-11-02 Thread Paolo Carlini


Hi,

On 02/11/2017 17:54, Jason Merrill wrote:

Do we really need to add tsubst_flags_t to template_parm_to_arg and
friends?  It should never produce an error.
I see. I'm finishing testing the below, everything looks good so far. 
Looks ok?


Thanks,
Paolo.

/
/cp
2017-11-02  Paolo Carlini  

PR c++/81957
* pt.c (make_pack_expansion): Add tsubst_flags_t parameter.
(expand_integer_pack, convert_template_argument, coerce_template_parms,
gen_elem_of_pack_expansion_instantiation, tsubst_pack_expansion,
unify): Adjust calls.
* tree.c (cp_build_qualified_type_real): Likewise.
* cp-tree.h (make_pack_expansion, template_parm_to_arg): Adjust
declaration.

/testsuite
2017-11-02  Paolo Carlini  

PR c++/81957
* g++.dg/cpp0x/variadic-crash5.C: New.
Index: cp/cp-tree.h
===
--- cp/cp-tree.h(revision 254350)
+++ cp/cp-tree.h(working copy)
@@ -6482,7 +6482,7 @@ extern bool uses_parameter_packs(t
 extern bool template_parameter_pack_p   (const_tree);
 extern bool function_parameter_pack_p  (const_tree);
 extern bool function_parameter_expanded_from_pack_p (tree, tree);
-extern tree make_pack_expansion (tree);
+extern tree make_pack_expansion (tree, tsubst_flags_t = 
tf_warning_or_error);
 extern bool check_for_bare_parameter_packs  (tree);
 extern tree build_template_info(tree, tree);
 extern tree get_template_info  (const_tree);
Index: cp/pt.c
===
--- cp/pt.c (revision 254350)
+++ cp/pt.c (working copy)
@@ -3435,7 +3435,7 @@ expand_integer_pack (tree call, tree args, tsubst_
  call = copy_node (call);
  CALL_EXPR_ARG (call, 0) = hi;
}
-  tree ex = make_pack_expansion (call);
+  tree ex = make_pack_expansion (call, complain);
   tree vec = make_tree_vec (1);
   TREE_VEC_ELT (vec, 0) = ex;
   return vec;
@@ -3724,7 +3724,7 @@ uses_parameter_packs (tree t)
EXPR_PACK_EXPANSION, TYPE_PACK_EXPANSION, or TREE_LIST,
respectively.  */
 tree 
-make_pack_expansion (tree arg)
+make_pack_expansion (tree arg, tsubst_flags_t complain)
 {
   tree result;
   tree parameter_packs = NULL_TREE;
@@ -3770,7 +3770,9 @@ tree
 
   if (parameter_packs == NULL_TREE)
 {
-  error ("base initializer expansion %qT contains no parameter packs", 
arg);
+ if (complain & tf_error)
+   error ("base initializer expansion %qT contains no parameter packs",
+  arg);
   delete ppd.visited;
   return error_mark_node;
 }
@@ -3834,10 +3836,13 @@ tree
   /* Make sure we found some parameter packs.  */
   if (parameter_packs == NULL_TREE)
 {
-  if (TYPE_P (arg))
-error ("expansion pattern %qT contains no argument packs", arg);
-  else
-error ("expansion pattern %qE contains no argument packs", arg);
+  if (complain & tf_error)
+   {
+ if (TYPE_P (arg))
+   error ("expansion pattern %qT contains no argument packs", arg);
+ else
+   error ("expansion pattern %qE contains no argument packs", arg);
+   }
   return error_mark_node;
 }
   PACK_EXPANSION_PARAMETER_PACKS (result) = parameter_packs;
@@ -7694,7 +7699,7 @@ convert_template_argument (tree parm,
   if (DECL_TEMPLATE_TEMPLATE_PARM_P (val))
 val = TREE_TYPE (val);
  if (TREE_CODE (orig_arg) == TYPE_PACK_EXPANSION)
-   val = make_pack_expansion (val);
+   val = make_pack_expansion (val, complain);
 }
}
  else
@@ -8188,7 +8193,7 @@ coerce_template_parms (tree parms,
  else if (TYPE_P (conv) && !TYPE_P (pattern))
/* Recover from missing typename.  */
TREE_VEC_ELT (inner_args, arg_idx)
- = make_pack_expansion (conv);
+ = make_pack_expansion (conv, complain);
 
   /* We don't know how many args we have yet, just
  use the unconverted ones for now.  */
@@ -11161,7 +11166,7 @@ gen_elem_of_pack_expansion_instantiation (tree pat
   the Ith element resulting from the substituting is going to
   be a pack expansion as well.  */
   if (ith_elem_is_expansion)
-t = make_pack_expansion (t);
+t = make_pack_expansion (t, complain);
 
   return t;
 }
@@ -11573,7 +11578,7 @@ tsubst_pack_expansion (tree t, tree args, tsubst_f
   /* We got some full packs, but we can't substitute them in until we
 have values for all the packs.  So remember these until then.  */
 
-  t = make_pack_expansion (pattern);
+  t = make_pack_expansion (pattern, complain);
   PACK_EXPANSION_EXTRA_ARGS (t) = args

[patch][libgcc] FreeBSD amd64 unwind fix

2017-11-02 Thread Andreas Tobler


Hi all,

The attached patch fixes PR 82635. I tested it on all active 
branches including the gcc-6 branch where we make extensive use of this 
MD_FALLBACK_FRAME_STATE_FOR macro. Namely in the libjava test suite. 
(Found no other suitable tests which do make use of this macro).

No regressions.

If there are no objections, I'm going to commit the patch below to all 
active branches in the next days.


Thanks,
Andreas

2017-11-02  Andreas Tobler  

PR libgcc/82635
* config/i386/freebsd-unwind.h (MD_FALLBACK_FRAME_STATE_FOR): Use a
sysctl to determine whether we're in a trampoline.
Keep the pattern matching method for systems without
KERN_PROC_SIGTRAMP sysctl.

Index: libgcc/config/i386/freebsd-unwind.h
===
--- libgcc/config/i386/freebsd-unwind.h (revision 254350)
+++ libgcc/config/i386/freebsd-unwind.h (working copy)
@@ -28,7 +28,10 @@
 
 #include 
 #include 
+#include 
+#include 
 #include 
+#include 
 #include 
 
 #define REG_NAME(reg)  sf_uc.uc_mcontext.mc_## reg
@@ -36,6 +39,38 @@
 #ifdef __x86_64__
 #define MD_FALLBACK_FRAME_STATE_FOR x86_64_freebsd_fallback_frame_state
 
+#ifdef KERN_PROC_SIGTRAMP
+/* FreeBSD past 9.3 provides a kern.proc.sigtramp. sysctl that
+   returns the location of the signal trampoline. Use this to find
+   out whether we're in a trampoline.
+*/
+static int
+x86_64_outside_sigtramp_range (unsigned char *pc)
+{
+  static int sigtramp_range_determined = 0;
+  static unsigned char *sigtramp_start, *sigtramp_end;
+
+  if (sigtramp_range_determined == 0)
+{
+  struct kinfo_sigtramp kst = {0};
+  size_t len = sizeof (kst);
+  int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_SIGTRAMP, getpid() };
+
+  sigtramp_range_determined = 1;
+  if (sysctl (mib, 4, &kst, &len, NULL, 0) == 0)
+  {
+sigtramp_range_determined = 2;
+sigtramp_start = kst.ksigtramp_start;
+sigtramp_end   = kst.ksigtramp_end;
+  }
+}
+  if (sigtramp_range_determined < 2)  /* sysctl failed if < 2 */
+return 1;
+
+  return (pc < sigtramp_start || pc >= sigtramp_end);
+}
+#endif
+
 static _Unwind_Reason_Code
 x86_64_freebsd_fallback_frame_state
 (struct _Unwind_Context *context, _Unwind_FrameState *fs)
@@ -43,6 +78,7 @@
   struct sigframe *sf;
   long new_cfa;
 
+#ifndef KERN_PROC_SIGTRAMP
   /* Prior to FreeBSD 9, the signal trampoline was located immediately
  before the ps_strings.  To support non-executable stacks on AMD64,
  the sigtramp was moved to a shared page for FreeBSD 9.  Unfortunately
@@ -62,12 +98,15 @@
 && *(unsigned int *)(context->ra +  8) == 0x01a1c0c7
 && *(unsigned int *)(context->ra + 12) == 0x050f ))
 return _URC_END_OF_STACK;
+#else
+  if (x86_64_outside_sigtramp_range(context->ra))
+return _URC_END_OF_STACK;
+#endif
 
   sf = (struct sigframe *) context->cfa;
   new_cfa = sf->REG_NAME(rsp);
   fs->regs.cfa_how = CFA_REG_OFFSET;
-  /* Register 7 is rsp  */
-  fs->regs.cfa_reg = 7;
+  fs->regs.cfa_reg =  __LIBGCC_STACK_POINTER_REGNUM__;
   fs->regs.cfa_offset = new_cfa - (long) context->cfa;
 
   /* The SVR4 register numbering macros aren't usable in libgcc.  */

Re: [PATCH, rs6000 V4] Add Power 8 support to vec_revb

2017-11-02 Thread Carl Love

GCC Maintainers:

I have addressed the issues that Segher mentioned.  Per our discussions,
I have added new iterator VSX_XXBR to cover the needed integer and float
values with the exception of V16QI.  The define_expand for revb uses the
new iterator to generate the needed Power 8 code.  The V16QI is handled
by an additional iterator to just generate a move operator as reversing
the order of bytes in a byte element is a no-op.

The patch has been retested on

 powerpc64le-unknown-linux-gnu (Power 8 LE),   
 powerpc64le-unknown-linux-gnu (Power 9 LE) 

without regressions.  

Please let me know if the following patch is acceptable.  Thanks.

   Carl Love


--

gcc/ChangeLog:

2017-11-01  Carl Love  

* config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8
definitions.
(P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions.
* config/rs6000/altivec.h (vec_revb): Change the #define from power 9
to power 8.
* config/rs6000/rs6000-protos.h (swap_selector_for_mode): Add extern
declaration.
* config/rs6000/rs6000.c (swap_endianess_selector_for_mode): Add
function.
* config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1, BU_P8V_OVERLOAD_1):
Add power 8 macro expansions.
(BU_P9V_OVERLOAD_1): Remove power 9 overload expansion.
* config/rs6000/vsx.md (revb_): Add define_expand to generate
power 8 instructions.  (VSX_XXBR): Add iterator.

gcc/testsuite/ChangeLog:

2017-11-01  Carl Love  

* gcc.target/powerpc/builtins-revb-runnable.c: New runnable test file.
---
 gcc/config/rs6000/altivec.h|   3 +-
 gcc/config/rs6000/rs6000-builtin.def   |  10 +-
 gcc/config/rs6000/rs6000-c.c   |  44 +--
 gcc/config/rs6000/rs6000-protos.h  |   2 +
 gcc/config/rs6000/rs6000.c |  71 +
 gcc/config/rs6000/vsx.md   |  43 +++
 .../gcc.target/powerpc/builtins-revb-runnable.c| 346 +
 7 files changed, 494 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 94a4db2..cec617a 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -415,6 +415,7 @@
 #define vec_vsubuqm __builtin_vec_vsubuqm
 #define vec_vupkhsw __builtin_vec_vupkhsw
 #define vec_vupklsw __builtin_vec_vupklsw
+#define vec_revb __builtin_vec_revb
 #endif
 
 #ifdef __POWER9_VECTOR__
@@ -478,8 +479,6 @@
 
 #define vec_xlx __builtin_vec_vextulx
 #define vec_xrx __builtin_vec_vexturx
-
-#define vec_revb __builtin_vec_revb
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index ac9ddae..bf3d3e4 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1892,6 +1892,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST,  cmpbdi3)
 /* 1 argument VSX instructions added in ISA 2.07.  */
 BU_P8V_VSX_1 (XSCVSPDPN,  "xscvspdpn", CONST,  vsx_xscvspdpn)
 BU_P8V_VSX_1 (XSCVDPSPN,  "xscvdpspn", CONST,  vsx_xscvdpspn)
+BU_P8V_VSX_1 (REVB_V1TI,  "revb_v1ti", CONST,  revb_v1ti)
+BU_P8V_VSX_1 (REVB_V2DI,  "revb_v2di", CONST,  revb_v2di)
+BU_P8V_VSX_1 (REVB_V4SI,  "revb_v4si", CONST,  revb_v4si)
+BU_P8V_VSX_1 (REVB_V8HI,  "revb_v8hi", CONST,  revb_v8hi)
+BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi",CONST,  revb_v16qi)
+BU_P8V_VSX_1 (REVB_V2DF,  "revb_v2df", CONST,  revb_v2df)
+BU_P8V_VSX_1 (REVB_V4SF,  "revb_v4sf", CONST,  revb_v4sf)
 
 /* 1 argument altivec instructions added in ISA 2.07.  */
 BU_P8V_AV_1 (ABS_V2DI,   "abs_v2di",   CONST,  absv2di2)
@@ -2001,6 +2008,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh")
 BU_P8V_OVERLOAD_1 (VPOPCNTUW,  "vpopcntuw")
 BU_P8V_OVERLOAD_1 (VPOPCNTUD,  "vpopcntud")
 BU_P8V_OVERLOAD_1 (VGBBD,  "vgbbd")
+BU_P8V_OVERLOAD_1 (REVB,   "revb")
 
 /* ISA 2.07 vector overloaded 2 argument functions.  */
 BU_P8V_OVERLOAD_2 (EQV,"eqv")
@@ -2112,8 +2120,6 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP,  "scalar_test_neg_qp")
 BU_P9V_OVERLOAD_1 (VSTDCNDP,   "scalar_test_neg_dp")
 BU_P9V_OVERLOAD_1 (VSTDCNSP,   "scalar_test_neg_sp")
 
-BU_P9V_OVERLOAD_1 (REVB,   "revb")
-
 BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth")
 BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl")
 
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 8e58124..34517e6 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -5551,36 +5551,38 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
 RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
 RS6000_BTI_unsigned_V16QI, 0 },
 
-  { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI,
-RS6000_BTI_unsi

Re: [C++ Patch] PR 81957 ("ICE decltype")

OK.

On Thu, Nov 2, 2017 at 3:44 PM, Paolo Carlini  wrote:
> Hi,
>
> On 02/11/2017 17:54, Jason Merrill wrote:
>>
>> Do we really need to add tsubst_flags_t to template_parm_to_arg and
>> friends?  It should never produce an error.
>
> I see. I'm finishing testing the below, everything looks good so far. Looks
> ok?
>
> Thanks,
> Paolo.
>
> /

[committed] Move selftest::test_diagnostic_context to its own header

It's useful to not rely on global_dc in selftests, so this patch
moves class selftest::test_diagnostic_context from
diagnostic-show-locus.c to a new header and source file.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r254354.

gcc/ChangeLog:
* Makefile.in (OBJS-libcommon): Add selftest-diagnostic.o.
* diagnostic-show-locus.c: Include "selftest-diagnostic.h".
(class selftest::test_diagnostic_context): Move to...
* selftest-diagnostic.c: New file.
* selftest-diagnostic.h: New file.
---
 gcc/Makefile.in |  2 +-
 gcc/diagnostic-show-locus.c | 29 +
 gcc/selftest-diagnostic.c   | 62 +
 gcc/selftest-diagnostic.h   | 49 +++
 4 files changed, 113 insertions(+), 29 deletions(-)
 create mode 100644 gcc/selftest-diagnostic.c
 create mode 100644 gcc/selftest-diagnostic.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 5f5f403..51968e4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1591,7 +1591,7 @@ OBJS-libcommon = diagnostic.o diagnostic-color.o 
diagnostic-show-locus.o \
pretty-print.o intl.o \
sbitmap.o \
vec.o input.o version.o hash-table.o ggc-none.o memory-block.o \
-   selftest.o
+   selftest.o selftest-diagnostic.o
 
 # Objects in libcommon-target.a, used by drivers and by the core
 # compiler and containing target-dependent code.
diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index 3512111..a1ce682 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "gcc-rich-location.h"
 #include "selftest.h"
+#include "selftest-diagnostic.h"
 
 #ifdef HAVE_TERMIOS_H
 # include 
@@ -1987,34 +1988,6 @@ namespace selftest {
 
 /* Selftests for diagnostic_show_locus.  */
 
-/* Convenience subclass of diagnostic_context for testing
-   diagnostic_show_locus.  */
-
-class test_diagnostic_context : public diagnostic_context
-{
- public:
-  test_diagnostic_context ()
-  {
-diagnostic_initialize (this, 0);
-show_caret = true;
-show_column = true;
-start_span = start_span_cb;
-  }
-  ~test_diagnostic_context ()
-  {
-diagnostic_finish (this);
-  }
-
-  /* Implementation of diagnostic_start_span_fn, hiding the
- real filename (to avoid printing the names of tempfiles).  */
-  static void
-  start_span_cb (diagnostic_context *context, expanded_location exploc)
-  {
-exploc.file = "FILENAME";
-default_diagnostic_start_span_fn (context, exploc);
-  }
-};
-
 /* Verify that diagnostic_show_locus works sanely on UNKNOWN_LOCATION.  */
 
 static void
diff --git a/gcc/selftest-diagnostic.c b/gcc/selftest-diagnostic.c
new file mode 100644
index 000..2018062
--- /dev/null
+++ b/gcc/selftest-diagnostic.c
@@ -0,0 +1,62 @@
+/* Selftest support for diagnostics.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "selftest.h"
+#include "selftest-diagnostic.h"
+
+/* The selftest code should entirely disappear in a production
+   configuration, hence we guard all of it with #if CHECKING_P.  */
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Implementation of class selftest::test_diagnostic_context.  */
+
+test_diagnostic_context::test_diagnostic_context ()
+{
+  diagnostic_initialize (this, 0);
+  show_caret = true;
+  show_column = true;
+  start_span = start_span_cb;
+}
+
+test_diagnostic_context::~test_diagnostic_context ()
+{
+  diagnostic_finish (this);
+}
+
+/* Implementation of diagnostic_start_span_fn, hiding the
+   real filename (to avoid printing the names of tempfiles).  */
+
+void
+test_diagnostic_context::start_span_cb (diagnostic_context *context,
+   expanded_location exploc)
+{
+  exploc.file = "FILENAME";
+  default_diagnostic_start_span_fn (context, exploc);
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/selftest-diagnostic.h b/gcc/selftest-diagnostic.h
new file mode 100644
index 000..61525dc
--- /dev/null
+++ b/gcc/selftest-diagnostic.h
@@ -0,0 +1,49 @@
+/* Selftest support for diagnostics.
+   Copyright (C

Re: [Patch, fortran] PR81447 - [7/8] gfortran fails to recognize the exact dynamic type of a polymorphic entity that was allocated in a external procedure

2017-11-02 Thread Paul Richard Thomas

Dear All,

Please find attached the revised version of the patch following my
late realizations in yesterday's submission.

Cheers

Paul


On 1 November 2017 at 18:22, Paul Richard Thomas
 wrote:
> Dear All,
>
> This patch is adequately described by the comment in the second chunk
> applied to resolve.c.
>
> Note, however, that the 'unconditionally' is promptly undermined by
> the subsequent conditions. I will change the adjective appropriately.
> In writing this, I have just realised that access=private need not
> have a vtable generated unless it is required for a class within the
> module. I will make it so and regtest once more.
>
> Some of the increases in counts in the tree dumps look alarming. They
> are however just a reflection of the number of derived types in some
> of the tests and are due to the auxiliary vtable functions.
>
> Bootstrapped and regtested on FC23/x86_64 - OK for trunk and then 7- branch?
>
> Paul
>
> 2017-11-01  Paul Thomas  
>
> PR fortran/81447
> PR fortran/82783
> * resolve.c (resolve_component): There is no need to resolve
> the components of a use associated vtype.
> (resolve_fl_derived): Unconditionally generate a vtable for any
> module derived type, as long as the standard is F2003 or later
> and it is not a vtype or a PDT template.
>
> 2017-11-01  Paul Thomas  
>
> PR fortran/81447
> * gfortran.dg/class_65.f90: New test.
> * gfortran.dg/alloc_comp_basics_1.f90: Increase builtin_free
> count from 18 to 21.
> * gfortran.dg/allocatable_scalar_9.f90: Increase builtin_free
> count from 32 to 54.
> * gfortran.dg/auto_dealloc_1.f90: Increase builtin_free
> count from 4 to 10.
> * gfortran.dg/coarray_lib_realloc_1.f90: Increase builtin_free
> count from 3 to 6. Likewise _gfortran_caf_deregister from 2 to
> 3, builtin_malloc from 1 to 4 and builtin_memcpy|= MEM from
> 2 to 5.
> * gfortran.dg/finalize_28.f90: Increase builtin_free
> count from 3 to 6.
> * gfortran.dg/move_alloc_15.f90: Increase builtin_free and
> builtin_malloc counts from 11 to 14.
> * gfortran.dg/typebound_proc_27.f03: Increase builtin_free
> count from 7 to 10. Likewise builtin_malloc from 12 to 15.



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein
Index: gcc/fortran/resolve.c
===
*** gcc/fortran/resolve.c   (revision 254300)
--- gcc/fortran/resolve.c   (working copy)
*** resolve_component (gfc_component *c, gfc
*** 13496,13501 
--- 13496,13504 
if (c->attr.artificial)
  return true;

+   if (sym->attr.vtype && sym->attr.use_assoc)
+ return true;
+
/* F2008, C442.  */
if ((!sym->attr.is_class || c != sym->components)
&& c->attr.codimension
*** resolve_fl_derived (gfc_symbol *sym)
*** 14075,14080 
--- 14078,14097 
if (!resolve_typebound_procedures (sym))
  return false;

+   /* Generate module vtables subject to their accessibility and their not
+  being vtables or pdt templates. If this is not done class declarations
+  in external procedures wind up with their own version and so SELECT TYPE
+  fails because the vptrs do not have the same address.  */
+   if (gfc_option.allow_std & GFC_STD_F2003
+   && sym->ns->proc_name
+   && sym->ns->proc_name->attr.flavor == FL_MODULE
+   && sym->attr.access != ACCESS_PRIVATE
+   && !(sym->attr.use_assoc || sym->attr.vtype || sym->attr.pdt_template))
+ {
+   gfc_symbol *vtab = gfc_find_derived_vtab (sym);
+   gfc_set_sym_referenced (vtab);
+ }
+
return true;
  }

Index: gcc/testsuite/gfortran.dg/alloc_comp_basics_1.f90
===
*** gcc/testsuite/gfortran.dg/alloc_comp_basics_1.f90   (revision 254300)
--- gcc/testsuite/gfortran.dg/alloc_comp_basics_1.f90   (working copy)
*** contains
*** 141,144 
  end subroutine check_alloc2

  end program alloc
! ! { dg-final { scan-tree-dump-times "builtin_free" 18 "original" } }
--- 141,144 
  end subroutine check_alloc2

  end program alloc
! ! { dg-final { scan-tree-dump-times "builtin_free" 21 "original" } }
Index: gcc/testsuite/gfortran.dg/allocatable_scalar_9.f90
===
*** gcc/testsuite/gfortran.dg/allocatable_scalar_9.f90  (revision 254300)
--- gcc/testsuite/gfortran.dg/allocatable_scalar_9.f90  (working copy)
***
*** 5,17 
  !
  ! Contributed by Tobias Burnus 

! module m
! type st
!   integer , allocatable :: a1
! end type st
! type at
!   integer , allocatable :: a2(:)
! end type at

  type t1
type(st), allocatable :: b1
--- 5,17 
  !
  ! Contributed by Tobias Burnus 

! module m
! type st
!   integer , allocatable :: a1
! end type st
! type at
!   integer , allocatable :: a2(:)
! end type at

  type t1
type(st), allocata

[committed] Add selftest for diagnostic_get_location_text

This patch adds some selftesting of diagnostic_get_location_text,
since Nathan was looking at rewriting the insides.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
I also tested it with:
  LANG=de_DE.UTF-8 make selftest
to avoid recurrences of PR bootstrap/71481 comment #8.

Committed to trunk as r254355.

gcc/ChangeLog:
* diagnostic.c: Include "selftest-diagnostic.h".
(selftest::assert_location_text): New function.
(selftest::test_diagnostic_get_location_text): New function.
(selftest::diagnostic_c_tests): Call it.
---
 gcc/diagnostic.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 9db4b46..813bca6 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "edit-context.h"
 #include "selftest.h"
+#include "selftest-diagnostic.h"
 
 #ifdef HAVE_TERMIOS_H
 # include 
@@ -1627,6 +1628,45 @@ test_print_parseable_fixits_replace ()
pp_formatted_text (&pp));
 }
 
+/* Verify that
+ diagnostic_get_location_text (..., SHOW_COLUMN)
+   generates EXPECTED_LOC_TEXT, given FILENAME, LINE, COLUMN, with
+   colorization disabled.  */
+
+static void
+assert_location_text (const char *expected_loc_text,
+ const char *filename, int line, int column,
+ bool show_column)
+{
+  test_diagnostic_context dc;
+  dc.show_column = show_column;
+
+  expanded_location xloc;
+  xloc.file = filename;
+  xloc.line = line;
+  xloc.column = column;
+  xloc.data = NULL;
+  xloc.sysp = false;
+
+  char *actual_loc_text = diagnostic_get_location_text (&dc, xloc);
+  ASSERT_STREQ (expected_loc_text, actual_loc_text);
+  free (actual_loc_text);
+}
+
+/* Verify that diagnostic_get_location_text works as expected.  */
+
+static void
+test_diagnostic_get_location_text ()
+{
+  const char *old_progname = progname;
+  progname = "PROGNAME";
+  assert_location_text ("PROGNAME:", NULL, 0, 0, true);
+  assert_location_text (":", "", 42, 10, true);
+  assert_location_text ("foo.c:42:10:", "foo.c", 42, 10, true);
+  assert_location_text ("foo.c:42:", "foo.c", 42, 10, false);
+  progname = old_progname;
+}
+
 /* Run all of the selftests within this file.  */
 
 void
@@ -1637,6 +1677,7 @@ diagnostic_c_tests ()
   test_print_parseable_fixits_insert ();
   test_print_parseable_fixits_remove ();
   test_print_parseable_fixits_replace ();
+  test_diagnostic_get_location_text ();
 }
 
 } // namespace selftest
-- 
1.8.5.3

Re: Drop frequencies from basic blocks


On 11/02/2017 08:06 PM, Jan Hubicka wrote:

Sorry, I must have used an older diff file, because it is one of the unfinished
changes I made today.
I am attaching correct diff.



Thank you. This one works for me, however I see various errors for postgres PGO:

cd src/backend/replication/
marxin@marxinbox:~/Programming/postgres/src/backend/replication> gcc -Wall 
-Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels 
-Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -O2 -fprofile-use -I. -I. -I../../../src/include  
-D_GNU_SOURCE   -c -o walsender.o walsender.c

In file included from walsender.c:56:0:
walsender.c: In function ‘XLogRead’:
../../../src/include/access/xlog_internal.h:188:26: error: corrupted value 
profile: interval profile counter (0 out of 0) inconsistent with basic-block 
count (7531)
 (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)))
  ^
walsender.c:2367:4: note: in expansion of macro ‘XLogFilePath’
XLogFilePath(path, curFileTimeLine, sendSegNo, wal_segment_size);
^~~~


Problem is following:

Here we estimate count:

Old value = -1
New value = 7531
0x00aecb52 in estimate_bb_frequencies (force=force@entry=false) at 
../../gcc/predict.c:3590
3590bb->count = count.guessed_local ();
(gdb) bt
#0  0x00aecb52 in estimate_bb_frequencies (force=force@entry=false) at 
../../gcc/predict.c:3590
#1  0x00af05a4 in tree_estimate_probability 
(dry_run=dry_run@entry=false) at ../../gcc/predict.c:2828
#2  0x00af0b9c in (anonymous namespace)::pass_profile::execute 
(this=, fun=0x75b9e790) at ../../gcc/predict.c:3722
#3  0x00ad23f1 in execute_one_pass (pass=pass@entry=0x210faf0) at 
../../gcc/passes.c:2497
#4  0x00ad2cb1 in execute_pass_list_1 (pass=0x210faf0) at 
../../gcc/passes.c:2586
#5  0x00ad2cc3 in execute_pass_list_1 (pass=0x210f470) at 
../../gcc/passes.c:2587
#6  0x00ad2d05 in execute_pass_list (fn=, pass=) at ../../gcc/passes.c:2597
#7  0x00ad1631 in do_per_function_toporder (callback=callback@entry=0xad2cf0 
, data=0x210f2f0) at 
../../gcc/passes.c:1739
#8  0x00ad3387 in execute_ipa_pass_list (pass=0x210f290) at 
../../gcc/passes.c:2937
#9  0x0078fb42 in ipa_passes () at ../../gcc/cgraphunit.c:2423
#10 symbol_table::compile (this=this@entry=0x76817100) at 
../../gcc/cgraphunit.c:2558
#11 0x007923c7 in symbol_table::compile (this=0x76817100) at 
../../gcc/cgraphunit.c:2719
#12 symbol_table::finalize_compilation_unit (this=0x76817100) at 
../../gcc/cgraphunit.c:2716
#13 0x00bb20d3 in compile_file () at ../../gcc/toplev.c:479
#14 0x005d7fc5 in do_compile () at ../../gcc/toplev.c:2059
#15 toplev::main (this=this@entry=0x7fffd85e, argc=, 
argc@entry=38, argv=, argv@entry=0x7fffd958) at 
../../gcc/toplev.c:2194
#16 0x005da46b in main (argc=38, argv=0x7fffd958) at 
../../gcc/main.c:39

And later on we check it with real value of an interval counter:

Breakpoint 1, error_at (loc=2147497451, gmsgid=0x1605208 "corrupted value profile: 
%s profile counter (%d out of %d) inconsistent with basic-block count (%d)") at 
../../gcc/diagnostic.c:1354
1354{
(gdb) bt
#0  error_at (loc=2147497451, gmsgid=0x1605208 "corrupted value profile: %s profile 
counter (%d out of %d) inconsistent with basic-block count (%d)") at 
../../gcc/diagnostic.c:1354
#1  0x00eae5b9 in check_counter (stmt=0x758359f8, name=0x16052fc 
"interval", count=0x7fffd5a8, all=0x7fffd5c0, bb_count_d=...) at 
../../gcc/value-prof.c:607
#2  0x00eafe95 in gimple_mod_subtract_transform (si=0x7fffd640) at 
../../gcc/value-prof.c:1133
#3  0x00eae709 in gimple_value_profile_transformations () at 
../../gcc/value-prof.c:658
#4  0x00ca712c in tree_profiling () at ../../gcc/tree-profile.c:687
#5  (anonymous namespace)::pass_ipa_tree_profile::execute (this=) at ../../gcc/tree-profile.c:780
#6  0x00ad23f1 in execute_one_pass (pass=pass@entry=0x2110350) at 
../../gcc/passes.c:2497
#7  0x00ad33f2 in execute_ipa_pass_list (pass=0x2110350) at 
../../gcc/passes.c:2932
#8  0x0078fb42 in ipa_passes () at ../../gcc/cgraphunit.c:2423
#9  symbol_table::compile (this=this@entry=0x76817100) at 
../../gcc/cgraphunit.c:2558
#10 0x007923c7 in symbol_table::compile (this=0x76817100) at 
../../gcc/cgraphunit.c:2719
#11 symbol_table::finalize_compilation_unit (this=0x76817100) at 
../../gcc/cgraphunit.c:2716
#12 0x00bb20d3 in compile_file () at ../../gcc/toplev.c:479
#13 0x005d7fc5 in do_compile () at ../../gcc/toplev.c:2059
#14 toplev::main (this=this@entry=0x7fffd85e, argc=, 
argc@entry=38, argv=, argv@entry=0x7fffd958) at 
../../gcc/toplev.c:2194
#15 0x005da46b in main (argc=38, argv=0x7fffd958) at 
../../gcc/main.

Re: [Diagnostic Patch] don't print column zero


On 10/26/2017 10:34 AM, David Malcolm wrote:

[CCing Rainer and Mike for the gcc-dg.exp part]



My Tcl skills aren't great, so hopefully someone else can review this;
CCing Rainer and Mike.


Ping?

https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01911.html

nathan

--
Nathan Sidwell

[PATCH] RFC: add taxonomy IDs to diagnostics (CERT C, CWE, etc)

We currently identify our diagnostics via command-line options, and by
the text of the option itself.

This patch adds a way to supply metadata with a diagnostic, classifying
the problem being reported, according to one of the software problem
taxonomies e.g. "INT15-C" within the CERT C Secure Coding Standard,
or "CWE-681" within the Common Weakness Enumeration (CWE).

The patch tags some of our diagnostics with CERT C IDs.

For example:

t.c: In function 'f1':
t.c:9:20: warning: division 'sizeof (int *) / sizeof (int)' does not
compute the number of array elements [-Wsizeof-pointer-div] [ARR01-C]
   i = sizeof array / sizeof *array;  /* { dg-warning "does not compute the 
number of array elements" } */
^
t.c:6:10: note: first 'sizeof' operand was declared here
 f1 (int *array)
 ~^

Note the " [ARR01-C]" appended after the "[-Wsizeof-pointer-div]"
above.  The "ARR01-C" is colorized (assuming colorization is enabled).

Such metadata IDs can be useful for categorizing problems, or for
searching for helpful recommendations for addressing the diagnostic.

For example, if I search on Google for "-Wsizeof-pointer-div",
I get no results (owing to the leading dash having meaning for
Google); if I drop the leading dash, I get a number of
different pages describing the implementation of the warning.

If I search instead for "ARR01-C", the first hit takes me to
the article about that issue within the SEI CERT C Coding Standard,
giving lots of useful information about the problem and how to fix it.

The new output can be suppressed using a new -fno-diagnostics-show-id
flag.

Implementation-wise, the patch replaces the "int opt" taken by our
internal APIs (like warning_at) with a "class diag_id" which
can be implicitly constructed from an "int opt", so no changes are
needed at callsites that emit diagnostics, until ID tags are added,
so e.g.:

  if (warning_at (body_loc, OPT_Wmultistatement_macros,
  "macro expands to multiple statements"))
...

continues to compile, but can be converted to:

  if (warning_at (body_loc,
  diag_id (OPT_Wmultistatement_macros, "PRE10-C"),
  "macro expands to multiple statements"))
...

I didn't touch the Fortran error API.

Some known unknowns/questions:
- is this useful?  (I think so)
- if we implement this, should we go through the existing
  diagnostics tagging them?  if so, what taxonomy/taxonomies should we use?
  CERT vs CWE etc
- should the ID be tagged with the taxonomy it comes from? (so e.g.
  "ARR01-C" could be tagged as being from "CERT C", somehow).  How
  would this be presented to the end-user? Maybe:
[-Wsizeof-pointer-div] [CERT-C: ARR01-C]
- could/should we support multiple taxonomies e.g. CERT vs CWE e.g.
"[CERT-C: ARR01C] [CWE: CWE-467]" or somesuch
- if so, do we want taxonomies to be "pluggable"?  consider the
  use-case of a plugin that implements, say, the C++ Core Guidelines,
  and hence could print stuff like:
"[c++-core-guidelines: C.128]" or somesuch
- are there license/trademark issues here?

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Thoughts?

gcc/c-family/ChangeLog:
* c-common.c (c_cpp_error): Provide NULL for new id parameter of
diagnostic_set_info_translated.
* c-warn.c (warn_for_multistatement_macros): Identify the
diagnostic as "PRE10-C".

gcc/c/ChangeLog:
* c-decl.c (warn_defaults_to): Convert param from int to
const diag_id &, and pass on taxonomy id to the diagnostic_info.
* c-errors.c (pedwarn_c99): Likewise.
(pedwarn_c90): Likewise.
* c-parser.c (c_parser_binary_expression): Mark
OPT_Wsizeof_pointer_div warning as "ARR01-C".
* c-tree.h (pedwarn_c90): Convert param from int to
const diag_id &.
(pedwarn_c99): Likewise.
* c-typeck.c (c_expr_sizeof_expr): Mark OPT_Wsizeof_array_argument
warning as "ARR01-C".

gcc/cp/ChangeLog:
* cp-tree.h (pedwarn_cxx98): Convert param from int to
const diag_id &.
* error.c (pedwarn_cxx98): Likewise; pass on taxonomy id to the
diagnostic_info.
* typeck.c (cxx_sizeof_expr): Mark OPT_Wsizeof_array_argument
warning as "ARR01-C".
(cp_build_binary_op): Likewise for OPT_Wsizeof_pointer_div
warning.

gcc/ChangeLog:
* common.opt (fdiagnostics-show-id): New option.
* diagnostic-core.h (class diag_id): New class.
(warning): Convert param from int to const diag_id &.
(warning_n): Likewise.
(warning_at): Likewise.
(pedwarn): Likewise.
(emit_diagnostic): Likewise.
(emit_diagnostic_valist): Likewise.
* diagnostic.c (diagnostic_initialize): Initialize new "show_id"
field.
(diagnostic_set_info_translated): Add param "taxonomy_id" and
use it to initialize new field of same name.
(diagnostic_set_info): Add param "taxonomy_i

Re: Drop frequencies from basic blocks


Can be also seen in GCC PGO:

checking for ssize_t... ../../libdecnumber/decNumber.c: In function ‘decDecap’:
../../libdecnumber/decNumber.c:7640:25: error: corrupted value profile: 
interval profile counter (0 out of 0) inconsistent with basic-block count (4356)
   if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */
 ^~
no
checking unwind.h usability... ../../libdecnumber/decNumber.c: In function 
‘decNumberRotate’:
../../libdecnumber/decNumber.c:2526:9: error: corrupted value profile: interval 
profile counter (0 out of 0) inconsistent with basic-block count (8)
uInt save=res->lsu[0]%powers[shift];   /* save low digit(s) */
 ^~~~
../../libdecnumber/decNumber.c:2529:11: error: corrupted value profile: 
interval profile counter (0 out of 0) inconsistent with basic-block count (4)
  uInt rem=save%powers[shift-msudigits];/* split save */
   ^~~
../../libdecnumber/decNumber.c:2546:11: error: corrupted value profile: 
interval profile counter (0 out of 0) inconsistent with basic-block count (2)
  uInt save=res->lsu[0]%powers[shift];  /* save low digit(s) */
   ^~~~

Re: Drop frequencies from basic blocks



Apart from that I also see this GCC PGO:

../../libiberty/pex-unix.c:789:1: warning: Missing counts for called function 
pex_child_error.isra.1/69
 }
 ^
during IPA pass: inline
../../libiberty/pex-unix.c: In function ‘pex_child_error.isra.1’:
../../libiberty/pex-unix.c:373:1: internal compiler error: in operator>, at 
profile-count.h:821
 pex_child_error (struct pex_obj *obj, const char *executable,
 ^~~
0x13b2e53 profile_count::operator>(long) const
../../gcc/profile-count.h:821
0x13b2e53 inline_transform(cgraph_node*)
../../gcc/ipa-inline-transform.c:680
0x5d8dae execute_one_ipa_transform_pass
../../gcc/passes.c:2239
0x5d8dae execute_all_ipa_transforms()
../../gcc/passes.c:2281
0x894e4f cgraph_node::expand()
../../gcc/cgraphunit.c:2132
0x8961e0 expand_all_functions
../../gcc/cgraphunit.c:2275
0x8961e0 symbol_table::compile()
../../gcc/cgraphunit.c:2623
0x898ac6 symbol_table::compile()
../../gcc/cgraphunit.c:2719
0x898ac6 symbol_table::finalize_compilation_unit()
../../gcc/cgraphunit.c:2716

Feel free to ask me about details if necessary.

Martin

Re: [PATCH] RFC: add taxonomy IDs to diagnostics (CERT C, CWE, etc)

2017-11-02 Thread Martin Sebor


On 11/02/2017 02:51 PM, David Malcolm wrote:

We currently identify our diagnostics via command-line options, and by
the text of the option itself.

This patch adds a way to supply metadata with a diagnostic, classifying
the problem being reported, according to one of the software problem
taxonomies e.g. "INT15-C" within the CERT C Secure Coding Standard,
or "CWE-681" within the Common Weakness Enumeration (CWE).

The patch tags some of our diagnostics with CERT C IDs.

For example:

t.c: In function 'f1':
t.c:9:20: warning: division 'sizeof (int *) / sizeof (int)' does not
compute the number of array elements [-Wsizeof-pointer-div] [ARR01-C]
i = sizeof array / sizeof *array;  /* { dg-warning "does not compute the number 
of array elements" } */
 ^
t.c:6:10: note: first 'sizeof' operand was declared here
  f1 (int *array)
  ~^

Note the " [ARR01-C]" appended after the "[-Wsizeof-pointer-div]"
above.  The "ARR01-C" is colorized (assuming colorization is enabled).

Such metadata IDs can be useful for categorizing problems, or for
searching for helpful recommendations for addressing the diagnostic.

For example, if I search on Google for "-Wsizeof-pointer-div",
I get no results (owing to the leading dash having meaning for
Google); if I drop the leading dash, I get a number of
different pages describing the implementation of the warning.

If I search instead for "ARR01-C", the first hit takes me to
the article about that issue within the SEI CERT C Coding Standard,
giving lots of useful information about the problem and how to fix it.

The new output can be suppressed using a new -fno-diagnostics-show-id
flag.

Implementation-wise, the patch replaces the "int opt" taken by our
internal APIs (like warning_at) with a "class diag_id" which
can be implicitly constructed from an "int opt", so no changes are
needed at callsites that emit diagnostics, until ID tags are added,
so e.g.:

   if (warning_at (body_loc, OPT_Wmultistatement_macros,
   "macro expands to multiple statements"))
 ...

continues to compile, but can be converted to:

   if (warning_at (body_loc,
   diag_id (OPT_Wmultistatement_macros, "PRE10-C"),
   "macro expands to multiple statements"))
 ...

I didn't touch the Fortran error API.

Some known unknowns/questions:
- is this useful?  (I think so)
- if we implement this, should we go through the existing
   diagnostics tagging them?  if so, what taxonomy/taxonomies should we use?
   CERT vs CWE etc
- should the ID be tagged with the taxonomy it comes from? (so e.g.
   "ARR01-C" could be tagged as being from "CERT C", somehow).  How
   would this be presented to the end-user? Maybe:
 [-Wsizeof-pointer-div] [CERT-C: ARR01-C]
- could/should we support multiple taxonomies e.g. CERT vs CWE e.g.
 "[CERT-C: ARR01C] [CWE: CWE-467]" or somesuch
- if so, do we want taxonomies to be "pluggable"?  consider the
   use-case of a plugin that implements, say, the C++ Core Guidelines,
   and hence could print stuff like:
 "[c++-core-guidelines: C.128]" or somesuch
- are there license/trademark issues here?

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Thoughts?


I think it would be useful with some additional (likely substantial)
effort.  My concern (and a pet peeve) with the CERT IDs is that they
are not stable but tend to change from one release of the CERT coding
standard to the next.  To avoid the problem of GCC getting out of sync
with the current CERT standard we would need to maintain a mapping.
Another consideration is that the CERT C and C++ coding standards
don't always have the same ids and C++-specific aspects of certain
problems have their own C++ ids distinct from the C ones (e.g.,
EXP53-CPP. Do not read uninitialized memory vs EXP33-C. Do not read
uninitialized memory).  This implies a finer-grained classification
than a straightforward mapping from GCC OPT_Wfoo enums to CERT or
CWE ids can support.

In contrast to CERT, I have the impression that the CWE ids are more
stable (I don't know for certain that they're guaranteed to be).  By
their sheer number I would expect them to provide a finer-grained
mapping that, if used to tag GCC diagnostics, could then be used to
map them to other standards (and even GCC's own OPT_Wfoo ids).  This
would suggest using CWE as the primary ID and mapping that to
everything else.  But I'm pretty sure this would require a lot of
effort to get right, and keep in sync as existing GCC warnings are
tweaked and enhanced.

Sorry if this sounds discouraging.  I like the idea but I worry
about the implementation and maintenance challenges.

Martin

PS A couple of other popular standards to consider are MISRA C and
C++ and the Lockheed Martin JSF C++ coding standard.



gcc/c-family/ChangeLog:
* c-common.c (c_cpp_error): Provide NULL for new id parameter of
diagnostic_set_info_translated.
* c-warn.c (warn_for_multi

Re: [PATCH] Simplify _Node_insert_return to avoid including

2017-11-02 Thread Tim Song

Um, why are those member get's there at all (and with an index mapping
that doesn't agree with the member order)? [container.insert.return]
says that "It has no base classes or members other than those
specified."

Re: [Diagnostic Patch] don't print column zero

2017-11-02 Thread Martin Sebor


On 10/26/2017 12:23 PM, Nathan Sidwell wrote:

On 10/26/2017 02:12 PM, Eric Gallager wrote:

On 10/26/17, Nathan Sidwell  wrote:

On 10/26/2017 10:34 AM, David Malcolm wrote:



Possibly a silly question, but is it OK to have a formatted string
call in which some of the arguments aren't consumed? (here "col" is
only consumed for the true case, which consumes 2 arguments; it's not
consumed for the false case).


Yes.


I think I remember clang disagreeing; I remember it printing warnings
from -Wformat-extra-args in a similar situation in gnulib's
error_at_line module


C++ 21.10.1 defers to C.  C-99 7.15.1 has no words saying va_arg must be 
applied to exactly all arguments captured by va_list object. (and I'm 
pretty sure scanf can bail early)


Now, it might be sensible to warn about:
   printf ("", 5);
because printf's semantics are known.  But that's not ill-formed, just 
inefficient.  And in this case we're doing the equivalent of:

   printf (not-compile-time-constant, 5);



C says excess arguments are ignored:

  If the format is exhausted while arguments remain, the excess
  arguments are evaluated (as always) but are otherwise ignored.

-Wformat normally warns on this case when the format string is
constant.  It doesn't when the string is non-constant but that's
likely just a consequence of the warning running very early.  If
it ran later on it would warn.  It would be quite useful to have
-Wformat run later to catch mistakes in conditional format strings
(like the growing number of GCC's own uses of conditionals in
warning_at type of calls).  If/when -Wformat is ever moved to run
later, these cases will either have to be fixed or the warning
relaxed to allow them.  There have been requests to add
a portability level to -Wformat so this case could be moved into
some pedantic level if one were ever added.

Martin

PR82809: register handling in ix86_vector_duplicate_value

2017-11-02 Thread Richard Sandiford

When adding the call to gen_vec_duplicate, I failed to notice that
code further down modified the VEC_DUPLICATE in place.  That isn't
safe if gen_vec_duplicate returned a const_vector.

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard


2017-11-02  Richard Sandiford  

gcc/
PR target/82809
* config/i386/i386.c (ix86_vector_duplicate_value): Use
gen_vec_duplicate after forcing the scalar into a register.

gcc/testsuite/
* gcc.dg/pr82809.c: New test.

Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c  2017-11-01 09:20:07.353478095 +
+++ gcc/config/i386/i386.c  2017-11-02 22:22:41.073913390 +
@@ -41232,7 +41232,7 @@ ix86_vector_duplicate_value (machine_mod
   reg = force_reg (innermode, val);
   if (GET_MODE (reg) != innermode)
reg = gen_lowpart (innermode, reg);
-  XEXP (dup, 0) = reg;
+  SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
   seq = get_insns ();
   end_sequence ();
   if (seq)
Index: gcc/testsuite/gcc.dg/pr82809.c
===
--- /dev/null   2017-11-02 18:54:18.211305311 +
+++ gcc/testsuite/gcc.dg/pr82809.c  2017-11-02 22:22:41.076914027 +
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fno-tree-dominator-opts" } */
+
+struct locale_time_t
+{
+  const char *abday[7];
+  const unsigned int *wabday[7];
+};
+
+static const unsigned int empty_wstr[1] = { 0 };
+
+void
+time_read (struct locale_time_t *time)
+{
+  int cnt;
+
+  for (cnt=0; cnt < 7; cnt++)
+{
+  time->abday[cnt] = "";
+  time->wabday[cnt] = empty_wstr;
+}
+}

Re: [PATCH 2/2] [i386] PR82002 Part 2: Correct non-immediate offset/invalid INSN

2017-11-02 Thread Daniel Santos

On 10/31/2017 04:31 AM, Uros Bizjak wrote:
> On Tue, Oct 31, 2017 at 3:09 AM, Daniel Santos  
> wrote:
>> When we are realigning the stack pointer, making an ms_abi to sysv_abi
>> call and allocating 2GiB or more on the stack we end up with an invalid
>> INSN due to a non-immediate offset.  This occurs both with and without
>> -mcall-ms2sysv-xlogues.  Additionally, I've discovered that the stack
>> allocation with -mcall-ms2sysv-xlogues is incorrect as it ignores stack
>> checking, stack clash checking and probing.
>>
>> This patch fixes these problems by
>>
>> 1. No longer allocate stack space in ix86_emit_outlined_ms2sysv_save.
>> 2. Rearrange where we emit SSE saves or stub call:
>>a. Before frame allocation when offset from frame to save area is >= 2GiB.
>>b. After frame allocation when frame is < 2GiB.  (Stack allocations
>>   prior to the stub call can't be combined with those afterwards, so
>>   this is better when possible.)
>> 3. Modify choose_baseaddr to take an optional scratch_regno argument
>>and never return rtx that cannot be used as an immediate.
>>
>> gcc:
>> config/i386/i386.c (choose_basereg): Use optional scratch
>> register and add assertion.
>> (x86_emit_outlined_ms2sysv_save): use scratch register when
>> needed, and don't allocate stack.
>> (ix86_expand_prologue): Rearrange where SSE saves/stub call is
>> emitted, correct wrong allocation with -mcall-ms2sysv-xlogues.
>> (ix86_emit_outlined_ms2sysv_restore): Fix non-immediate offsets.
>>
>> gcc/testsuite:
>> gcc.target/i386/pr82002-2a.c: Change from xfail to fail.
>> gcc.target/i386/pr82002-2b.c: Likewise.
>>
>> Signed-off-by: Daniel Santos 
>> ---
>>  gcc/config/i386/i386.c | 76 
>> --
>>  gcc/testsuite/gcc.target/i386/pr82002-2a.c |  2 -
>>  gcc/testsuite/gcc.target/i386/pr82002-2b.c |  2 -
>>  3 files changed, 62 insertions(+), 18 deletions(-)
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 83a07afb3e1..abd8e937e0d 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -11520,7 +11520,8 @@ choose_basereg (HOST_WIDE_INT cfa_offset, rtx 
>> &base_reg,
>> The valid base registers are taken from CFUN->MACHINE->FS.  */
>>
>>  static rtx
>> -choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
>> +choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
>> +int scratch_regno = -1)
>>  {
>>rtx base_reg = NULL;
>>HOST_WIDE_INT base_offset = 0;
>> @@ -11534,6 +11535,28 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned 
>> int *align)
>>  choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
>>
>>gcc_assert (base_reg != NULL);
>> +
>> +  if (TARGET_64BIT)
>> +{
>> +  rtx base_offset_rtx = GEN_INT (base_offset);
>> +
>> +  if (scratch_regno >= 0)
>> +   {
>> + if (!x86_64_immediate_operand (base_offset_rtx, DImode))
>> +   {
>> + rtx tmp;
>> + rtx scratch_reg = gen_rtx_REG (DImode, scratch_regno);
>> +
>> + emit_insn (gen_rtx_SET (scratch_reg, base_offset_rtx));
>> + tmp = gen_rtx_PLUS (DImode, scratch_reg, base_reg);
>> + emit_insn (gen_rtx_SET (scratch_reg, tmp));
>> + return scratch_reg;
>> +   }
>> +   }
>> +  else
>> +   gcc_assert (x86_64_immediate_operand (base_offset_rtx, DImode));
>> +}
>> +
>>return plus_constant (Pmode, base_reg, base_offset);
>>  }
> This function doesn't need to return a register, it can return plus
> RTX. I'd suggest the following implementation:
>
> --cut here--
> Index: i386.c
> ===
> --- i386.c  (revision 254243)
> +++ i386.c  (working copy)
> @@ -11520,7 +11520,8 @@
> The valid base registers are taken from CFUN->MACHINE->FS.  */
>
>  static rtx
> -choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
> +choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
> +unsigned int scratch_regno = INVALID_REGNUM)
>  {
>rtx base_reg = NULL;
>HOST_WIDE_INT base_offset = 0;
> @@ -11534,6 +11535,19 @@
>  choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
>
>gcc_assert (base_reg != NULL);
> +
> +  rtx base_offset_rtx = GEN_INT (base_offset);
> +
> +  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
> +{
> +  gcc_assert (scratch_regno != INVALID_REGNUM);
> +
> +  rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
> +  emit_move_insn (scratch_reg, base_offset_rtx);
> +
> +  return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
> +}
> +
>return plus_constant (Pmode, base_reg, base_offset);
>  }

Oh, that's much better, thanks.

> --cut here--
>
> You have to always return Pmode, otherwise x32 will complain (you may
> try with -maddress-mode=short). Also, the above will

Re: [PATCH, rs6000 V4] Add Power 8 support to vec_revb

2017-11-02 Thread Segher Boessenkool

Hi Carl,

On Thu, Nov 02, 2017 at 12:54:57PM -0700, Carl Love wrote:
> 2017-11-01  Carl Love  
> 
>   * config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8
>   definitions.
>   (P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions.
>   * config/rs6000/altivec.h (vec_revb): Change the #define from power 9
>   to power 8.
>   * config/rs6000/r6000-protos.h (swap_selector_for_mode): Add extern
>   declaration.

swap_endian_selector_for_mode, and just "New" or "New declaration"
please.

>   * config/rs6000/rs6000.c (swap_endianess_selector_for_mode): Add
>   function.

"New" or "New function".  And spelling of the function name.

> +(define_mode_attr VSX_XXBR  [(V8HI  "h")
> +(V4SI  "w")

Please make V4SI line up with V8HI.

> +;; Swap all bytes in each element of vector
> +(define_expand "revb_"
> +  [(set (match_operand:VEC_REVB 0 "vsx_register_operand")
> + (bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))]
> +  ""
> +{
> +  rtx sel;

Declare it at its first use please:

> +  sel = swap_endian_selector_for_mode(mode);

  rtx sel = swap_endian_selector_for_mode(mode);

Okay for trunk with those changes.  Thanks!


Segher

Re: Drop frequencies from basic blocks

2017-11-02 Thread Jan Hubicka

> Can be also seen in GCC PGO:
> 
> checking for ssize_t... ../../libdecnumber/decNumber.c: In function 
> ‘decDecap’:
> ../../libdecnumber/decNumber.c:7640:25: error: corrupted value profile: 
> interval profile counter (0 out of 0) inconsistent with basic-block count 
> (4356)
>if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */

Hmm, I have restarted profiledbootstrap and it also reproduces for me.
It is odd that those counters are 0 in a basic block that is executed.
Having a small reproducer would be cool, but I will try to debug it
from libdecnumber.

That other ICE is another issue with Theresa's code for dropping profiles that
are mismatched (it is the only place we mismatch profiles in GCC).  I will
take a look at what went wrong here.

Thanks,
Honza
>  ^~
> no
> checking unwind.h usability... ../../libdecnumber/decNumber.c: In function 
> ‘decNumberRotate’:
> ../../libdecnumber/decNumber.c:2526:9: error: corrupted value profile: 
> interval profile counter (0 out of 0) inconsistent with basic-block count (8)
> uInt save=res->lsu[0]%powers[shift];   /* save low digit(s) */
>  ^~~~
> ../../libdecnumber/decNumber.c:2529:11: error: corrupted value profile: 
> interval profile counter (0 out of 0) inconsistent with basic-block count (4)
>   uInt rem=save%powers[shift-msudigits];/* split save */
>^~~
> ../../libdecnumber/decNumber.c:2546:11: error: corrupted value profile: 
> interval profile counter (0 out of 0) inconsistent with basic-block count (2)
>   uInt save=res->lsu[0]%powers[shift];  /* save low digit(s) */
>^~~~

[PATCH] combine: Print insns we try to combine

2017-11-02 Thread Segher Boessenkool

This adds some extra debug info to the dump file for combine: print
the insns that are input to try_combine.  I was worried printing more
will make the dump file only harder to read, but especially the info
from the REG_DEAD notes is invaluable.

Committing to trunk.


Segher


2017-11-02  Segher Boessenkool  

* combine.c (try_combine): Print the insns input to try_combine to the
dump file.

---
 gcc/combine.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/combine.c b/gcc/combine.c
index 5782013..910d4b0 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -2797,6 +2797,13 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
   else
fprintf (dump_file, "\nTrying %d -> %d:\n",
 INSN_UID (i2), INSN_UID (i3));
+
+  if (i0)
+   dump_insn_slim (dump_file, i0);
+  if (i1)
+   dump_insn_slim (dump_file, i1);
+  dump_insn_slim (dump_file, i2);
+  dump_insn_slim (dump_file, i3);
 }
 
   /* If multiple insns feed into one of I2 or I3, they can be in any
-- 
1.8.3.1

[PATCH] c-family: add name_hint/deferred_diagnostic (v3)

Jeff: You previously had concerns about the refcounting used in v1
of this patch; this avoids that in favor of using gnu::unique_ptr.
Joseph already approved the C frontend parts of v2 of this
patch.  

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
OK for trunk?

Changed in v3:
- We can't directly include "unique-ptr.h" due to the fix for
  PR bootstrap/82610; see:
https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01289.html
  The fix is to define INCLUDE_UNIQUE_PTR before including system.h.
  This version of the patch moves the usage of gnu::unique_ptr from
  c-common.h to a new name-hint.h header, to avoid having to define
  INCLUDE_UNIQUE_PTR everywhere that uses c-common.h.
- Updated for *_at_rich_loc renaming

Changed in v2:
- dropped refcounting in favor of using gnu::unique_ptr.  One
  wart with this is that the handling of suppressed diagnostics
  has to happen in every deferred_diagnostic subclass, rather
  than in the name_hint class.  It would be possible to fix this
  by introducing another dynamically-allocated object to manage
  this concern, but adding another dynamic allocation seemed like
  overkill.

Blurb from v1:

In various places we use lookup_name_fuzzy to provide a hint,
and can report messages of the form:
  error: unknown foo named 'bar'
or:
  error: unknown foo named 'bar'; did you mean 'SUGGESTION'?

This patch provides a way for lookup_name_fuzzy to provide
both the suggestion above, and (optionally) additional hints
that can be printed e.g.

  note: did you forget to include ?

This patch provides the mechanism and ports existing users
of lookup_name_fuzzy to the new return type.
There are no uses of such hints in this patch, but followup
patches provide various front-end specific uses of this.

gcc/c-family/ChangeLog:
* c-common.h (enum lookup_name_fuzzy_kind): Move to name-hint.h.
(lookup_name_fuzzy): Likewise.  Convert return type from
const char * to name_hint.  Add location_t param.
* name-hint.h: New header.

gcc/c/ChangeLog:
* c-decl.c: Define INCLUDE_UNIQUE_PTR before including system.h.
Include "c-family/name-hint.h"
(implicit_decl_warning): Convert "hint" from
const char * to name_hint.  Pass location to
lookup_name_fuzzy.  Suppress any deferred diagnostic if the
warning was not printed.
(undeclared_variable): Likewise for "guessed_id".
(lookup_name_fuzzy): Convert return type from const char *
to name_hint.  Add location_t param.
* c-parser.c: Define INCLUDE_UNIQUE_PTR before including system.h.
Include "c-family/name-hint.h"
(c_parser_declaration_or_fndef): Convert "hint" from
const char * to name_hint.  Pass location to lookup_name_fuzzy.
(c_parser_parameter_declaration): Likewise.

gcc/cp/ChangeLog:
* name-lookup.c: Define INCLUDE_UNIQUE_PTR before including system.h.
Include "c-family/name-hint.h"
(suggest_alternatives_for): Convert "fuzzy_name" from const char *
to name_hint, and rename to "hint".  Pass location to
lookup_name_fuzzy.
(lookup_name_fuzzy): Convert return type from const char *
to name_hint.  Add location_t param.
* parser.c: Define INCLUDE_UNIQUE_PTR before including system.h.
Include "c-family/name-hint.h"
(cp_parser_diagnose_invalid_type_name): Convert
"suggestion" from const char * to name_hint, and rename to "hint".
Pass location to lookup_name_fuzzy.
---
 gcc/c-family/c-common.h  |  12 -
 gcc/c-family/name-hint.h | 121 +++
 gcc/c/c-decl.c   |  37 ---
 gcc/c/c-parser.c |  18 ---
 gcc/cp/name-lookup.c |  15 +++---
 gcc/cp/parser.c  |  12 +++--
 6 files changed, 169 insertions(+), 46 deletions(-)
 create mode 100644 gcc/c-family/name-hint.h

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 7e1877e..0f84de9 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -990,18 +990,6 @@ extern tree lookup_label (tree);
 extern tree lookup_name (tree);
 extern bool lvalue_p (const_tree);
 
-enum lookup_name_fuzzy_kind {
-  /* Names of types.  */
-  FUZZY_LOOKUP_TYPENAME,
-
-  /* Names of function decls.  */
-  FUZZY_LOOKUP_FUNCTION_NAME,
-
-  /* Any name.  */
-  FUZZY_LOOKUP_NAME
-};
-extern const char *lookup_name_fuzzy (tree, enum lookup_name_fuzzy_kind);
-
 extern bool vector_targets_convertible_p (const_tree t1, const_tree t2);
 extern bool vector_types_convertible_p (const_tree t1, const_tree t2, bool 
emit_lax_note);
 extern tree c_build_vec_perm_expr (location_t, tree, tree, tree, bool = true);
diff --git a/gcc/c-family/name-hint.h b/gcc/c-family/name-hint.h
new file mode 100644
index 000..9f342c8
--- /dev/null
+++ b/gcc/c-family/name-hint.h
@@ -0,0 +1,121 @@
+/* Support for offering suggestions for handling unrecognized names.
+   Copyright (C) 2016-2017 Free Softwa

[PATCH] C/C++: more stdlib header hints (PR c/81404) (v4)

Here's an updated version of the patch.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
OK for trunk?

Changed in v4:
- updated for changes of "inform_at_rich_loc" to "inform"
- added #define INCLUDE_UNIQUE_PTR to known-headers.cc

Changed in v3:
- fixed WINT_MAX and WINT_MIN

Changed in v2:
- move the data to known-headers and unify the C and C++ data
  into one array.

Blurb from v1:

This patch depends on:

* "[PATCH] c-family: add name_hint/deferred_diagnostic (v2)"
* https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01021.html
(waiting review)

* [PATCH 3/3] C: hints for missing stdlib includes for macros and types
* https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00125.html
(approved, pending the prereq above)

It extends the C frontend's "knowledge" of the C stdlib within
get_c_name_hint to cover some more macros and functions, covering
a case reported in PR c/81404 ("INT_MAX"), so that rather than printing:

  t.c:5:12: error: 'INT_MAX' undeclared here (not in a function); did you mean 
'__INT_MAX__'?
   int test = INT_MAX;
  ^~~
  __INT_MAX__

we instead print:

  t.c:5:12: error: 'INT_MAX' undeclared here (not in a function)
   int test = INT_MAX;
  ^~~
  t.c:5:12: note: 'INT_MAX' is defined in header ''; did you forget 
to '#include '?
  t.c:1:1:
  +#include 

  t.c:5:12:
int test = INT_MAX;
   ^~~

It also generalizes some of the code for this (and for the "std::"
namespace hints in the C++ frontend), moving it to a new
c-family/known-headers.cc and .h, and introducing a class known_headers.
This currently just works by scanning a hardcoded array of known
name/header associations, but perhaps in the future could be turned
into some kind of symbol database so that the compiler could record API
uses and use that to offer suggestions e.g.

foo.cc: error: 'myapi::foo' was not declared in this scope
foo.cc: note: 'myapi::foo' was declared in header 'myapi/private.h'
(included via 'myapi/public.h') when compiling 'bar.cc'; did you forget to
'#include "myapi/public.h"'?

or somesuch.

In any case, moving this to a class gives an easier way to locate the
hardcoded knowledge about the stdlib.

The patch also adds similar code to the C++ frontend covering
unqualified names in the standard library, so that rather than just
e.g.:

  t.cc:19:13: error: 'NULL' was not declared in this scope
   void *ptr = NULL;
   ^~~~

we can emit:

  t.cc:19:13: error: 'NULL' was not declared in this scope
   void *ptr = NULL;
   ^~~~
  t.cc:19:13: note: 'NULL' is defined in header ''; did you forget
  to '#include '?
  t.cc:1:1:
  +#include 

  t.cc:19:13:
   void *ptr = NULL;
   ^~~~

(Also XFAIL for PR c++/80567 added for the C++ testcase; this is a
separate pre-existing bug exposed by the testcase for PR 81404).

gcc/ChangeLog:
PR c/81404
* Makefile.in (C_COMMON_OBJS): Add c-family/known-headers.o.

gcc/c-family/ChangeLog:
PR c/81404
* known-headers.cc: New file, based on material from c/c-decl.c.
(suggest_missing_header): Copied as-is.
(get_stdlib_header_for_name): New, based on get_c_name_hint but
heavily edited to add C++ support.  Add some knowledge about
, , and .
* known-headers.h: Likewise.

gcc/c/ChangeLog:
PR c/81404
* c-decl.c: Include "c-family/known-headers.h".
(get_c_name_hint): Rename to get_stdlib_header_for_name and move
to known-headers.cc.
(class suggest_missing_header): Move to known-header.h.
(lookup_name_fuzzy): Call get_c_stdlib_header_for_name rather
than get_c_name_hint.

gcc/cp/ChangeLog:
PR c/81404
* name-lookup.c: Include "c-family/known-headers.h"
(lookup_name_fuzzy): Call get_cp_stdlib_header_for_name and
potentially return a new suggest_missing_header hint.

gcc/testsuite/ChangeLog:
PR c/81404
* g++.dg/spellcheck-stdlib.C: New.
* gcc.dg/spellcheck-stdlib.c (test_INT_MAX): New.
---
 gcc/Makefile.in  |   2 +-
 gcc/c-family/known-headers.cc| 169 +++
 gcc/c-family/known-headers.h |  41 
 gcc/c/c-decl.c   |  82 +--
 gcc/cp/name-lookup.c |  11 ++
 gcc/testsuite/g++.dg/spellcheck-stdlib.C |  84 +++
 gcc/testsuite/gcc.dg/spellcheck-stdlib.c |   9 ++
 7 files changed, 319 insertions(+), 79 deletions(-)
 create mode 100644 gcc/c-family/known-headers.cc
 create mode 100644 gcc/c-family/known-headers.h
 create mode 100644 gcc/testsuite/g++.dg/spellcheck-stdlib.C

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 5f5f403..63c10e3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1191,7 +1191,7 @@ C_COMMON_OBJS = c-family/c-common.o 
c-family/c-cppbuiltin.o c-family/c-dump.o \
   c-family/c-semantics.o c-family/c-ada-spec.o \
   c-family/c-cilkplus.o \

Re: Generalize -(-X) a little

2017-11-02 Thread Marc Glisse


On Thu, 2 Nov 2017, Richard Biener wrote:


You can handle floats as followup but some testcases that shouldn't be
optimized for the INT_MIN / unsigned case would be nice.


New version, that does a bit more.

Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2017-11-03  Marc Glisse  

gcc/
* fold-const.c (fold_negate_expr_1): Call generic_simplify. Remove
NEGATE_EXPR handling.
* match.pd (-(-A)): Rewrite.

gcc/testsuite/
* gcc.dg/tree-ssa/negneg-1.c: New file.
* gcc.dg/tree-ssa/negneg-2.c: Likewise.
* gcc.dg/tree-ssa/negneg-3.c: Likewise.
* gcc.dg/tree-ssa/negneg-4.c: Likewise.


--
Marc GlisseIndex: gcc/fold-const.c
===
--- gcc/fold-const.c	(revision 254348)
+++ gcc/fold-const.c	(working copy)
@@ -585,25 +585,20 @@ fold_negate_expr_1 (location_t loc, tree
 fold_negate_expr (loc, TREE_OPERAND (t, 0)),
 fold_negate_expr (loc, TREE_OPERAND (t, 1)));
   break;
 
 case CONJ_EXPR:
   if (negate_expr_p (t))
 	return fold_build1_loc (loc, CONJ_EXPR, type,
 fold_negate_expr (loc, TREE_OPERAND (t, 0)));
   break;
 
-case NEGATE_EXPR:
-  if (!TYPE_OVERFLOW_SANITIZED (type))
-	return TREE_OPERAND (t, 0);
-  break;
-
 case PLUS_EXPR:
   if (!HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type))
 	  && !HONOR_SIGNED_ZEROS (element_mode (type)))
 	{
 	  /* -(A + B) -> (-B) - A.  */
 	  if (negate_expr_p (TREE_OPERAND (t, 1)))
 	{
 	  tem = negate_expr (TREE_OPERAND (t, 1));
 	  return fold_build2_loc (loc, MINUS_EXPR, type,
   tem, TREE_OPERAND (t, 0));
@@ -706,21 +701,21 @@ fold_negate_expr_1 (location_t loc, tree
 	  temp = fold_build2_loc (loc, RSHIFT_EXPR, ntype, temp, op1);
 	  return fold_convert_loc (loc, type, temp);
 	}
 	}
   break;
 
 default:
   break;
 }
 
-  return NULL_TREE;
+  return generic_simplify (loc, NEGATE_EXPR, type, t);
 }
 
 /* A wrapper for fold_negate_expr_1.  */
 
 static tree
 fold_negate_expr (location_t loc, tree t)
 {
   tree type = TREE_TYPE (t);
   STRIP_SIGN_NOPS (t);
   tree tem = fold_negate_expr_1 (loc, t);
Index: gcc/match.pd
===
--- gcc/match.pd	(revision 254348)
+++ gcc/match.pd	(working copy)
@@ -1496,26 +1496,45 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (tree_nop_conversion_p (type, TREE_TYPE (@1))
&& !TYPE_OVERFLOW_SANITIZED (type))
(with
 {
  tree t1 = type;
  if (INTEGRAL_TYPE_P (type)
 	 && TYPE_OVERFLOW_WRAPS (type) != TYPE_OVERFLOW_WRAPS (TREE_TYPE (@1)))
t1 = TYPE_OVERFLOW_WRAPS (type) ? type : TREE_TYPE (@1);
 }
 (convert (plus (convert:t1 @0) (convert:t1 @1))
- /* -(-A) -> A */
+ /* -(T)(-A) -> (T)A
+Sign-extension is ok except for INT_MIN, which thankfully cannot
+happen without overflow.  */
  (simplify
-  (negate (convert? (negate @1)))
-  (if (tree_nop_conversion_p (type, TREE_TYPE (@1))
-   && !TYPE_OVERFLOW_SANITIZED (type))
+  (negate (convert (negate @1)))
+  (if (INTEGRAL_TYPE_P (type)
+   && (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@1))
+	   || (!TYPE_UNSIGNED (TREE_TYPE (@1))
+	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1
+   && !TYPE_OVERFLOW_SANITIZED (type)
+   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
(convert @1)))
+ (simplify
+  (negate (convert negate_expr_p@1))
+  (if (SCALAR_FLOAT_TYPE_P (type)
+   && ((DECIMAL_FLOAT_TYPE_P (type)
+	== DECIMAL_FLOAT_TYPE_P (TREE_TYPE (@1))
+	&& TYPE_PRECISION (type) >= TYPE_PRECISION (TREE_TYPE (@1)))
+	   || !HONOR_SIGN_DEPENDENT_ROUNDING (type)))
+   (convert (negate @1
+ (simplify
+  (negate (nop_convert (negate @1)))
+  (if (!TYPE_OVERFLOW_SANITIZED (type)
+   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
+   (view_convert @1)))
 
  /* We can't reassociate floating-point unless -fassociative-math
 or fixed-point plus or minus because of saturation to +-Inf.  */
  (if ((!FLOAT_TYPE_P (type) || flag_associative_math)
   && !FIXED_POINT_TYPE_P (type))
 
   /* Match patterns that allow contracting a plus-minus pair
  irrespective of overflow issues.  */
   /* (A +- B) - A   ->  +- B */
   /* (A +- B) -+ B  ->  A */
Index: gcc/testsuite/gcc.dg/tree-ssa/negneg-1.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/negneg-1.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/negneg-1.c	(working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O -frounding-math -fdump-tree-optimized-raw -Wno-psabi" } */
+
+#define DEF(num, T1, T2) T2 f##num(T1 x) { \
+T1 y = -x; \
+T2 z = (T2)y; \
+return -z; \
+}
+DEF(0, int, long long)
+DEF(1, int, unsigned long long)
+DEF(2, long long, int)
+DEF(3, unsigned long long, int)
+DEF(4, long long, unsigned)
+DEF(5, unsigned long long, unsigned)
+DEF(6, float, double)
+
+typedef int vec __attribu

Re: [PATCH 5/6] [ARC] Add 'uncached' attribute.

2017-11-02 Thread Sandra Loosemore


On 11/02/2017 06:30 AM, Claudiu Zissulescu wrote:

From: claziss 

The _Uncached type qualifier can be used to bypass the cache without
resorting to declaring variables as volatile.

gcc/
2017-07-12  Claudiu Zissulescu  

* config/arc/arc-protos.h (arc_is_uncached_mem_p): Function proto.
* config/arc/arc.c (arc_handle_uncached_attribute): New function.
(arc_attribute_table): Add 'uncached' attribute.
(arc_print_operand): Print '.di' flag for uncached memory
accesses.
(arc_in_small_data_p): Do not consider for small data the uncached
types.
(arc_is_uncached_mem_p): New function.
* config/arc/predicates.md (compact_store_memory_operand): Check
for uncached memory accesses.
(nonvol_nonimm_operand): Likewise.


I see no documentation here.

-Sandra

[RFA][PATCH] Refactor duplicated code used by various dom walkers





Several passes which perform dominator walks want to identify when a
block has a single incoming edge, ignoring loop backedges.


I'm aware of 4 implementations of this code.  3 of the 4 are identical 
in function.  The 4th (tree-ssa-dom.c) has an additional twist that it 
also ignores edges that are not marked as executable.


So I've taken the more general implementation from tree-ssa-dom.c and 
conditionalized the handling of unexecutable edges on a flag and moved 
the implementation into cfganal.c where it more naturally belongs.


Bootstrapped and regression tested on x86_64.  OK for the trunk?

Jeff
* cfganal.c (single_incoming_edge_ignoring_loop_edges): New function
extracted from tree-ssa-dom.c.
* cfganal.h (single_incoming_edge_ignoring_loop_edges): Prototype.
* tree-ssa-dom.c (single_incoming_edge_ignoring_loop_edges): Remove.
(record_equivalences_from_incoming_edge): Add additional argument
to single_incoming_edge_ignoring_loop_edges call.
* tree-ssa-uncprop.c (single_incoming_edge_ignoring_loop_edges): Remove.
(uncprop_dom_walker::before_dom_children): Add additional argument
to single_incoming_edge_ignoring_loop_edges call.
* tree-ssa-sccvn.c (sccvn_dom_walker::before_dom_children): Use
single_incoming_edge_ignoring_loop_edges rather than open coding.
* tree-vrp.c (evrp_dom_walker::before_dom_children): Similarly.





diff --git a/gcc/cfganal.c b/gcc/cfganal.c
index c506067..14d94b2 100644
--- a/gcc/cfganal.c
+++ b/gcc/cfganal.c
@@ -1554,3 +1554,38 @@ single_pred_before_succ_order (void)
 #undef MARK_VISITED
 #undef VISITED_P
 }
+
+/* Ignoring loop backedges, if BB has precisely one incoming edge then
+   return that edge.  Otherwise return NULL.  */
+edge
+single_incoming_edge_ignoring_loop_edges (basic_block bb,
+ bool ignore_unreachable)
+{
+  edge retval = NULL;
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+{
+  /* A loop back edge can be identified by the destination of
+the edge dominating the source of the edge.  */
+  if (dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+   continue;
+
+  /* We can safely ignore edges that are not executable.  */
+  if (ignore_unreachable
+ && (e->flags & EDGE_EXECUTABLE) == 0)
+   continue;
+
+  /* If we have already seen a non-loop edge, then we must have
+multiple incoming non-loop edges and thus we return NULL.  */
+  if (retval)
+   return NULL;
+
+  /* This is the first non-loop incoming edge we have found.  Record
+it.  */
+  retval = e;
+}
+
+  return retval;
+}
diff --git a/gcc/cfganal.h b/gcc/cfganal.h
index 39bb5e5..74975e5 100644
--- a/gcc/cfganal.h
+++ b/gcc/cfganal.h
@@ -77,5 +77,6 @@ extern void bitmap_intersection_of_preds (sbitmap, sbitmap *, 
basic_block);
 extern void bitmap_union_of_succs (sbitmap, sbitmap *, basic_block);
 extern void bitmap_union_of_preds (sbitmap, sbitmap *, basic_block);
 extern basic_block * single_pred_before_succ_order (void);
+extern edge single_incoming_edge_ignoring_loop_edges (basic_block, bool);
 
 #endif /* GCC_CFGANAL_H */
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 06be69a..31f88b4 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -113,7 +113,6 @@ static void eliminate_redundant_computations 
(gimple_stmt_iterator *,
  class avail_exprs_stack *);
 static void record_equivalences_from_stmt (gimple *, int,
   class avail_exprs_stack *);
-static edge single_incoming_edge_ignoring_loop_edges (basic_block);
 static void dump_dominator_optimization_stats (FILE *file,
   hash_table *);
 
@@ -1057,39 +1056,6 @@ record_equivalences_from_phis (basic_block bb)
 }
 }
 
-/* Ignoring loop backedges, if BB has precisely one incoming edge then
-   return that edge.  Otherwise return NULL.  */
-static edge
-single_incoming_edge_ignoring_loop_edges (basic_block bb)
-{
-  edge retval = NULL;
-  edge e;
-  edge_iterator ei;
-
-  FOR_EACH_EDGE (e, ei, bb->preds)
-{
-  /* A loop back edge can be identified by the destination of
-the edge dominating the source of the edge.  */
-  if (dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
-   continue;
-
-  /* We can safely ignore edges that are not executable.  */
-  if ((e->flags & EDGE_EXECUTABLE) == 0)
-   continue;
-
-  /* If we have already seen a non-loop edge, then we must have
-multiple incoming non-loop edges and thus we return NULL.  */
-  if (retval)
-   return NULL;
-
-  /* This is the first non-loop incoming edge we have found.  Record
-it.  */
-  retval = e;
-}
-
-  return retval;
-}
-
 /* Record any equivalences created by the incoming edge to BB into
CONST_AND_COPIES and AVAIL_EXPRS_ST

[patch] tweak gcc.target/mips/msa.c options

2017-11-02 Thread Sandra Loosemore

The testcase gcc.target/mips/msa.c gives dozens of FAILs if it's tested 
with a GCC configured to default to -fno-common, because of patterns like


/* { dg-final { scan-assembler-times "\t.comm\tv16i8_\\d+,16,16" 3 } } */

Seems like the simplest solution is to force -fcommon for this test.  OK?

-Sandra

2017-11-02  Sandra Loosemore  

	gcc/testsuite/
	* gcc.target/mips/msa.c: Add -fcommon to dg-options.
Index: gcc/testsuite/gcc.target/mips/msa.c
===
--- gcc/testsuite/gcc.target/mips/msa.c	(revision 480407)
+++ gcc/testsuite/gcc.target/mips/msa.c	(working copy)
@@ -1,6 +1,6 @@
 /* Test MIPS MSA ASE instructions */
 /* { dg-do compile } */
-/* { dg-options "-mfp64 -mhard-float -mmsa -fexpensive-optimizations" } */
+/* { dg-options "-mfp64 -mhard-float -mmsa -fexpensive-optimizations -fcommon" } */
 /* { dg-skip-if "madd and msub need combine" { *-*-* } { "-O0" } { "" } } */
 
 /* { dg-final { scan-assembler-times "\t.comm\tv16i8_\\d+,16,16" 3 } } */

PR82808

2017-11-02 Thread Prathamesh Kulkarni

Hi Martin,
As mentioned in the PR, the issue with propagating the value of 'm' from
f_c1 to foo() is that the jump function operation is FLOAT_EXPR, and
the type of input param 'm' is int, so fold_unary() doesn't do the
conversion to real_type. The attached patch fixes that by calling
fold_convert if operation is FLOAT_EXPR / FIX_TRUNC_EXPR /
CONVERT_EXPR and converts it to the type of corresponding parameter in
callee.

There are still two issues:
a) Using NOP_EXPR for early_exit in ipa_get_jf_pass_through_result.
I suppose we need to change to some other code to indicate that there
is no operation ?
b) The patch does not pass param_type from all callers.
I suppose we could fix these incrementally ?

Bootstrap+tested on x86_64-unknown-linux-gnu.
OK for trunk ?

Thanks,
Prathamesh
2017-11-03  Prathamesh Kulkarni  

* ipa-cp.c (ipa_get_jf_pass_through_result): Add new parameter
parm_type with default value set to NULL. Call fold_convert if jfunc
operation is FLOAT_EXPR or FIX_TRUNC_EXPR or CONVERT_EXPR.
(propagate_vals_across_pass_through): Add parameter parm_type.
(propagate_scalar_across_jump_function): Add parameter parm_type and
pass it to propagate_vals_across_pass_through.
(propagate_constants_across_call): Pass param_type to
propagate_scalar_across_jump_function.

testsuite/
* gcc.dg/ipa/pr82808.c: New test.

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 6b3d8d7364c..20328a43f9b 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1224,7 +1224,8 @@ initialize_node_lattices (struct cgraph_node *node)
determined or be considered an interprocedural invariant.  */
 
 static tree
-ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input)
+ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
+   tree parm_type = NULL_TREE)
 {
   tree restype, res;
 
@@ -1233,7 +1234,17 @@ ipa_get_jf_pass_through_result (struct ipa_jump_func 
*jfunc, tree input)
   if (!is_gimple_ip_invariant (input))
 return NULL_TREE;
 
-  if (TREE_CODE_CLASS (ipa_get_jf_pass_through_operation (jfunc))
+  if (ipa_get_jf_pass_through_operation (jfunc) == FLOAT_EXPR
+  || ipa_get_jf_pass_through_operation (jfunc) == FIX_TRUNC_EXPR
+  || ipa_get_jf_pass_through_operation (jfunc) == CONVERT_EXPR)
+{
+  if (!parm_type)
+   return NULL_TREE;
+
+  res = fold_convert (parm_type, input);
+  restype = parm_type;
+}
+  else if (TREE_CODE_CLASS (ipa_get_jf_pass_through_operation (jfunc))
   == tcc_unary)
 res = fold_unary (ipa_get_jf_pass_through_operation (jfunc),
  TREE_TYPE (input), input);
@@ -1567,7 +1578,8 @@ ipcp_lattice::add_value (valtype newval, 
cgraph_edge *cs,
 static bool
 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
ipcp_lattice *src_lat,
-   ipcp_lattice *dest_lat, int src_idx)
+   ipcp_lattice *dest_lat, int src_idx,
+   tree parm_type)
 {
   ipcp_value *src_val;
   bool ret = false;
@@ -1581,7 +1593,8 @@ propagate_vals_across_pass_through (cgraph_edge *cs, 
ipa_jump_func *jfunc,
   else
 for (src_val = src_lat->values; src_val; src_val = src_val->next)
   {
-   tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value);
+   tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value,
+ parm_type);
 
if (cstval)
  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx);
@@ -1627,7 +1640,8 @@ propagate_vals_across_ancestor (struct cgraph_edge *cs,
 static bool
 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
   struct ipa_jump_func *jfunc,
-  ipcp_lattice *dest_lat)
+  ipcp_lattice *dest_lat,
+  tree param_type)
 {
   if (dest_lat->bottom)
 return false;
@@ -1662,7 +1676,7 @@ propagate_scalar_across_jump_function (struct cgraph_edge 
*cs,
 
   if (jfunc->type == IPA_JF_PASS_THROUGH)
ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
- dest_lat, src_idx);
+ dest_lat, src_idx, 
param_type);
   else
ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
  src_idx);
@@ -2279,7 +2293,7 @@ propagate_constants_across_call (struct cgraph_edge *cs)
   else
{
  ret |= propagate_scalar_across_jump_function (cs, jump_func,
-   &dest_plats->itself);
+   &dest_plats->itself, 
param_type);
  ret |= propagate_context_across_

[RFA][PATCH] Fix assert on x86 target for -fstack-clash-protection




Internal testing within Red Hat tripped an assert in the x86 target.

The assert wants to verify that the integer registers were saved prior 
to probing to deal with a special case where the probe turns into a call 
(which occurs on Windows IIUC).


In the case of stack-clash we aren't bound by that restriction, so 
asserting that the registers were saved doesn't make any real sense.


This patch avoids the assert in that situation.

Sadly, I can't recall the test which triggered this.  It's almost 
certainly something found when doing some testing with stack clash 
protections enabled by default.


Bootstrapped and regression tested on x86.

OK for the trunk?

Jeff

* config/i386/i386.c (ix86_expand_prologue): Tighten assert
for int_registers_saved.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2967872..ea29ef3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13168,7 +13168,8 @@ ix86_expand_prologue (void)
  || flag_stack_clash_protection))
 {
   /* We expect the GP registers to be saved when probes are used.  */
-  gcc_assert (int_registers_saved);
+  if (!flag_stack_clash_protection)
+   gcc_assert (int_registers_saved);
 
   if (flag_stack_clash_protection)
{

[RFA][PATCH] Improve initial probe for noreturn functions for x86 target



The x86 port relies on implicit probes that occur when a call 
instruction pushes the return address onto the stack.  Those implicit 
probes allow the target to avoid emitting explicit stack probes.


Of course we have to account for tail call optimizations which turn the 
call into a jump -- in particular a tail call to a noreturn function (I 
won't go into the details, but other tail calls are safe).


To handle this we emit an explicit probe at the start of any function 
which is marked as noreturn.  That probe hits the red zone, which is of 
course ok if we're using the red zone, but causes valgrind headaches if 
we are not using the red zone.


This patch changes that probe so that instead of

orl $0,-4(sp)

we instead generate

push eax
pop eax

[ With the obvious adjustments in 64 bit mode. ]

The push/pop sequence is one more instruction, but may actually be 
faster than the internal RMW aspects of the orl style.  The push/pop 
sequence is valgrind safe and is actually shorter than the orl probe by 
a few bytes.



Bootstrapped and regression tested on x86.

OK for the trunk?

Jeff
* config/i386/i386.c (ix86_emit_restore_reg_using_pop): Prototype.
(ix86_adjust_stack_and_probe_stack_clash): Use a push/pop sequence
to probe at the start of a noreturn function.

* gcc.target/i386/stack-check-12.c: New test.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ea29ef3..fc43962 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -101,6 +101,8 @@ static void ix86_print_operand_address_as (FILE *, rtx, 
addr_space_t, bool);
 static bool ix86_save_reg (unsigned int, bool, bool);
 static bool ix86_function_naked (const_tree);
 static bool ix86_notrack_prefixed_insn_p (rtx);
+static void ix86_emit_restore_reg_using_pop (rtx);
+
 
 #ifndef CHECK_STACK_LIMIT
 #define CHECK_STACK_LIMIT (-1)
@@ -12124,8 +12126,9 @@ ix86_adjust_stack_and_probe_stack_clash (const 
HOST_WIDE_INT size)
  we just probe when we cross PROBE_INTERVAL.  */
   if (TREE_THIS_VOLATILE (cfun->decl))
 {
-  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-  -GET_MODE_SIZE (word_mode)));
+  rtx_insn *insn = emit_insn (gen_push (gen_rtx_REG (word_mode, 0)));
+  RTX_FRAME_RELATED_P (insn) = 1;
+  ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, 0));
   emit_insn (gen_blockage ());
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/stack-check-12.c 
b/gcc/testsuite/gcc.target/i386/stack-check-12.c
new file mode 100644
index 000..720951c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/stack-check-12.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection -mtune=generic" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+__attribute__ ((noreturn)) void exit (int);
+
+__attribute__ ((noreturn)) void
+f (void)
+{
+  asm volatile ("nop" ::: "edi");
+  exit (1);
+}
+
+/* { dg-final { scan-assembler-not "or\[ql\]" } } */
+/* { dg-final { scan-assembler "push\[ql\] %\[er\]ax" } } */
+/* { dg-final { scan-assembler "pop\[ql]   %\[er\]ax" } } */
+

[RFA][PATCH][PR target/82788] Remove uses of PROBE_INTERVAL in x86 target files