Re: [PR66726] Factor conversion out of COND_EXPR

2015-07-15 Thread Kugan

Here is a patch to teach tree-ssa-reassoc to handle sinking the cast.
Bootstrapped and regression tested on x86-64-none-linux-gnu with no new
regressions. Also regression tested on qemu arm.

I also verified the issue Andreas Schwab raised is fixed on arm
cortex-a5 where the same issue was present. Does this make sense?

Thanks,
Kugan

gcc/ChangeLog:

2015-07-15  Kugan Vivekanandarajah  

PR middle-end/66726
* tree-ssa-reassoc.c (optimize_range_tests): Handle sinking the cast
after PHI.
(final_range_test_p): Detect sinking the cast after PHI.
(maybe_optimize_range_tests): Handle sinking the cast after PHI.
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 932c83a..3058eb5 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -2707,18 +2707,32 @@ optimize_range_tests (enum tree_code opcode,
# _345 = PHI <_123(N), 1(...), 1(...)>
where _234 has bool type, _123 has single use and
bb N has a single successor M.  This is commonly used in
-   the last block of a range test.  */
+   the last block of a range test.
+
+   Also Return true if STMT is tcc_compare like:
+   :
+   ...
+   _234 = a_2(D) == 2;
 
+   :
+   # _345 = PHI <_234(N), 1(...), 1(...)>
+   _346 = (int) _345;
+   where _234 has booltype, single use and
+   bb N has a single successor M.  This is commonly used in
+   the last block of a range test.  */
 static bool
 final_range_test_p (gimple stmt)
 {
-  basic_block bb, rhs_bb;
+  basic_block bb, rhs_bb, lhs_bb;
   edge e;
   tree lhs, rhs;
   use_operand_p use_p;
   gimple use_stmt;
 
-  if (!gimple_assign_cast_p (stmt))
+  if (!gimple_assign_cast_p (stmt)
+  && (!is_gimple_assign (stmt)
+ || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ != tcc_comparison)))
 return false;
   bb = gimple_bb (stmt);
   if (!single_succ_p (bb))
@@ -2729,9 +2743,8 @@ final_range_test_p (gimple stmt)
 
   lhs = gimple_assign_lhs (stmt);
   rhs = gimple_assign_rhs1 (stmt);
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  || TREE_CODE (rhs) != SSA_NAME
-  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
+  if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE
+  && TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
 return false;
 
   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
@@ -2743,10 +2756,21 @@ final_range_test_p (gimple stmt)
 return false;
 
   /* And that the rhs is defined in the same loop.  */
-  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
-  if (rhs_bb == NULL
-  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
-return false;
+  if (gimple_assign_cast_p (stmt))
+{
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ || TREE_CODE (rhs) != SSA_NAME
+ || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
+ || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
+   return false;
+}
+  else
+{
+  if (TREE_CODE (lhs) != SSA_NAME
+ || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
+ || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
+   return false;
+}
 
   return true;
 }
@@ -3132,6 +3156,8 @@ maybe_optimize_range_tests (gimple stmt)
 
  /* stmt is
 _123 = (int) _234;
+OR
+_234 = a_2(D) == 2;
 
 followed by:
 :
@@ -3161,6 +3187,8 @@ maybe_optimize_range_tests (gimple stmt)
 of the bitwise or resp. and, recursively.  */
  if (!get_ops (rhs, code, &ops,
loop_containing_stmt (stmt))
+ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ != tcc_comparison)
  && has_single_use (rhs))
{
  /* Otherwise, push the _234 range test itself.  */
@@ -3173,6 +3201,22 @@ maybe_optimize_range_tests (gimple stmt)
  ops.safe_push (oe);
  bb_ent.last_idx++;
}
+ else if (!get_ops (lhs, code, &ops,
+loop_containing_stmt (stmt))
+  && is_gimple_assign (stmt)
+  && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+  == tcc_comparison)
+  && has_single_use (lhs))
+   {
+ /* Push the _234 range test itself.  */
+ operand_entry_t oe = operand_entry_pool.allocate ();
+ oe->op = lhs;
+ oe->rank = code;
+ oe->id = 0;
+ oe->count = 1;
+ ops.safe_push (oe);
+ bb_ent.last_idx++;
+   }
  else
bb_ent.last_idx = ops.length ();
  bb_ent.op = rhs;
@@ -3267,7 +3311,8 @@ maybe_optimize_range_tests (gimple stmt)
else if (gimple_assign_cast_p (use_stmt))
  cast_stmt = use_stmt;
else
- gcc_unreachable ();
+ cast_stmt = NULL;
+
  if (cast_stm

Re: [C/C++ PATCH] PR c++/66572. Fix Wlogical-op false positive

2015-07-15 Thread Mikhail Maltsev
On 07/14/2015 07:38 PM, Marek Polacek wrote:
> Ok, in that case I think easiest would be the following (I hit the same issue
> when writing the -Wtautological-compare patch):
Thanks for taking care of this issue.

-- 
Regards,
Mikhail Maltsev


Re: [PATCH 1/2, rtl-optimization]: Fix PR 58066, __tls_get_addr is called with misaligned stack on x86-64

2015-07-15 Thread Uros Bizjak
On Tue, Jul 14, 2015 at 6:46 AM, Jeff Law  wrote:
> On 07/13/2015 11:03 AM, Uros Bizjak wrote:
>>
>> This is rtl-optimization part of a two-part patch series.
>>
>> As discussed in the PR, we have to precompute register parameters
>> before stack alignment is performed, otherwise eventual call to
>> __tls_get_addr can be called with unaligned stack. When compiling the
>> testcase from the PR, anti_adjust_stack is called just before
>> precompute starts expanding function parameters.
>>
>> The solution is to move  precomputation before stack pointer is adjusted.
>>
>> 2015-07-13  Uros Bizjak  
>>
>>  PR rtl-optimization/58066
>>  * calls.c (expand_call): Precompute register parameters before stack
>>  alignment is performed.
>>
>> Patch was bootstrapped and regression tested on x86_64-linux-gnu
>> {,-m32} for all default languages, obj-c++ and go.
>>
>> OK for mainline?
>
> OK once a comment is added indicating why we have to precompute before the
> anti-adjust-stack.

Thanks, I have committed the patch with following comment:

+  /* Precompute all register parameters.  It isn't safe to compute
+anything once we have started filling any specific hard regs.
+TLS symbols sometimes need a call to resolve.  Precompute
+register parameters before any stack pointer manipulation
+to avoid unaligned stack in the called function.  */

Uros.


Re: [PATCH][RFC] Consolidate -O3 torture options

2015-07-15 Thread Richard Biener
On Tue, 14 Jul 2015, Jeff Law wrote:

> On 07/14/2015 05:58 AM, Richard Biener wrote:
> > 
> > The following patch tries to consolidate the -O3 torture testing
> > options in the attempt to reduce testing time while not losing
> > coverage.
> > 
> > It drops testing of -funroll-all-loops (which nobody should use)
> > and retains only one non-default -O3 set of options - namely
> > -O3 plus those flags that would be enabled by -fprofile-use.
> > 
> > One should hope for ~20% less time in the C and dg tortures this way.
> > 
> > Didn't look into other tortures to apply the same yet (objc-torture?)
> > 
> > Currently testing on x86_64-unknown-linux-gnu.
> > 
> > For weird flag combinations we do have contributors that test
> > them and regularly report bugzillas.
> > 
> > Ok?
> > 
> > Comments?
> > 
> > Thanks,
> > Richard.
> > 
> > 2015-07-14  Richard Biener  
> > 
> > * lib/c-torture.exp (C_TORTURE_OPTIONS): Remove
> > { -O3 -fomit-frame-pointer },
> > { -O3 -fomit-frame-pointer -funroll-loops } and
> > { -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions }
> > in favor of
> > { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> >   -finline-functions }
> > * lib/gcc-dg.exp (DG_TORTURE_OPTIONS): Likewise.
> I think this is OK -- I've occasionally wondered about the additional coverage
> we get vs the amount of time spent for the various options.
> 
> I can't recall specific cases where one of those 3 options would trigger a
> failure, but the two didn't. I'm sure it's happened, but it's just not common
> enough to warrant the amount of time we spend testing it.
> 
> This patch has the additional benefit that I think we can eliminate scanning
> the source for loops and eliminating the -funroll[-all]-loops options.   Hmm,
> that code may have already been dead... Hmmm.

Testing reveals one fallout:

FAIL: gcc.c-torture/execute/builtins/snprintf-chk.c execution,  -O3 
-fomit-frame
-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions 
FAIL: gcc.c-torture/execute/builtins/vsnprintf-chk.c execution,  -O3 
-fomit-fram
e-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions 

this is because -ftracer manages to thread things in a way that we
optimize away a _chk variant at compile-time and hit

  if (chk_calls != 5)
abort ();

with chk_calls == 4.  I'm going to install the following alongside
(we've had similar issues with -ftree-loop-distribute-patterns enabled
by -O3).

Richard.

Index: gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp
===
--- gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp   (revision 
225768)
+++ gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp   (working 
copy)
@@ -37,7 +37,7 @@ load_lib c-torture.exp
 torture-init
 set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
 
-set additional_flags "-fno-tree-loop-distribute-patterns"
+set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer"
 if [istarget "powerpc-*-darwin*"] {
lappend additional_flags "-Wl,-multiply_defined,suppress"
 }



[PATCH] fix in-tree-binutils builds

2015-07-15 Thread Jan Beulich
Quite a while ago configure.in got renamed to configure.ac in all of
binutils' subtrees - allow for both when checking for that case while
configuring.

gcc/
2015-07-15  Jan Beulich  

* acinclude.m4: Replace configure.in by configure.[ai][cn].
* configure.ac: Likewise.
* configure: Re-generate.

--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -390,7 +390,7 @@ AC_DEFUN([_gcc_COMPUTE_GAS_VERSION],
 [gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
 for f in $gcc_cv_as_bfd_srcdir/configure \
  $gcc_cv_as_gas_srcdir/configure \
- $gcc_cv_as_gas_srcdir/configure.in \
+ $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
  $gcc_cv_as_gas_srcdir/Makefile.in ; do
   gcc_cv_gas_version=`sed -n -e 's/^[[ 
]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`
   if test x$gcc_cv_gas_version != x; then
--- a/gcc/configure
+++ b/gcc/configure
@@ -21675,7 +21675,7 @@ else
 
 if test -x "$DEFAULT_ASSEMBLER"; then
gcc_cv_as="$DEFAULT_ASSEMBLER"
-elif test -f $gcc_cv_as_gas_srcdir/configure.in \
+elif test -f $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
  && test -f ../gas/Makefile \
  && test x$build = x$host; then
gcc_cv_as=../gas/as-new$build_exeext
@@ -21748,7 +21748,7 @@ $as_echo "newly built gas" >&6; }
   gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
 for f in $gcc_cv_as_bfd_srcdir/configure \
  $gcc_cv_as_gas_srcdir/configure \
- $gcc_cv_as_gas_srcdir/configure.in \
+ $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
  $gcc_cv_as_gas_srcdir/Makefile.in ; do
   gcc_cv_gas_version=`sed -n -e 's/^[  
]*VERSION=[^0-9A-Za-z_]*\([0-9]*\.[0-9]*.*\)/VERSION=\1/p' < $f`
   if test x$gcc_cv_gas_version != x; then
@@ -21838,7 +21838,7 @@ elif test $install_gold_as_default = yes
  && test -f ../gold/Makefile \
  && test x$build = x$host; then
gcc_cv_ld=../gold/ld-new$build_exeext
-elif test -f $gcc_cv_ld_gld_srcdir/configure.in \
+elif test -f $gcc_cv_ld_gld_srcdir/configure.[ai][cn] \
  && test -f ../ld/Makefile \
  && test x$build = x$host; then
gcc_cv_ld=../ld/ld-new$build_exeext
@@ -21954,7 +21954,7 @@ $as_echo "newly built ld" >&6; }
elif test "$ld_is_gold" = yes; then
  in_tree_ld_is_elf=yes
fi
-   for f in $gcc_cv_ld_bfd_srcdir/configure 
$gcc_cv_ld_gld_srcdir/configure $gcc_cv_ld_gld_srcdir/configure.in 
$gcc_cv_ld_gld_srcdir/Makefile.in
+   for f in $gcc_cv_ld_bfd_srcdir/configure 
$gcc_cv_ld_gld_srcdir/configure $gcc_cv_ld_gld_srcdir/configure.[ai][cn] 
$gcc_cv_ld_gld_srcdir/Makefile.in
do
gcc_cv_gld_version=`sed -n -e 's/^[ 
]*VERSION=[^0-9A-Za-z_]*\([0-9]*\.[0-9]*.*\)/VERSION=\1/p' < $f`
if test x$gcc_cv_gld_version != x; then
@@ -21987,7 +21987,7 @@ if test "${gcc_cv_nm+set}" = set; then :
 
 else
 
-if test -f $gcc_cv_binutils_srcdir/configure.in \
+if test -f $gcc_cv_binutils_srcdir/configure.[ai][cn] \
  && test -f ../binutils/Makefile \
  && test x$build = x$host; then
gcc_cv_nm=../binutils/nm-new$build_exeext
@@ -22066,7 +22066,7 @@ if test "${gcc_cv_objdump+set}" = set; t
 
 else
 
-if test -f $gcc_cv_binutils_srcdir/configure.in \
+if test -f $gcc_cv_binutils_srcdir/configure.[ai][cn] \
  && test -f ../binutils/Makefile \
  && test x$build = x$host; then
# Single tree build which includes binutils.
@@ -22138,7 +22138,7 @@ if test "${gcc_cv_readelf+set}" = set; t
 
 else
 
-if test -f $gcc_cv_binutils_srcdir/configure.in \
+if test -f $gcc_cv_binutils_srcdir/configure.[ai][cn] \
  && test -f ../binutils/Makefile \
  && test x$build = x$host; then
# Single tree build which includes binutils.
--- gcc-4.9.3/gcc/configure.ac  2015-05-12 10:55:54.0 +0200
+++ 4.9.3/gcc/configure.ac  2015-07-14 10:45:36.0 +0200
@@ -2085,7 +2085,7 @@ m4_pattern_allow([AS_FOR_TARGET])dnl
 AS_VAR_SET_IF(gcc_cv_as,, [
 if test -x "$DEFAULT_ASSEMBLER"; then
gcc_cv_as="$DEFAULT_ASSEMBLER"
-elif test -f $gcc_cv_as_gas_srcdir/configure.in \
+elif test -f $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
  && test -f ../gas/Makefile \
  && test x$build = x$host; then
gcc_cv_as=../gas/as-new$build_exeext
@@ -2174,7 +2174,7 @@ elif test $install_gold_as_default = yes
  && test -f ../gold/Makefile \
  && test x$build = x$host; then
gcc_cv_ld=../gold/ld-new$build_exeext
-elif test -f $gcc_cv_ld_gld_srcdir/configure.in \
+elif test -f $gcc_cv_ld_gld_srcdir/configure.[ai][cn] \
  && test -f ../ld/Makefile \
  && test x$build = x$host; then
gcc_cv_ld=../ld/ld-new$build_exeext
@@ -2238,7 +2238,7 @@ if test "$gcc_cv_ld" = ../ld/ld-new$buil
elif test "$ld_is_gold" = yes; then
  in_tree_ld_is_elf=yes
fi
-   for f in $gcc_cv_ld_bfd_srcdir/configure 
$gcc_cv_ld_gld_srcdir/configure $gcc_cv_ld_gld_srcdir/configure.in 
$gcc_cv_ld_gld_srcdir/Makefile.in
+   f

Re: [PATCH][AArch64] PR target/66731 Fix fnmul insn with -frounding-math

2015-07-15 Thread Ramana Radhakrishnan


On 09/07/15 15:40, Szabolcs Nagy wrote:
> On 06/07/15 16:39, Marcus Shawcroft wrote:
>> On 6 July 2015 at 09:20, Szabolcs Nagy  wrote:
>>
>>> 2015-07-06  Szabolcs Nagy  


PR target/66731 in Changelog ?


Ramana

>>>
>>> * gcc.target/aarch64/fnmul-1.c: New.
>>> * gcc.target/aarch64/fnmul-2.c: New.
>>> * gcc.target/aarch64/fnmul-3.c: New.
>>> * gcc.target/aarch64/fnmul-4.c: New.
>>
>> +float
>> +foo_s (float a, float b)
>> +{
>> +   /* { dg-final { scan-assembler "fnmul\\ts\[0-9\]+, s\[0-9\]+,
>> s\[0-9\]+" } } */
>> +   return -(a * b);
>> +}
>>
>> Indentation should set at two spaces.
>> /Marcus
> 
> Committed the indentation fix as obvious in r225613.
> 
> 2015-07-09  Szabolcs Nagy  
> 
>   * gcc.target/aarch64/fnmul-1.c: Fix whitespace.
>   * gcc.target/aarch64/fnmul-2.c: Likewise.
>   * gcc.target/aarch64/fnmul-3.c: Likewise.
>   * gcc.target/aarch64/fnmul-4.c: Likewise.
> 


Re: [PATCH AArch64]Handle wrong cost for addition of minus immediate in aarch64_rtx_costs.

2015-07-15 Thread Bin.Cheng
Ping^2

On Thu, Jul 9, 2015 at 5:42 PM, Bin.Cheng  wrote:
> Ping.
>
> On Fri, Jun 26, 2015 at 4:47 PM, Bin Cheng  wrote:
>> Hi,
>> The canonical form of subtract of immediate is (add op0 minus_imm), which is
>> supported with addsi3_aarch64 pattern on aarch64.  Unfortunately wrong cost
>> (8 rather than 4) is computed by aarch64_rtx_cost because it doesn't honor
>> the fact that it actually is a sub instruction.  This patch fixes it, is
>> this OK?
>>
>> Thanks,
>> bin
>>
>> 2015-06-25  Bin Cheng  
>>
>> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle addition of
>> minus immediate.


Re: [PATCH][DRIVER] Wrong C++ include paths when configuring with "--with-sysroot=/"

2015-07-15 Thread Yvan Roux
Hi,

(Sorry for the delay I'm just back from a long sick leave)

>> There is this old patch submitted by Matthias on that same issue, if
>> its logic is the right one for you Joseph I can rebase/validate it
>> Joseph.
>>
>> https://gcc.gnu.org/ml/gcc-patches/2012-02/msg00320.html
>
> Yes, that seems better.

I've rebased the patch on trunk, bootstrap is ok and when configuring
with options:
"--with-sysroot=/ --with-gxx-include-dir=/usr/include/c++/4.9.2"
gcc_gxx_include_dir keeps its leading slash.

Is it ok for trunk ?

Thanks,
Yvan


2015-07-15  Yvan Roux  
  Matthias Klose  

   * configure.ac: Move AC_ARG_WITH checks for native-system-header-dir,
   build-sysroot, sysroot from the `Miscenalleous configure options'
   to the `Directories' section and strip trailing `/' from with_sysroot.
   (gcc_gxx_include_dir): Don't strip a `/' sysroot value.
   * configure: Regenerated.
diff --git a/gcc/configure b/gcc/configure
index 9561e5c..1fc246b 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -769,10 +769,6 @@ REPORT_BUGS_TEXI
 REPORT_BUGS_TO
 PKGVERSION
 CONFIGURE_SPECS
-CROSS_SYSTEM_HEADER_DIR
-TARGET_SYSTEM_ROOT_DEFINE
-TARGET_SYSTEM_ROOT
-SYSROOT_CFLAGS_FOR_TARGET
 enable_shared
 enable_fixed_point
 enable_decimal_float
@@ -812,6 +808,10 @@ LDFLAGS
 CFLAGS
 CC
 GENINSRC
+CROSS_SYSTEM_HEADER_DIR
+TARGET_SYSTEM_ROOT_DEFINE
+TARGET_SYSTEM_ROOT
+SYSROOT_CFLAGS_FOR_TARGET
 target_subdir
 host_subdir
 build_subdir
@@ -873,6 +873,9 @@ ac_user_opts='
 enable_option_checking
 with_build_libsubdir
 with_local_prefix
+with_native_system_header_dir
+with_build_sysroot
+with_sysroot
 with_gxx_include_dir
 with_cpp_install_dir
 enable_generated_files_in_srcdir
@@ -899,9 +902,6 @@ enable_tls
 enable_objc_gc
 with_dwarf2
 enable_shared
-with_native_system_header_dir
-with_build_sysroot
-with_sysroot
 with_specs
 with_pkgversion
 with_bugurl
@@ -1685,6 +1685,12 @@ Optional Packages:
   --without-PACKAGE   do not use PACKAGE (same as --with-PACKAGE=no)
   --with-build-libsubdir=DIR  Directory where to find libraries for build 
system
   --with-local-prefix=DIR specifies directory to put local include
+  --with-native-system-header-dir=dir
+  use dir as the directory to look for standard
+  system header files in.  Defaults to /usr/include.
+  --with-build-sysroot=sysroot
+  use sysroot as the system root during the build
+  --with-sysroot[=DIR]search for usr/lib, usr/include, et al, within DIR
   --with-gxx-include-dir=DIR
   specifies directory to put g++ header files
   --with-cpp-install-dir=DIR
@@ -1697,12 +1703,6 @@ Optional Packages:
   --with-as   arrange to use the specified as (full pathname)
   --with-stabsarrange to use stabs instead of host debug format
   --with-dwarf2   force the default debug format to be DWARF 2
-  --with-native-system-header-dir=dir
-  use dir as the directory to look for standard
-  system header files in.  Defaults to /usr/include.
-  --with-build-sysroot=sysroot
-  use sysroot as the system root during the build
-  --with-sysroot[=DIR]search for usr/lib, usr/include, et al, within DIR
   --with-specs=SPECS  add SPECS to driver command-line processing
   --with-pkgversion=PKG   Use PKG in the version string in place of "GCC"
   --with-bugurl=URL   Direct users to URL to report a bug
@@ -3456,6 +3456,83 @@ if test x$local_prefix = x; then
local_prefix=/usr/local
 fi
 
+
+# Check whether --with-native-system-header-dir was given.
+if test "${with_native_system_header_dir+set}" = set; then :
+  withval=$with_native_system_header_dir;
+ case ${with_native_system_header_dir} in
+ yes|no) as_fn_error "bad value ${withval} given for 
--with-native-system-header-dir" "$LINENO" 5 ;;
+ /* | [A-Za-z]:[\\/]*) ;;
+ *) as_fn_error "--with-native-system-header-dir argument ${withval} must be 
an absolute directory" "$LINENO" 5 ;;
+ esac
+ configured_native_system_header_dir="${withval}"
+
+else
+  configured_native_system_header_dir=
+fi
+
+
+
+# Check whether --with-build-sysroot was given.
+if test "${with_build_sysroot+set}" = set; then :
+  withval=$with_build_sysroot; if test x"$withval" != x ; then
+ SYSROOT_CFLAGS_FOR_TARGET="--sysroot=$withval"
+   fi
+else
+  SYSROOT_CFLAGS_FOR_TARGET=
+fi
+
+
+
+if test "x$prefix" = xNONE; then
+ test_prefix=/usr/local
+else
+ test_prefix=$prefix
+fi
+if test "x$exec_prefix" = xNONE; then
+ test_exec_prefix=$test_prefix
+else
+ test_exec_prefix=$exec_prefix
+fi
+
+
+# Check whether --with-sysroot was given.
+if test "${with_sysroot+set}" = set; then :
+  withval=$with_sysroot;
+ case ${with_sysroot} in
+ /) ;;
+ */) with_sysroot=`echo $with_sysroot | sed 's,/$,,'` ;;
+ esac
+ case ${with_sysroot} in
+ yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;;
+ *) TARGET_SYSTEM_

Re: conditional lim

2015-07-15 Thread Evgeniya Maenkova
On Tue, Jul 14, 2015 at 2:54 PM, Richard Biener
 wrote:
> On Mon, Jun 29, 2015 at 4:21 PM, Evgeniya Maenkova
>  wrote:
>> On Mon, Jun 29, 2015 at 5:10 PM, Richard Biener
>>  wrote:
>>> On Tue, Jun 9, 2015 at 10:11 PM, Evgeniya Maenkova
>>>  wrote:
 On Tue, Jun 9, 2015 at 3:46 PM, Richard Biener
  wrote:
> On Fri, May 29, 2015 at 3:14 PM, Evgeniya Maenkova
>  wrote:
>> Hi Richard,
>>
>> Here is some explanation. I hope you let me know if I need to clarify 
>> something.
>>
>> Also, you asked me about concrete example, to make sure you don’t miss
>> my answer here is the link:
>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02417.html.
>>
>> Also, I doubt whether it’s convenient for you to create a build with
>> my patch or not. May be to clarify things you could send me some
>> examples/concrete cases, then I’ll compile them with
>> –fdump-tree-loopinit-details and –fdump-tree-lim-details and send you
>> these dumps. May be these dumps will be useful. (I’ll only disable
>> cleanup_cfg TODO after lim to let you know the exact picture after
>> lim).
>>
>> What do you think?
>>
>> 1.   invariantness _dom_walker –
>>
>> 1.1   for each GIMPLE_COND in given bb calls handle_cond_stmt to call
>> for true and false edges handle_branch_edge, which calls SET_TARGET_OF
>> for all bb ‘predicated’ by given GIMPLE_COND.
>>
>> SET_TARGET_OF sets in basic_blocks aux 2 facts:
>>
>> a)  this is true or false edge;
>>
>> b)  link to cond stmt;
>>
>> Handle_branch_edge works this way:
>>
>> If (cond1)
>>
>>   {
>>
>>  bb1;
>>
>>  if (cond2}
>>
>>{
>>
>>bb2;
>>
>> }
>>
>>Being called for cond1, it sets cond1 as condition for both bb1 and
>> bb2 (the whole branch for cond1, ie also for bb containing cond2),
>> then this method will be called (as there is dominance order) for
>> cond2 to correct things (ie to set cond2 as condition for bb2).
>
> Hmm, why not track the current condition as state during the DOM walk
> and thus avoid processing more than one basic-block in handle_branch_edge?
> Thus get rid of handle_branch_edge and instead do everything in 
> handle_cond_stmt
> plus the dom-walkers BB visitor?
>
 I need to look more carefully how to implement it, but I think I
 understand what you mean and this optimization of course looks
 reasonable to me. Will do.

> I see you don't handle BBs with multiple predecessors - that's ok, but
> are you sure you don't run into correctness issues when not marking such
> BBs as predicated?  This misses handling of, say
>
>  if (a || b)
>bb;
>
> which is a pity (but can be fixed later if desired).
>
 I had some test (in gcc testsuite or bootstrap build) which worked
 incorrectly because of multiple predecessors. As far as I remember the
 situation was (further, will make some notes about such tests to
 clarify this better), I mean with previous version of my code which
 handled bb with 2 predecessors:
 if (a)
   tmpvar=something;
 while()
   if (a || b)
   basic_block {do something with tmpvar;} // I mean basic block
 predicated by bb with a and bb with b

 So, if a is false, I mean we didn't do tmpvar=something (outside
 loop), BUT we are in basic_block  (we went by bb with b), we got
 Segmentation fault in basic_block {do something with tmpvar;}.

 I think we can reproduce all the details of this test if I remove not
 handling bb with 2 predecessors.

 So I wouldn't move bb with 2 predecessors (this is not always executed
 bb in any loop, not conditionally, they will not be moved at all).

 This is my more detail explanation on this point. Perhaps, I didn't
 understand your question about correctness. Could you repeat it in
 other words (based on this new clarification).

 So I think according to current code it will not be moved. What
 incorrectness do you mean?
>>>
>>> If the block isn't marked as predicated the question is whether it is
>>> handled correctly or assumed to be unconditionally executed.
>>>
> I note that collecting predicates has similarities to what if-conversion
> does in tree-ifcvt.c (even if its implementation is completely different,
> of course).
>

 Ok, I'll look at this. But could you please clarify your point?
 (Should I just take this into account with low priority and look at
 this later or you want some refactoring?)
>>>
>>> I just noted similar code exists elsewhere - it may be possible to
>>> factor it out but I didn't investigate.  And no, doing that isn't a 
>>> prerequisite
>>> for this patch.
>>>
>> 1.2   As 1.1 goes we identify whether some bb is predic

Re: [gomp] constify device data & fix cleanup

2015-07-15 Thread Ilya Verbin
2015-07-15 2:59 GMT+03:00 Nathan Sidwell :
> The other thing this does is change the interface between libgomp and the 
> plugin's load_image and unload_image routines.  I've added the ability to 
> return a pointer to target-specific connection data, and have it provided to 
> the unload function.  The ptx routines allocate some storage during loading, 
> but had no way to free it on unloading. (Actually, the unloading was rather 
> broken, attempting to free the wrong thing.)  this data is stashed in the map 
> created for host->target fns & vars.

Why do you need to return dev_data to libgomp?  Is it possible to save
it in plugin, e.g. in some global set with target_data as a key?  I've
implemented unloading this way in plugin-intelmic.

@@ -350,11 +350,11 @@ generate_host_descr_file (const char *ho
"#ifdef __cplusplus\n"
"extern \"C\"\n"
"#endif\n"
-   "void GOMP_offload_register (void *, int, void *);\n"
+   "void GOMP_offload_register (void *, int, const void *);\n"
+   "void GOMP_offload_unregister (void *, int, void const *);\n"
"#ifdef __cplusplus\n"
"extern \"C\"\n"
"#endif\n"
-   "void GOMP_offload_unregister (void *, int, void *);\n\n"

I haven't tried to build intelmic-mkoffload, but looks like here is
something wrong with extern "C".

  -- Ilya


Re: [PATCH AArch64]Handle wrong cost for addition of minus immediate in aarch64_rtx_costs.

2015-07-15 Thread James Greenhalgh
On Wed, Jul 15, 2015 at 09:22:01AM +0100, Bin.Cheng wrote:
> Ping^2


>  +/* ADD -(immediate).  */

I'd like to see a more detailed comment in this case. Probably something
along the lines of:

  /* The canonical form of subtract of immediate is
 (add op0 minus_imm).  Catch that here, modify the immediate,
 and handle the costing in cost_minus.  */


> +  && aarch64_uimm12_shift (- (INTVAL (op1

This looks strange to me, should it not be

> +  && aarch64_uimm12_shift (-(INTVAL (op1

Without the space after the `-`. Likewise below:

> + op1 = gen_int_mode (- (INTVAL (op1)), mode);

Thanks,
James

> 
> > On Fri, Jun 26, 2015 at 4:47 PM, Bin Cheng  wrote:
> >> Hi,
> >> The canonical form of subtract of immediate is (add op0 minus_imm), which 
> >> is
> >> supported with addsi3_aarch64 pattern on aarch64.  Unfortunately wrong cost
> >> (8 rather than 4) is computed by aarch64_rtx_cost because it doesn't honor
> >> the fact that it actually is a sub instruction.  This patch fixes it, is
> >> this OK?
> >>
> >> Thanks,
> >> bin
> >>
> >> 2015-06-25  Bin Cheng  
> >>
> >> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle addition of
> >> minus immediate.
> 


Re: [PATCH][AArch64] PR target/66731 Fix fnmul insn with -frounding-math

2015-07-15 Thread Szabolcs Nagy
On 15/07/15 09:11, Ramana Radhakrishnan wrote:
> On 09/07/15 15:40, Szabolcs Nagy wrote:
>> On 06/07/15 16:39, Marcus Shawcroft wrote:
>>> On 6 July 2015 at 09:20, Szabolcs Nagy  wrote:
>>>
 2015-07-06  Szabolcs Nagy  
> 
> 
> PR target/66731 in Changelog ?
> 

thanks, fixed in r225810.



Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-15 Thread Sebastian Huber

Thanks, I checked this in:

https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=225811

--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



RE: [PATCH] MIPS: Correctly update the isa and arch_test_option_p variables after the arch dependency handling code in mips.exp

2015-07-15 Thread Andrew Bennett
> > Ok to commit?
> 
> Yes, this is OK.

Committed as SVN 225813.

Regards,


Andrew


Re: [gomp4] New test loop independent clause

2015-07-15 Thread Thomas Schwinge
Hi!

On Tue, 14 Jul 2015 14:10:01 -0500, James Norris  
wrote:
> The attached adds testing for the independent clause
> with the loop directive in Fortran.
> 
> Committed to gomp-4_0-branch.

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90

Thanks!  I had a look, and per -O2 -ftree-parallelize-loops=32
-fdump-tree-parloops_oacc_kernels-all -fdump-tree-optimized tree dumps,
indeed I see the anticipated »SUCCESS: may be parallelized« as well as
»#pragma omp target oacc_parallel num_gangs(32)« markers, so I think this
test case is being parallelized (as well as it executes correctly).  So,
I'm correct in assuming that this is not showcasing the Fortran issue
that Tom raised yesterday?  Anyway, assuming that it adds value to the
current set of test cases, should this also be made a compiler test case,
with dg-final directives checking for the anticipated properties, similar
to gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95, for example?

> @@ -0,0 +1,43 @@
> +! { dg-do run } */
> +! { dg-additional-options "-cpp" }
> +! { dg-additional-options "-ftree-parallelize-loops=32" }
> +
> +#define N (1024 * 512)
> +
> +subroutine foo (a,  b,  c)
> +  integer, parameter :: n = N
> +  integer, dimension (n) :: a
> +  integer, dimension (n) :: b
> +  integer, dimension (n) :: c
> +  integer i, ii
> +
> +  do i = 1, n
> +a(i) = i * 2;
> +  end do
> +
> +  do i = 1, n
> +b(i) = i * 4;
> +  end do
> +
> +  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
> +!$acc loop independent
> +do ii = 1, n
> +  c(ii) = a(ii) + b(ii)
> +end do
> +  !$acc end kernels
> +
> +  do i = 1, n
> +if (c(i) .ne. a(i) + b(i)) call abort
> +  end do
> +
> +end subroutine
> +
> +program main
> +  integer, parameter :: n = N
> +  integer :: a(n)
> +  integer :: b(n)
> +  integer :: c(n)
> +
> +  call foo (a, b, c)
> +
> +end program main


Grüße,
 Thomas


pgprzCxe_uI5Y.pgp
Description: PGP signature


[PATCH][ARM][testsuite][committed] Add -mfloat-abi=softfp to some xscale tests

2015-07-15 Thread Kyrill Tkachov

Hi all,

This patch adds -mfloat-abi=softfp to a couple of tests that check xscale 
functionality
and an appropriate skipping directive.
This helps avoid "unimplemented Thumb-1 hard float ABI" errors when testing the 
arm-none-linux-gnueabihf/-mthumb variant.

With this patch the two tests PASS on arm-none-linux-gnueabihf/-mthumb and are 
UNSUPPORTED on arm-none-linux-gnueabihf/-mthumb/-mfloat-abi=hard

Committed as obvious with r225814.

Thanks,
Kyrill

2015-07-15  Kyrylo Tkachov  

* gcc.target/arm/scd42-1.c: Add -mfloat-abi=softfp and appropriate
dg-skip-if.
* gcc.target/arm/scd42-3.c: Likewise.
commit e640e90cce03d8e899972f73178e830556ed2e2d
Author: Kyrylo Tkachov 
Date:   Tue Jul 7 09:36:30 2015 +0100

[ARM][testsuite] Add -mfloat-abi=softfp to some xscale tests

diff --git a/gcc/testsuite/gcc.target/arm/scd42-1.c b/gcc/testsuite/gcc.target/arm/scd42-1.c
index 420f7c4..d1de24a 100644
--- a/gcc/testsuite/gcc.target/arm/scd42-1.c
+++ b/gcc/testsuite/gcc.target/arm/scd42-1.c
@@ -1,7 +1,8 @@
 /* Verify that mov is preferred on XScale for loading a 1 byte constant. */
 /* { dg-do compile } */
 /* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "" } } */
-/* { dg-options "-mcpu=xscale -O" } */
+/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */
 
 unsigned load1(void) __attribute__ ((naked));
 unsigned load1(void)
diff --git a/gcc/testsuite/gcc.target/arm/scd42-3.c b/gcc/testsuite/gcc.target/arm/scd42-3.c
index eb90e43..e566cb2 100644
--- a/gcc/testsuite/gcc.target/arm/scd42-3.c
+++ b/gcc/testsuite/gcc.target/arm/scd42-3.c
@@ -2,7 +2,8 @@
 /* { dg-do compile } */
 /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-march=*" } { "-march=xscale" } } */
 /* { dg-skip-if "Test is specific to Xscale" { arm*-*-* } { "-mcpu=*" } { "-mcpu=xscale" } } */
-/* { dg-options "-mcpu=xscale -O" } */
+/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-options "-mcpu=xscale -O -mfloat-abi=softfp" } */
 
 unsigned load4(void) __attribute__ ((naked));
 unsigned load4(void)


Re: [gomp] constify device data & fix cleanup

2015-07-15 Thread Thomas Schwinge
Hi!

On Tue, 14 Jul 2015 19:59:17 -0400, Nathan Sidwell  wrote:
> This patch turned out a little larger than expected as I ran into an API 
> limitation between libgomp and the plugins.
> 
> The patch changes GOMP_offload_{,un}register to take a pointer to constant 
> target data.  I've fixed up the two mkoffloads to constify their target data.


> The other thing this does is change the interface between libgomp and the 
> plugin's load_image and unload_image routines.  I've added the ability to 
> return 
> a pointer to target-specific connection data, and have it provided to the 
> unload 
> function.  The ptx routines allocate some storage during loading, but had no 
> way 
> to free it on unloading. (Actually, the unloading was rather broken, 
> attempting 
> to free the wrong thing.)  this data is stashed in the map created for 
> host->target fns & vars.

Julian once came up with a patch to »Fix OpenACC shutdown and PTX image
unloading (PR65904)«,
,
but apparently that never got committed?  (Waiting for approval
by Jakub, I guess?)


> There doesn't appear to be an intelmic plugin to modify.

It lives in liboffloadmic/plugin/.


> I expect the next patch in this series to break the API between mkoffload and 
> libgomp -- adding version numbering.

»Adding version numbering« means to use regular symbol versioning in
libgomp for the GOMP_offload_register function?

> I could do this just for PTX (and have an 
> inferior long term solution), or modify intelmic too.  Preference?

So far, the preference has been to keep things (in this case: the
intelmic and nvptx plugins; or OpenMP and OpenACC offloading) as similar
as possible, striving for converging them into one unified API.  So, it
makes sense to modify intelmic, too.  (But of course, we have not yet
seen ;-) your suggested changes.)


Grüße,
 Thomas


signature.asc
Description: PGP signature


RE: [Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-07-15 Thread David Sherwood
> >
> > > On Mon, 29 Jun 2015, David Sherwood wrote:
> > >
> > > > Hi,
> > > >
> > > > I have added new STRICT_MAX_EXPR and STRICT_MIN_EXPR expressions to 
> > > > support the
> > > > IEEE versions of fmin and fmax. This is done by recognising the math 
> > > > library
> > > > "fmax" and "fmin" builtin functions in a similar way to how this is 
> > > > done for
> > > > -ffast-math. This also allows us to vectorise the IEEE max/min 
> > > > functions for
> > > > targets that support it, for example aarch64/aarch32.
> > >
> > > This patch is missing documentation.  You need to document the new insn
> > > patterns in md.texi and the new tree codes in generic.texi.
> >
> > Hi, I've uploaded a new patch with the documentation. Hope this is ok.
> 
> In various places where you refer to one operand being NaN, I think you
> mean one operand being a *quiet* NaN (if one is a signaling NaN - only
> supported by GCC if -fsignaling-nans - the IEEE minNum and maxNum
> operations raise "invalid" and return a quiet NaN).

Hi, I have a new patch that hopefully addresses the documentation issues.

Thanks,
David.

ChangeLog:

2015-07-15  David Sherwood  

gcc/
* builtins.c (integer_valued_real_p): Add STRICT_MIN_EXPR and
STRICT_MAX_EXPR.
(fold_builtin_fmin_fmax): For strict math, convert builtin fmin and 
fmax to STRICT_MIN_EXPR and STRICT_MAX_EXPR, respectively.
* expr.c (expand_expr_real_2): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
* fold-const.c (const_binop): Likewise.
(fold_binary_loc, tree_binary_nonnegative_warnv_p): Likewise.
(tree_binary_nonzero_warnv_p): Likewise.
* optabs.h (strict_minmax_support): Declare.
* optabs.def: Add new optabs strict_max_optab/strict_min_optab.
* optabs.c (optab_for_tree_code): Return new optabs for STRICT_MIN_EXPR
and STRICT_MAX_EXPR.
(strict_minmax_support): New function.
* real.c (real_arithmetic): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
* tree.def: Likewise.
* tree.c (associative_tree_code, commutative_tree_code): Likewise.
* tree-cfg.c (verify_expr): Likewise.
(verify_gimple_assign_binary): Likewise.
* tree-inline.c (estimate_operator_cost): Likewise.
* tree-pretty-print.c (dump_generic_node, op_code_prio): Likewise.
(op_symbol_code): Likewise.
gcc/config:
* aarch64/aarch64.md: New pattern.
* aarch64/aarch64-simd.md: Likewise.
* aarch64/iterators.md: New unspecs, iterators.
* arm/iterators.md: New iterators.
* arm/unspecs.md: New unspecs.
* arm/neon.md: New pattern.
* arm/vfp.md: Likewise.
gcc/doc:
* generic.texi: Add STRICT_MAX_EXPR and STRICT_MIN_EXPR.
* md.texi: Add strict_min and strict_max patterns.
gcc/testsuite
* gcc.target/aarch64/maxmin_strict.c: New test.
* gcc.target/arm/maxmin_strict.c: New test.


strict_max.patch
Description: Binary data


Re: Tests for libgomp based on OpenMP Examples 4.0.2

2015-07-15 Thread Dominique d'Humières
> The patch replaces all FP comparisons with inequalities and epsilons
> in those tests for libgomp.
In libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90

integer, parameter :: EPS = 0.005

should be

real, parameter :: EPS = 0.005

TIA

Dominique



Re: [gomp4] New test loop independent clause

2015-07-15 Thread Tom de Vries

On 15/07/15 11:28, Thomas Schwinge wrote:

Hi!

On Tue, 14 Jul 2015 14:10:01 -0500, James Norris  
wrote:

The attached adds testing for the independent clause
with the loop directive in Fortran.

Committed to gomp-4_0-branch.



--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90


Thanks!  I had a look, and per -O2 -ftree-parallelize-loops=32
-fdump-tree-parloops_oacc_kernels-all -fdump-tree-optimized tree dumps,
indeed I see the anticipated »SUCCESS: may be parallelized« as well as
»#pragma omp target oacc_parallel num_gangs(32)« markers, so I think this
test case is being parallelized (as well as it executes correctly).  So,
I'm correct in assuming that this is not showcasing the Fortran issue
that Tom raised yesterday?


The issue I remember raising is: PR66873 - fortran variant of outer-1.c 
not parallelized by autopar.


I'm not sure if I mentioned it at the meeting, but marking the outer 
loop of that example as independent does not result in parallelization 
either. So that's something to be investigated.


Thanks,
- Tom


Anyway, assuming that it adds value to the
current set of test cases, should this also be made a compiler test case,
with dg-final directives checking for the anticipated properties, similar
to gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95, for example?


@@ -0,0 +1,43 @@
+! { dg-do run } */
+! { dg-additional-options "-cpp" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+#define N (1024 * 512)
+
+subroutine foo (a,  b,  c)
+  integer, parameter :: n = N
+  integer, dimension (n) :: a
+  integer, dimension (n) :: b
+  integer, dimension (n) :: c
+  integer i, ii
+
+  do i = 1, n
+a(i) = i * 2;
+  end do
+
+  do i = 1, n
+b(i) = i * 4;
+  end do
+
+  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
+!$acc loop independent
+do ii = 1, n
+  c(ii) = a(ii) + b(ii)
+end do
+  !$acc end kernels
+
+  do i = 1, n
+if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end subroutine
+
+program main
+  integer, parameter :: n = N
+  integer :: a(n)
+  integer :: b(n)
+  integer :: c(n)
+
+  call foo (a, b, c)
+
+end program main



Grüße,
  Thomas





Re: conditional lim

2015-07-15 Thread Richard Biener
On Wed, Jul 15, 2015 at 10:36 AM, Evgeniya Maenkova
 wrote:
> On Tue, Jul 14, 2015 at 2:54 PM, Richard Biener
>  wrote:
>> On Mon, Jun 29, 2015 at 4:21 PM, Evgeniya Maenkova
>>  wrote:
>>> On Mon, Jun 29, 2015 at 5:10 PM, Richard Biener
>>>  wrote:
 On Tue, Jun 9, 2015 at 10:11 PM, Evgeniya Maenkova
  wrote:
> On Tue, Jun 9, 2015 at 3:46 PM, Richard Biener
>  wrote:
>> On Fri, May 29, 2015 at 3:14 PM, Evgeniya Maenkova
>>  wrote:
>>> Hi Richard,
>>>
>>> Here is some explanation. I hope you let me know if I need to clarify 
>>> something.
>>>
>>> Also, you asked me about concrete example, to make sure you don’t miss
>>> my answer here is the link:
>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02417.html.
>>>
>>> Also, I doubt whether it’s convenient for you to create a build with
>>> my patch or not. May be to clarify things you could send me some
>>> examples/concrete cases, then I’ll compile them with
>>> –fdump-tree-loopinit-details and –fdump-tree-lim-details and send you
>>> these dumps. May be these dumps will be useful. (I’ll only disable
>>> cleanup_cfg TODO after lim to let you know the exact picture after
>>> lim).
>>>
>>> What do you think?
>>>
>>> 1.   invariantness _dom_walker –
>>>
>>> 1.1   for each GIMPLE_COND in given bb calls handle_cond_stmt to call
>>> for true and false edges handle_branch_edge, which calls SET_TARGET_OF
>>> for all bb ‘predicated’ by given GIMPLE_COND.
>>>
>>> SET_TARGET_OF sets in basic_blocks aux 2 facts:
>>>
>>> a)  this is true or false edge;
>>>
>>> b)  link to cond stmt;
>>>
>>> Handle_branch_edge works this way:
>>>
>>> If (cond1)
>>>
>>>   {
>>>
>>>  bb1;
>>>
>>>  if (cond2}
>>>
>>>{
>>>
>>>bb2;
>>>
>>> }
>>>
>>>Being called for cond1, it sets cond1 as condition for both bb1 and
>>> bb2 (the whole branch for cond1, ie also for bb containing cond2),
>>> then this method will be called (as there is dominance order) for
>>> cond2 to correct things (ie to set cond2 as condition for bb2).
>>
>> Hmm, why not track the current condition as state during the DOM walk
>> and thus avoid processing more than one basic-block in 
>> handle_branch_edge?
>> Thus get rid of handle_branch_edge and instead do everything in 
>> handle_cond_stmt
>> plus the dom-walkers BB visitor?
>>
> I need to look more carefully how to implement it, but I think I
> understand what you mean and this optimization of course looks
> reasonable to me. Will do.
>
>> I see you don't handle BBs with multiple predecessors - that's ok, but
>> are you sure you don't run into correctness issues when not marking such
>> BBs as predicated?  This misses handling of, say
>>
>>  if (a || b)
>>bb;
>>
>> which is a pity (but can be fixed later if desired).
>>
> I had some test (in gcc testsuite or bootstrap build) which worked
> incorrectly because of multiple predecessors. As far as I remember the
> situation was (further, will make some notes about such tests to
> clarify this better), I mean with previous version of my code which
> handled bb with 2 predecessors:
> if (a)
>   tmpvar=something;
> while()
>   if (a || b)
>   basic_block {do something with tmpvar;} // I mean basic block
> predicated by bb with a and bb with b
>
> So, if a is false, I mean we didn't do tmpvar=something (outside
> loop), BUT we are in basic_block  (we went by bb with b), we got
> Segmentation fault in basic_block {do something with tmpvar;}.
>
> I think we can reproduce all the details of this test if I remove not
> handling bb with 2 predecessors.
>
> So I wouldn't move bb with 2 predecessors (this is not always executed
> bb in any loop, not conditionally, they will not be moved at all).
>
> This is my more detail explanation on this point. Perhaps, I didn't
> understand your question about correctness. Could you repeat it in
> other words (based on this new clarification).
>
> So I think according to current code it will not be moved. What
> incorrectness do you mean?

 If the block isn't marked as predicated the question is whether it is
 handled correctly or assumed to be unconditionally executed.

>> I note that collecting predicates has similarities to what if-conversion
>> does in tree-ifcvt.c (even if its implementation is completely different,
>> of course).
>>
>
> Ok, I'll look at this. But could you please clarify your point?
> (Should I just take this into account with low priority and look at
> this later or you want some refactoring?)

 I just noted similar code exists elsewhere - it 

Re: [PATCH][13/n] Remove GENERIC stmt combining from SCCVN

2015-07-15 Thread Richard Biener
On Thu, 9 Jul 2015, Kyrill Tkachov wrote:

> Hi Richard,
> 
> On 07/07/15 15:10, Richard Biener wrote:
> > This moves a few more patterns that show up during bootstrap.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > 
> > Richard.
> > 
> > 2015-07-07  Richard Biener  
> > 
> > * fold-const.c (fold_binary_loc): Move
> > (X & C2) << C1 -> (X << C1) & (C2 << C1) simplification ...
> > * match.pd: ... here.
> > Add (X * C1) % C2 -> 0 simplification pattern derived from
> > extract_muldiv_1.
> > 
> > * gcc.dg/vect/vect-over-widen-3-big-array.c: Adjust.
> > 
> > 
> >   case MIN_EXPR:
> > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
> > ===
> > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c (revision
> > 225504)
> > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c (working copy)
> > @@ -58,6 +58,6 @@ int main (void)
> > return 0;
> >   }
> >   -/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern:
> > detected" 1 "vect" } } */
> > +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern:
> > detected" 2 "vect" } } */
> >   /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> >   
> 
> I see this scan now fail on arm-none-eabi:
> FAIL: gcc.dg/vect/vect-over-widen-3-big-array.c scan-tree-dump-times vect
> "vect_recog_over_widening_pattern: detected" 2
> FAIL: gcc.dg/vect/vect-over-widen-3-big-array.c -flto -ffat-lto-objects
> scan-tree-dump-times vect "vect_recog_over_widening_pattern: detected" 2

Can you open a bugreport so I don't forget about this?

Thanks,
Richard.


AW: [Bug fortran/52846] [F2008] Support submodules - part 2/3

2015-07-15 Thread Bader, Reinhold
Hello Paul, 

the attached example fails to compile with the error message

Error: Global name »mod_s« at (1) is already being used as a MODULE at (2)
subm_10_pos.f90:29:6:

   use mod_s

However, the global identifier of a submodule is the pair (ancestor module 
name, 
submodule name) according to 11.2.3, para 2 of the Fortran 2008 standard. So 
I think the error message is spurious.

Cheers
Reinhold

> -Ursprüngliche Nachricht-
> Von: Paul Richard Thomas [mailto:paul.richard.tho...@gmail.com]
> Gesendet: Dienstag, 14. Juli 2015 13:10
> An: Bader, Reinhold; fort...@gcc.gnu.org; gcc-patches
> Betreff: [Bug fortran/52846] [F2008] Support submodules - part 2/3
> 
> Dear All,
> 
> Reinhold Bader has pointed out that naming the submodule files after the
> submodule name and using .mod as the extension can potentially lead to
> clashes. Therefore, I have written a patch to change gfortran to follow the
> naming convention of another leading brand:
> 
> submodule filename = module@ancestor@@submodule.smod
> 
> The implementation is straightforward and the ChangeLog and the patch
> provide an adequate description.
> 
> Bootstraps and regtests on x86_64 - OK for trunk?
> 
> Paul
> 
> 2015-07-14  Paul Thomas  
> 
> PR fortran/52846
> * gfortran.h : Add 'submodule_name' to gfc_use_list structure.
> * module.c (gfc_match_submodule): Define submodule_name and add
> static 'submodule_name'.
> (gfc_match_submodule): Build up submodule filenames, using '@'
> as a delimiter. Store the output filename in 'submodule_name'.
> (gfc_dump_module): If current state is COMP_SUBMODULE, write
> to file 'submodule_name', using SUBMODULE_EXTENSION.
> (gfc_use_module): Similarly, use the 'submodule_name' field in
> the gfc_use_list structure and SUBMODULE_EXTENSION to read the
> implicitly used submodule files.


subm_10_pos.f90
Description: subm_10_pos.f90


Re: [PATCH][13/n] Remove GENERIC stmt combining from SCCVN

2015-07-15 Thread Kyrill Tkachov


On 15/07/15 11:52, Richard Biener wrote:

On Thu, 9 Jul 2015, Kyrill Tkachov wrote:


Hi Richard,

On 07/07/15 15:10, Richard Biener wrote:

This moves a few more patterns that show up during bootstrap.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-07-07  Richard Biener  

* fold-const.c (fold_binary_loc): Move
(X & C2) << C1 -> (X << C1) & (C2 << C1) simplification ...
* match.pd: ... here.
Add (X * C1) % C2 -> 0 simplification pattern derived from
extract_muldiv_1.

* gcc.dg/vect/vect-over-widen-3-big-array.c: Adjust.


   case MIN_EXPR:
Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
===
--- gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c (revision
225504)
+++ gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c (working copy)
@@ -58,6 +58,6 @@ int main (void)
 return 0;
   }
   -/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern:
detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern:
detected" 2 "vect" } } */
   /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
   

I see this scan now fail on arm-none-eabi:
FAIL: gcc.dg/vect/vect-over-widen-3-big-array.c scan-tree-dump-times vect
"vect_recog_over_widening_pattern: detected" 2
FAIL: gcc.dg/vect/vect-over-widen-3-big-array.c -flto -ffat-lto-objects
scan-tree-dump-times vect "vect_recog_over_widening_pattern: detected" 2

Can you open a bugreport so I don't forget about this?


Sure, PR 66877.

Kyrill



Thanks,
Richard.





Re: [Bug fortran/52846] [F2008] Support submodules - part 2/3

2015-07-15 Thread Paul Richard Thomas
Dear Reinhold,

I didn't change the internal symbol names because @ is not allowed in
linker symbol names. I'll use '.' internally.

Thanks for letting me know. Obviously, I have a little bit more work
to do before the patch can be committed :-(

Paul

On 15 July 2015 at 12:58, Bader, Reinhold  wrote:
> Hello Paul,
>
> the attached example fails to compile with the error message
>
> Error: Global name »mod_s« at (1) is already being used as a MODULE at (2)
> subm_10_pos.f90:29:6:
>
>use mod_s
>
> However, the global identifier of a submodule is the pair (ancestor module 
> name,
> submodule name) according to 11.2.3, para 2 of the Fortran 2008 standard. So
> I think the error message is spurious.
>
> Cheers
> Reinhold
>
>> -Ursprüngliche Nachricht-
>> Von: Paul Richard Thomas [mailto:paul.richard.tho...@gmail.com]
>> Gesendet: Dienstag, 14. Juli 2015 13:10
>> An: Bader, Reinhold; fort...@gcc.gnu.org; gcc-patches
>> Betreff: [Bug fortran/52846] [F2008] Support submodules - part 2/3
>>
>> Dear All,
>>
>> Reinhold Bader has pointed out that naming the submodule files after the
>> submodule name and using .mod as the extension can potentially lead to
>> clashes. Therefore, I have written a patch to change gfortran to follow the
>> naming convention of another leading brand:
>>
>> submodule filename = module@ancestor@@submodule.smod
>>
>> The implementation is straightforward and the ChangeLog and the patch
>> provide an adequate description.
>>
>> Bootstraps and regtests on x86_64 - OK for trunk?
>>
>> Paul
>>
>> 2015-07-14  Paul Thomas  
>>
>> PR fortran/52846
>> * gfortran.h : Add 'submodule_name' to gfc_use_list structure.
>> * module.c (gfc_match_submodule): Define submodule_name and add
>> static 'submodule_name'.
>> (gfc_match_submodule): Build up submodule filenames, using '@'
>> as a delimiter. Store the output filename in 'submodule_name'.
>> (gfc_dump_module): If current state is COMP_SUBMODULE, write
>> to file 'submodule_name', using SUBMODULE_EXTENSION.
>> (gfc_use_module): Similarly, use the 'submodule_name' field in
>> the gfc_use_list structure and SUBMODULE_EXTENSION to read the
>> implicitly used submodule files.



-- 
Outside of a dog, a book is a man's best friend. Inside of a dog it's
too dark to read.

Groucho Marx


[gomp4.1] Add support for directive-name-modifiers in if clauses

2015-07-15 Thread Jakub Jelinek
Hi!

In OpenMP 4.1, one can optionally use modifiers to say which construct
the if clause belongs to.

Implemented thusly:

2015-07-15  Jakub Jelinek  

* tree-core.h (struct tree_omp_clause): Add subcode.if_modifier
field.
* tree.h (OMP_CLAUSE_IF_MODIFIER): Define.
* gimplify.c (gimplify_scan_omp_clauses): Add CODE argument.
For OMP_CLAUSE_IF complain if OMP_CLAUSE_IF_MODIFIER is present
and does not match code.  Handle OMP_CLAUSE_GANG separately.
(gimplify_oacc_cache, gimplify_omp_parallel, gimplify_omp_task,
gimplify_omp_for, gimplify_omp_workshare, gimplify_omp_target_update,
gimplify_expr): Adjust gimplify_scan_omp_clauses callers.
* tree-pretty-print.c (dump_omp_clause): Print OMP_CLAUSE_IF_MODIFIER.
c-family/
* c-omp.c (c_omp_split_clauses): Use OMP_CLAUSE_IF_MODIFIER to
decide where to put OMP_CLAUSE_IF, without modifier duplicate to
both target and parallel if combined.
c/
* c-parser.c (c_parser_omp_clause_if): Add IS_OMP argument.  Parse
and diagnose directive-name-modifier.
(c_parser_oacc_all_clauses, c_parser_omp_all_clauses): Adjust
callers.
cp/
* parser.c (cp_parser_omp_clause_if): Add IS_OMP argument.  Parse
and diagnose directive-name-modifier.
(cp_parser_oacc_all_clauses, cp_parser_omp_all_clauses): Adjust
callers.
fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Set OMP_CLAUSE_IF_MODIFIER
to ERROR_MARK.
testsuite/
* c-c++-common/gomp/if-1.c: New test.
* c-c++-common/gomp/if-2.c: New test.

--- gcc/tree-core.h.jj  2015-07-14 14:49:57.0 +0200
+++ gcc/tree-core.h 2015-07-15 09:06:43.660755515 +0200
@@ -1351,6 +1351,7 @@ struct GTY(()) tree_omp_clause {
 enum omp_clause_proc_bind_kind proc_bind_kind;
 enum tree_code reduction_code;
 enum omp_clause_linear_kindlinear_kind;
+enum tree_code if_modifier;
   } GTY ((skip)) subcode;
 
   /* The gimplification of OMP_CLAUSE_REDUCTION_{INIT,MERGE} for omp-low's
--- gcc/tree.h.jj   2015-07-14 14:29:49.0 +0200
+++ gcc/tree.h  2015-07-15 09:09:56.450274941 +0200
@@ -1385,6 +1385,9 @@ extern void protected_set_expr_location
 #define OMP_CLAUSE_SHARED_FIRSTPRIVATE(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SHARED)->base.public_flag)
 
+#define OMP_CLAUSE_IF_MODIFIER(NODE)   \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, 
OMP_CLAUSE_IF)->omp_clause.subcode.if_modifier)
+
 #define OMP_CLAUSE_FINAL_EXPR(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_FINAL), 0)
 #define OMP_CLAUSE_IF_EXPR(NODE) \
--- gcc/gimplify.c.jj   2015-07-14 16:17:52.0 +0200
+++ gcc/gimplify.c  2015-07-15 11:09:06.831010500 +0200
@@ -6133,7 +6133,8 @@ find_decl_expr (tree *tp, int *walk_subt
 
 static void
 gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
-  enum omp_region_type region_type)
+  enum omp_region_type region_type,
+  enum tree_code code)
 {
   struct gimplify_omp_ctx *ctx, *outer_ctx;
   tree c;
@@ -6550,8 +6551,33 @@ gimplify_scan_omp_clauses (tree *list_p,
}
  break;
 
-   case OMP_CLAUSE_FINAL:
case OMP_CLAUSE_IF:
+ if (OMP_CLAUSE_IF_MODIFIER (c) != ERROR_MARK
+ && OMP_CLAUSE_IF_MODIFIER (c) != code)
+   {
+ const char *p[2];
+ for (int i = 0; i < 2; i++)
+   switch (i ? OMP_CLAUSE_IF_MODIFIER (c) : code)
+ {
+ case OMP_PARALLEL: p[i] = "parallel"; break;
+ case OMP_TASK: p[i] = "task"; break;
+ case OMP_TASKLOOP: p[i] = "taskloop"; break;
+ case OMP_TARGET_DATA: p[i] = "target data"; break;
+ case OMP_TARGET: p[i] = "target"; break;
+ case OMP_TARGET_UPDATE: p[i] = "target update"; break;
+ case OMP_TARGET_ENTER_DATA:
+   p[i] = "target enter data"; break;
+ case OMP_TARGET_EXIT_DATA: p[i] = "target exit data"; break;
+ default: gcc_unreachable ();
+ }
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "expected %qs % clause modifier rather than %qs",
+   p[0], p[1]);
+ remove = true;
+   }
+ /* Fall through.  */
+
+   case OMP_CLAUSE_FINAL:
  OMP_CLAUSE_OPERAND (c, 0)
= gimple_boolify (OMP_CLAUSE_OPERAND (c, 0));
  /* Fall through.  */
@@ -6572,15 +6598,19 @@ gimplify_scan_omp_clauses (tree *list_p,
case OMP_CLAUSE_NUM_GANGS:
case OMP_CLAUSE_NUM_WORKERS:
case OMP_CLAUSE_VECTOR_LENGTH:
-   case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
  if (gimplify_expr (&OMP_CLAUSE_OPERAND (c, 0), pre_p, NULL,
 

Re: [PATCH] PR target/66824: Allow software FP SFmode in FP splitter

2015-07-15 Thread Uros Bizjak
On Mon, Jul 13, 2015 at 6:09 PM, H.J. Lu  wrote:
> On Sun, Jul 12, 2015 at 10:56 AM, Uros Bizjak  wrote:
>> On Sat, Jul 11, 2015 at 9:23 PM, H.J. Lu  wrote:
>>> On Thu, Jul 09, 2015 at 01:58:22PM -0700, H.J. Lu wrote:
 On Thu, Jul 09, 2015 at 12:13:38PM -0700, H.J. Lu wrote:
 > ix86_split_long_move can optimize floating point constant move, which
 > can be used to optimize SFmode move for IA MCU.
 >
 > OK for trunk if there is no regression?
 >
 >
 > H.J.
 > ---
 > gcc/
 >
 > PR target/66824
 > * config/i386/i386.c (ix86_split_to_parts): Allow SFmode move
 > for IA MCU.
 > (ix86_split_long_move): Support single move.
 > * config/i386/i386.md (FP splitter): Allow SFmode for IA MCU.
 >
 > gcc/testsuite/
 >
 > PR target/66824
 > * gcc.target/i386/pr66824.c: New test.
 > ---


 I missed the testcase.  Here is the updated patch.

>>>
>>> ix86_split_long_move can optimize floating point constant move, which
>>> can be used to optimize SFmode move with software floating point.
>>>
>>> OK for trunk if there are no regressions?
>>
>> No, this patch is wrong. Please investigate why "*movsf_internal"
>> doesn't use "?r/rmF" alternative in case FP regs are unavailable.
>> Perhaps you should add new alternative with a conditional constraint,
>> but without "?". And... please use:
>>
>
> I couldn't figure a way to add conditional constraints for "?r/rmF" and
> "r/rmF".   I simply disabled *movsf_internal if TARGET_HARD_FP_REGS
> is false and added a new "*movsf_internal_soft_fp" pattern.
>
> OK for trunk if there are no regressions?

No. We don't want duplicated patterns.

Please add two patterns at the end of the constraint string and use
enabled attribute to disable all others.

IMO, you will also need to apply similar change to a DFmode move.

Uros.


Re: [Patch, Fortran, 66035, v2] [5/6 Regression] gfortran ICE segfault

2015-07-15 Thread Paul Richard Thomas
Dear Andre,

I am still in the bizarre situation that the testcase compiles and
runs correctly on a clean trunk!

That said, the patch applies cleanly and, at very least from my point
of view, does not do any harm :-)

OK for trunk

Thanks for the patch

Paul

On 11 July 2015 at 14:08, Andre Vehreschild  wrote:
> Hi Mikael,
>
>> > @@ -7030,7 +7053,8 @@ gfc_trans_subcomponent_assign (tree dest,
>> > gfc_component * cm, gfc_expr * expr, gfc_add_expr_to_block (&block, tmp);
>> >  }
>> >else if (init && (cm->attr.allocatable
>> > -  || (cm->ts.type == BT_CLASS && CLASS_DATA
>> > (cm)->attr.allocatable)))
>> > +  || (cm->ts.type == BT_CLASS && CLASS_DATA (cm)->attr.allocatable
>> > +  && expr->ts.type != BT_CLASS)))
>> >  {
>> >/* Take care about non-array allocatable components here.  The
>> > alloc_* routine below is motivated by the alloc_scalar_allocatable_for_
>> > @@ -7074,6 +7098,14 @@ gfc_trans_subcomponent_assign (tree dest,
>> > gfc_component * cm, gfc_expr * expr, tmp = gfc_build_memcpy_call (tmp,
>> > se.expr, size); gfc_add_expr_to_block (&block, tmp);
>> > }
>> > +  else if (cm->ts.type == BT_CLASS && expr->ts.type == BT_CLASS)
>> > +   {
>> > + tmp = gfc_copy_class_to_class (se.expr, dest, integer_one_node,
>> > +  CLASS_DATA
>> > (cm)->attr.unlimited_polymorphic);
>> > + gfc_add_expr_to_block (&block, tmp);
>> > + gfc_add_modify (&block, gfc_class_vptr_get (dest),
>> > + gfc_class_vptr_get (se.expr));
>> > +   }
>> >else
>> > gfc_add_modify (&block, tmp,
>> > fold_convert (TREE_TYPE (tmp), se.expr));
>> But this hunk is canceled by the one before, isn't it?
>> I mean, If the condition here is true, the condition before was false?
>
> You are absolutely right. The second hunk is dead code and removed in the
> attached patch. That must have been the first attempt to address the issue and
> later on I did not perceive that it was useless. Sorry for that.
>
> Regards,
> Andre
> --
> Andre Vehreschild * Email: vehre ad gmx dot de



-- 
Outside of a dog, a book is a man's best friend. Inside of a dog it's
too dark to read.

Groucho Marx


Re: [Patch, fortran] PR 37131, inline matmul

2015-07-15 Thread Mikael Morin
Hello,

Le 13/07/2015 21:54, Thomas Schwinge a écrit :
> Original situation; _gfortran_runtime_error is not being properly
> declared (invoked via gcc/fortran/frontend-passes.c:runtime_error_ne),
> but, for example, _gfortran_error_runtime_at is being properly declared
> (invoked from elsewhere):

normal (non-frontend-generated) code goes through a 'resolution' step
which after some checks may resolve procedure names to intrinsic
procedure symbols (user procedure with same name as an intrinsic are
allowed).
So maybe what you miss is such a resolution step.
Does it work if you add
  resolve_block_construct (*c);
at the end of inline_matmul_assign?

Mikael


RE: [PATCH, MIPS] Support interrupt handlers with hard-float

2015-07-15 Thread Robert Suchanek
Hi,

> > Hi Matthew/Catherine,
> >
> > The attached patch removes the restriction to compile a TU with an ISR with
> -
> > mhard-float. Instead of forcing -msoft-float, the coprocessor 1 is disabled
> in
> > an ISR for -mhard-float.
> >
> > Ok to apply?
> 
> Yes, this one is OK.

Committed as r225818.

Regards,
Robert


Re: [PATCH][RFC] Consolidate -O3 torture options

2015-07-15 Thread Richard Biener
On Wed, 15 Jul 2015, Richard Biener wrote:

> On Tue, 14 Jul 2015, Jeff Law wrote:
> 
> > On 07/14/2015 05:58 AM, Richard Biener wrote:
> > > 
> > > The following patch tries to consolidate the -O3 torture testing
> > > options in the attempt to reduce testing time while not losing
> > > coverage.
> > > 
> > > It drops testing of -funroll-all-loops (which nobody should use)
> > > and retains only one non-default -O3 set of options - namely
> > > -O3 plus those flags that would be enabled by -fprofile-use.
> > > 
> > > One should hope for ~20% less time in the C and dg tortures this way.
> > > 
> > > Didn't look into other tortures to apply the same yet (objc-torture?)
> > > 
> > > Currently testing on x86_64-unknown-linux-gnu.
> > > 
> > > For weird flag combinations we do have contributors that test
> > > them and regularly report bugzillas.
> > > 
> > > Ok?
> > > 
> > > Comments?
> > > 
> > > Thanks,
> > > Richard.
> > > 
> > > 2015-07-14  Richard Biener  
> > > 
> > >   * lib/c-torture.exp (C_TORTURE_OPTIONS): Remove
> > >   { -O3 -fomit-frame-pointer },
> > >   { -O3 -fomit-frame-pointer -funroll-loops } and
> > >   { -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions }
> > >   in favor of
> > >   { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> > > -finline-functions }
> > >   * lib/gcc-dg.exp (DG_TORTURE_OPTIONS): Likewise.
> > I think this is OK -- I've occasionally wondered about the additional 
> > coverage
> > we get vs the amount of time spent for the various options.
> > 
> > I can't recall specific cases where one of those 3 options would trigger a
> > failure, but the two didn't. I'm sure it's happened, but it's just not common
> > enough to warrant the amount of time we spend testing it.
> > 
> > This patch has the additional benefit that I think we can eliminate scanning
> > the source for loops and eliminating the -funroll[-all]-loops options.   
> > Hmm,
> > that code may have already been dead... Hmmm.
> 
> Testing reveals one fallout:
> 
> FAIL: gcc.c-torture/execute/builtins/snprintf-chk.c execution,  -O3 
> -fomit-frame
> -pointer -funroll-loops -fpeel-loops -ftracer -finline-functions 
> FAIL: gcc.c-torture/execute/builtins/vsnprintf-chk.c execution,  -O3 
> -fomit-fram
> e-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions 
> 
> this is because -ftracer manages to thread things in a way that we
> optimize away a _chk variant at compile-time and hit
> 
>   if (chk_calls != 5)
> abort ();
> 
> with chk_calls == 4.  I'm going to install the following alongside
> (we've had similar issues with -ftree-loop-distribute-patterns enabled
> by -O3).

The following adjusts objc-torture.exp (the only other I found with
the many -O3 variants).

Tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-07-15  Richard Biener  

* lib/objc-torture.exp (OBJC_TORTURE_OPTIONS): Remove
{ -O3 -fomit-frame-pointer },
{ -O3 -fomit-frame-pointer -funroll-loops } and
{ -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions }
in favor of
{ -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
  -finline-functions }

Index: gcc/testsuite/lib/objc-torture.exp
===
--- gcc/testsuite/lib/objc-torture.exp  (revision 225807)
+++ gcc/testsuite/lib/objc-torture.exp  (working copy)
@@ -126,9 +126,7 @@ if [info exists TORTURE_OPTIONS] {
" -O0 " \
" -O1 " \
" -O2 " \
-   " -O3 -fomit-frame-pointer " \
-   " -O3 -fomit-frame-pointer -funroll-loops " \
-   " -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions " \
+   " -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer 
-finline-functions " \
" -O3 -g " \
" -Os " ]
 }


RE: [PATCH, MIPS] Support new interrupt handler options

2015-07-15 Thread Robert Suchanek
Hi Catherine,

> This is now OK to commit.
> Catherine

Committed as r225819.

Robert


RE: [PATCH, MIPS] Fix restoration of hi/lo in MIPS64R2 interrupt handlers

2015-07-15 Thread Robert Suchanek
Hi,

> > OK. I'll change it to interrupt_handler-5.c, add a comment and commit after
> > approval for the new interrupt handler options.
> 
> I believe this change is independent of the new attributes so feel free to
> commit
> it before.

I was going to commit it before but by the time I did that, I got approval 
for
all patches and committed in order.

Committed as r225820.

Regards,
Robert


Re: [gomp4] New test loop independent clause

2015-07-15 Thread Thomas Schwinge
Hi Tom!

On Wed, 15 Jul 2015 12:43:05 +0200, Tom de Vries  wrote:
> On 15/07/15 11:28, Thomas Schwinge wrote:
> > On Tue, 14 Jul 2015 14:10:01 -0500, James Norris  
> > wrote:
> >> The attached adds testing for the independent clause
> >> with the loop directive in Fortran.
> >>
> >> Committed to gomp-4_0-branch.
> >
> >> --- /dev/null
> >> +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90
> >
> > Thanks!  I had a look, and per -O2 -ftree-parallelize-loops=32
> > -fdump-tree-parloops_oacc_kernels-all -fdump-tree-optimized tree dumps,
> > indeed I see the anticipated »SUCCESS: may be parallelized« as well as
> > »#pragma omp target oacc_parallel num_gangs(32)« markers, so I think this
> > test case is being parallelized (as well as it executes correctly).  So,
> > I'm correct in assuming that this is not showcasing the Fortran issue
> > that Tom raised yesterday?
> 
> The issue I remember raising is: PR66873 - fortran variant of outer-1.c 
> not parallelized by autopar.

Ah, I mixed that up, sorry.


Anyway, do we want a compiler test case additional to the libgomp
execution test?

> > Anyway, assuming that it adds value to the
> > current set of test cases, should this also be made a compiler test case,
> > with dg-final directives checking for the anticipated properties, similar
> > to gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95, for example?
> >
> >> @@ -0,0 +1,43 @@
> >> +! { dg-do run } */
> >> +! { dg-additional-options "-cpp" }
> >> +! { dg-additional-options "-ftree-parallelize-loops=32" }
> >> +
> >> +#define N (1024 * 512)
> >> +
> >> +subroutine foo (a,  b,  c)
> >> +  integer, parameter :: n = N
> >> +  integer, dimension (n) :: a
> >> +  integer, dimension (n) :: b
> >> +  integer, dimension (n) :: c
> >> +  integer i, ii
> >> +
> >> +  do i = 1, n
> >> +a(i) = i * 2;
> >> +  end do
> >> +
> >> +  do i = 1, n
> >> +b(i) = i * 4;
> >> +  end do
> >> +
> >> +  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
> >> +!$acc loop independent
> >> +do ii = 1, n
> >> +  c(ii) = a(ii) + b(ii)
> >> +end do
> >> +  !$acc end kernels
> >> +
> >> +  do i = 1, n
> >> +if (c(i) .ne. a(i) + b(i)) call abort
> >> +  end do
> >> +
> >> +end subroutine
> >> +
> >> +program main
> >> +  integer, parameter :: n = N
> >> +  integer :: a(n)
> >> +  integer :: b(n)
> >> +  integer :: c(n)
> >> +
> >> +  call foo (a, b, c)
> >> +
> >> +end program main


Grüße,
 Thomas


pgp5Fx0KAIz40.pgp
Description: PGP signature


Re: [PATCH] PR target/66824: Allow software FP SFmode in FP splitter

2015-07-15 Thread Uros Bizjak
On Wed, Jul 15, 2015 at 1:36 PM, Uros Bizjak  wrote:

>> I couldn't figure a way to add conditional constraints for "?r/rmF" and
>> "r/rmF".   I simply disabled *movsf_internal if TARGET_HARD_FP_REGS
>> is false and added a new "*movsf_internal_soft_fp" pattern.
>>
>> OK for trunk if there is no regressions?
>
> No. We don't want duplicated patterns.
>
> Please add two patterns at the end of the constraint string and use

... add two *constraints* at the end ...

Uros.


undefined reference to `acc_copyin_array_h_' (was: [PATCH] [gomp] Recycle non-nested team if possible)

2015-07-15 Thread Thomas Schwinge
Hallo!

On Tue, 14 Jul 2015 13:47:41 +0200, Sebastian Huber 
 wrote:
> [...]
> 
> I run the test suite on x86_64-unknown-linux-gnu with the previously 
> mentioned asserts and got no unexpected failures.
> 
> With --disable-linux-futex (without asserts) I got several failures, but 
> none of them is related to my patch, e.g. they are of the following type
> 
> /tmp/ccw4RofR.o: In function `main':
> data-already-3.f:(.text+0x56): undefined reference to `acc_copyin_array_h_'

Hmm, I have difficulties seeing how --disable-linux-futex and/or your
libgomp/team.c could have any effect on OpenACC Fortran; I've not yet
been able to reproduce this.  Can you please tell me your complete GCC
configuration command line, etc., as well as cite from libgomp.log a
FAILing invocation command line:

>  === libgomp tests ===
> 
> Schedule of variations:
>  unix
> 
> Running target unix
> Using /usr/share/dejagnu/baseboards/unix.exp as board description file 
> for target.
> Using /usr/share/dejagnu/config/unix.exp as generic interface file for 
> target.
> Using 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/config/default.exp as 
> tool-and-target-specific interface file.
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.c/c.exp ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.c++/c++.exp 
> ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.fortran/fortran.exp
>  
> ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.graphite/graphite.exp
>  
> ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-c/c.exp 
> ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-c++/c++.exp
>  
> ...
> Running 
> /home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
>  
> ...
> FAIL: libgomp.oacc-fortran/acc_on_device-1-1.f90 
> -DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
> errors)
> WARNING: libgomp.oacc-fortran/acc_on_device-1-1.f90 
> -DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
> failed to produce executable
> [...]


Grüße,
 Thomas


signature.asc
Description: PGP signature


[PATCH][15/n] Remove GENERIC stmt combining from SCCVN

2015-07-15 Thread Richard Biener

This moves a few more patterns and in turn makes negate_expr_p
a predicate (without fully populating it with fold-const.c
capabilities - thus also not removing negate_expr_p using patterns
from there).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-07-14  Richard Biener  

* fold-const.c (fold_binary_loc): Move bool_var != 0 -> bool_var
and bool_var == 1 -> bool_var simplifications ...
* match.pd: ... to patterns here.  Factor out negate_expr_p
cases from the A - B -> A + (-B) patterns as negate_expr_p
predicate and add a -(A + B) -> (-B) - A pattern.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 225767)
+++ gcc/fold-const.c(working copy)
@@ -11245,16 +11245,6 @@ fold_binary_loc (location_t loc,
   if (tem != NULL_TREE)
return tem;
 
-  /* bool_var != 0 becomes bool_var. */
-  if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE && integer_zerop (arg1)
-  && code == NE_EXPR)
-return non_lvalue_loc (loc, fold_convert_loc (loc, type, arg0));
-
-  /* bool_var == 1 becomes bool_var. */
-  if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE && integer_onep (arg1)
-  && code == EQ_EXPR)
-return non_lvalue_loc (loc, fold_convert_loc (loc, type, arg0));
-
   /* bool_var != 1 becomes !bool_var. */
   if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE && integer_onep (arg1)
   && code == NE_EXPR)
Index: gcc/match.pd
===
--- gcc/match.pd(revision 225809)
+++ gcc/match.pd(working copy)
@@ -479,23 +479,38 @@ (define_operator_list CBRT BUILT_IN_CBRT
  (abs tree_expr_nonnegative_p@0)
  @0)
 
-/* A - B -> A + (-B) if B is easily negatable.  This just covers
-   very few cases of "easily negatable", effectively inlining negate_expr_p.  
*/
-(simplify
- (minus @0 INTEGER_CST@1)
+/* A few cases of fold-const.c negate_expr_p predicate.  */
+(match negate_expr_p
+ INTEGER_CST
  (if ((INTEGRAL_TYPE_P (type)
&& TYPE_OVERFLOW_WRAPS (type))
   || (!TYPE_OVERFLOW_SANITIZED (type)
- && may_negate_without_overflow_p (@1)))
-  (plus @0 (negate @1
+ && may_negate_without_overflow_p (t)
+(match negate_expr_p
+ FIXED_CST)
+(match negate_expr_p
+ (negate @0)
+ (if (!TYPE_OVERFLOW_SANITIZED (type
+(match negate_expr_p
+ REAL_CST
+ (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (t)
+/* VECTOR_CST handling of non-wrapping types would recurse in unsupported
+   ways.  */
+(match negate_expr_p
+ VECTOR_CST
+ (if (FLOAT_TYPE_P (TREE_TYPE (type)) || TYPE_OVERFLOW_WRAPS (type
+ 
+/* -(A + B) -> (-B) - A.  */
 (simplify
- (minus @0 REAL_CST@1)
- (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-  (plus @0 (negate @1
+ (negate (plus:c @0 negate_expr_p@1))
+ (if (!HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type))
+  && !HONOR_SIGNED_ZEROS (element_mode (type)))
+  (minus (negate @1) @0)))
+
+/* A - B -> A + (-B) if B is easily negatable.  */
 (simplify
- (minus @0 VECTOR_CST@1)
- (if (FLOAT_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
-  (plus @0 (negate @1
+ (minus @0 negate_expr_p@1)
+ (plus @0 (negate @1)))
 
 
 /* Try to fold (type) X op CST -> (type) (X op ((type-x) CST))
@@ -1678,6 +1693,20 @@ (define_operator_list CBRT BUILT_IN_CBRT
   (if (tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0)))
(cmp @0 (bit_xor @1 (convert @2))
 
+/* bool_var != 0 becomes bool_var.  */
+(simplify
+ (ne @0 integer_zerop@1)
+ (if (TREE_CODE (TREE_TYPE (@0)) == BOOLEAN_TYPE
+  && types_match (type, TREE_TYPE (@0)))
+  (non_lvalue @0)))
+/* bool_var == 1 becomes bool_var.  */
+(simplify
+ (eq @0 integer_onep@1)
+ (if (TREE_CODE (TREE_TYPE (@0)) == BOOLEAN_TYPE
+  && types_match (type, TREE_TYPE (@0)))
+  (non_lvalue @0)))
+
+
 /* Simplification of math builtins.  */
 
 /* fold_builtin_logarithm */


Re: [gomp] constify device data & fix cleanup

2015-07-15 Thread Nathan Sidwell

On 07/15/15 04:42, Ilya Verbin wrote:

2015-07-15 2:59 GMT+03:00 Nathan Sidwell :

The other thing this does is change the interface between libgomp and the plugin's 
load_image and unload_image routines.  I've added the ability to return a pointer to 
target-specific connection data, and have it provided to the unload function.  The ptx 
routines allocate some storage during loading, but had no way to free it on unloading. 
(Actually, the unloading was rather broken, attempting to free the wrong thing.)  This 
data is stashed in the map created for host->target fns & vars.


Why do you need to return dev_data to libgomp?  Is it possible to save
it in plugin, e.g. in some global set with target_data as a key?  I've
implemented unloading this way in plugin-intelmic.


Ok.  The thought occurred to me too late :)



@@ -350,11 +350,11 @@ generate_host_descr_file (const char *ho
 "#ifdef __cplusplus\n"
 "extern \"C\"\n"
 "#endif\n"
-   "void GOMP_offload_register (void *, int, void *);\n"
+   "void GOMP_offload_register (void *, int, const void *);\n"
+   "void GOMP_offload_unregister (void *, int, void const *);\n"
 "#ifdef __cplusplus\n"
 "extern \"C\"\n"
 "#endif\n"
-   "void GOMP_offload_unregister (void *, int, void *);\n\n"

I haven't tried to build intelmic-mkoffload, but looks like here is
something wrong with extern "C".


Doh! I'd misread the idiom as using the { ... } form.

nathan


Re: [gomp4.1] Support C++ "this" in OpenMP directives

2015-07-15 Thread Ilya Verbin
On Thu, Jul 09, 2015 at 10:50:14 +0200, Jakub Jelinek wrote:
>   * parser.c (cp_parser_omp_var_list_no_open): Parse this.
>   * cp-tree.h (finish_omp_declare_simd_methods): New prototype.
>   * semantics.c (handle_omp_array_sections_1): Disallow this based
>   array sections for OpenMP.
>   (finish_omp_declare_simd_methods): New function.
>   (finish_omp_clauses): Don't attempt to adjust linear step of
>   this if it points to TYPE_BEING_DEFINED.  Disallow this in
>   all clauses expecting variable lists, except for declare simd
>   linear/uniform/aligned clauses.
>   (finish_struct_1): Call finish_omp_declare_simd_methods.
> 
>   * g++.dg/vect/simd-clone-2.cc: New test.
>   * g++.dg/vect/simd-clone-2.h: New file.
>   * g++.dg/vect/simd-clone-3.cc: New test.
>   * g++.dg/vect/simd-clone-4.cc: New test.
>   * g++.dg/vect/simd-clone-4.h: New file.
>   * g++.dg/vect/simd-clone-5.cc: New test.
>   * g++.dg/gomp/this-1.C: New test.
>   * g++.dg/gomp/this-2.C: New test.

One more warning:

gcc/cp/parser.c: In function ‘tree_node* 
cp_parser_omp_var_list_no_open(cp_parser*, omp_clause_code, tree, bool*)’:
gcc/cp/parser.c:27931:26: error: ‘name’ may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
  token->location);
  ^
cc1plus: all warnings being treated as errors
make[4]: *** [cp/parser.o] Error 1

  -- Ilya


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-15 Thread Michael Matz
Hi,

On Tue, 14 Jul 2015, Richard Earnshaw wrote:

> > I think it's a backend bug that parameters and locals are extended 
> > differently.  The code in tree-outof-ssa was written with the 
> > assumption that the modes of RTL objects might be different (larger) 
> > than the tree types suggest, but that they be _consistent_, i.e. that 
> > the particular extension depends on only the types, not on the 
> > tree-type of the decl.
> 
> We went through this a couple of weeks back.  The backend documentation 
> for PROMOTE_MODE says:
> 
> " For most machines, the macro definition does not change 
> @var{unsignedp}. However, some machines, have instructions that 
> preferentially handle either signed or unsigned quantities of certain 
> modes.  For example, on the DEC Alpha, 32-bit loads from memory and 
> 32-bit add instructions sign-extend the result to 64 bits.  On such 
> machines, set @var{unsignedp} according to which kind of extension is 
> more efficient."

We aren't talking about what PROMOTE_MODE is or is not allowed to do, but about 
an inconsistency between what PROMOTE_MODE does and what 
target.promote_function_mode does.  This is quite clearly a historic 
accident in the ARM port, namely that they extend parameters and locals in 
opposite ways, which originally wasn't even possible to describe in GCC.

> So clearly it anticipates that all permitted extensions should work, and 
> in particular it makes no mention of this having to match some 
> abi-mandated promotions.  That makes this a MI bug not a target one.

All extensions do work just fine, except when the LHS and RHS of a copy 
instruction with the same types require different extension types 
depending on if the rhs is a PARM_DECL or not.  It's also fundamental in 
gimple that copies can't change signedness, and this inconsistency breaks 
that assumption.  Jim says that this is actually harmless in reality, 
because even a copy propagation will at most replace a use of a VAR_DECL 
with a PARM_DECL, and at the point of such use the other promotion would 
be added by expand.  While the latter is true, I haven't really convinced 
myself that this really leads to correct code in all cases.


Ciao,
Michael.


Re: [Fortran, Patch] Passing function pointer to co_reduce

2015-07-15 Thread Mikael Morin
Hello,

Le 13/07/2015 17:31, Alessandro Fanfarillo a écrit :
> Dear all,
> 
> during the implementation of co_reduce in OpenCoarrays I noticed that
> GFortran passes a pointer to function instead of the function name to
> co_reduce.
> 
More exactly there are two address operators ('&') in a row (the dumping
removes one of them).

> Currently the compiler produces the following call:
> 
> _gfortran_caf_co_reduce (&desc.0, &simple_reduction, 0, 0, 0B, 0B, 0, 0);
> 
> where simple_reduction is the pure function that has to be used by co_reduce.
> 
> The attached patch seems to fix the issue, any comments?
> 
The patch itself looks good to me.
A ChangeLog entry should be provided with it.
The test is missing the usual dejagnu pattern matching directives to
check the generated code.
Do you have commit rights?

Mikael


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-15 Thread Michael Matz
Hi,

On Tue, 14 Jul 2015, Jim Wilson wrote:

> Now that we do have the problem, we can't fix it without an ARM port ABI 
> change, which is undesirable, so we may have to fix it with a MI change.

What's the ABI implication of fixing the inconsistency?

> There were two MI changes suggested, one was fixing the out-of-ssa pass 
> to handle SUBREG_PROMOTED_P promotions.  The other was to disallow 
> creating PHI nodes between parms and locals.  I haven't had a chance to 
> try implementing the second one yet; I hope to work on that today.

Don't bother with the latter, it doesn't have a chance of being accepted.

If the terrible hack in outof-ssa really will be necessary (and I really 
really hope it won't) then I think I prefer the approach you partly tried 
in comment #12 of PR 65932 already.  Let partition_to_pseudo[] refer to 
the promoted subreg and deal with that situation in emit_partition_copy; 
I'd then hope that the unsignedsrcp parameter could go away (unfortunately 
the sizeexp will have to stay).


Ciao,
Michael.


Re: Tests for libgomp based on OpenMP Examples 4.0.2

2015-07-15 Thread Ilya Verbin
On Wed, Jul 15, 2015 at 12:29:53 +0200, Dominique d'Humières wrote:
> > The patch replaces all FP comparisons with inequalities and epsilons
> > in those tests for libgomp.
> In libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90
> 
> integer, parameter :: EPS = 0.005
> 
> should be
> 
> real, parameter :: EPS = 0.005

Committed as obvious.


2015-07-15  Maxim Blumenthal  

* testsuite/libgomp.fortran/examples-4/simd-8.f90: (main): Change type
of EPS parameter from integer to real.
* testsuite/libgomp.fortran/examples-4/task_dep-5.f90: (check): Change
type of EPS parameter from integer to real.


diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90 
b/libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90
index ba7b0f9..3c7869d 100644
--- a/libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90
+++ b/libgomp/testsuite/libgomp.fortran/examples-4/simd-8.f90
@@ -36,7 +36,7 @@ program simd_8f
   implicit none
   real :: pri, arr(1000), diff
   integer :: i
-  integer, parameter :: EPS = 0.005
+  real, parameter :: EPS = 0.005
 
   do i = 1, 1000
  P(i)   = i
diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/task_dep-5.f90 
b/libgomp/testsuite/libgomp.fortran/examples-4/task_dep-5.f90
index f12b42c..0746531 100644
--- a/libgomp/testsuite/libgomp.fortran/examples-4/task_dep-5.f90
+++ b/libgomp/testsuite/libgomp.fortran/examples-4/task_dep-5.f90
@@ -44,7 +44,7 @@ contains
   subroutine check (N, A, B)
 integer :: N
 integer :: i, j
-integer, parameter :: EPS = 0.01
+real, parameter :: EPS = 0.01
 real, dimension(N,N) :: A, B
 real :: diff
 do i = 1, N


  -- Ilya


Re: undefined reference to `acc_copyin_array_h_'

2015-07-15 Thread Sebastian Huber



On 15/07/15 14:16, Thomas Schwinge wrote:

Hallo!

On Tue, 14 Jul 2015 13:47:41 +0200, Sebastian Huber 
 wrote:

[...]

I run the test suite on x86_64-unknown-linux-gnu with the previously
mentioned asserts and got no unexpected failures.

With --disable-linux-futex (without asserts) I got several failures, but
none of them is related to my patch, e.g. they are of the following type

/tmp/ccw4RofR.o: In function `main':
data-already-3.f:(.text+0x56): undefined reference to `acc_copyin_array_h_'

Hmm, I have difficulties seeing how --disable-linux-futex and/or your
libgomp/team.c could have any effect on OpenACC Fortran; I've not yet
been able to reproduce this.  Can you please tell me your complete GCC
configuration command line, etc., as well as cite from libgomp.log a
FAILing invocation command line:


I deleted my build tree and started from scratch with the latest trunk 
version. Now I don't get errors with --disable-linux-futex:


Native configuration is x86_64-unknown-linux-gnu

=== libgomp tests ===

Schedule of variations:
unix

Running target unix
Using /usr/share/dejagnu/baseboards/unix.exp as board description file 
for target.
Using /usr/share/dejagnu/config/unix.exp as generic interface file for 
target.
Using gcc-git/libgomp/testsuite/config/default.exp as 
tool-and-target-specific interface file.

Running gcc-git/libgomp/testsuite/libgomp.c/c.exp ...
Running gcc-git/libgomp/testsuite/libgomp.c++/c++.exp ...
Running gcc-git/libgomp/testsuite/libgomp.fortran/fortran.exp ...
Running gcc-git/libgomp/testsuite/libgomp.graphite/graphite.exp ...
Running gcc-git/libgomp/testsuite/libgomp.oacc-c/c.exp ...
Running gcc-git/libgomp/testsuite/libgomp.oacc-c++/c++.exp ...
Running gcc-git/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp ...

=== libgomp Summary ===

# of expected passes5145
# of expected failures  8
# of unsupported tests  258

Sorry for the confusion, I guess that I somehow damaged my previous 
build tree.


--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



Re: [PATCH] PR target/66824: Allow software FP SFmode in FP splitter

2015-07-15 Thread H.J. Lu
On Wed, Jul 15, 2015 at 5:05 AM, Uros Bizjak  wrote:
> On Wed, Jul 15, 2015 at 1:36 PM, Uros Bizjak  wrote:
>
>>> I couldn't figure a way to add conditional constraints for "?r/rmF" and
>>> "r/rmF".   I simply disabled *movsf_internal if TARGET_HARD_FP_REGS
>>> is false and added a new "*movsf_internal_soft_fp" pattern.
>>>
>>> OK for trunk if there is no regressions?
>>
>> No. We don't want duplicated patterns.
>> Please add two patterns at the end of the constraint string and use
>
> ... add two *constraints* at the end ...

When I simply added "r/rmF" to the end,  RA saw "?r/rmF" first and
ignored  "r/rmF" at the end.

I also tried:

+(define_register_constraint "Ba"
+ "TARGET_HARD_FP_REGS ? NO_REGS : GENERAL_REGS"
+ "@internal Any integer register when TARGET_HARD_FP_REGS is disbled.")
+
+(define_constraint "Bb"
+  "@internal Memory operand when TARGET_HARD_FP_REGS is disabled."
+  (and (not (match_test "TARGET_HARD_FP_REGS"))
+   (match_operand 0 "memory_operand")))
+
 (define_constraint "Bf"
   "@internal Flags register operand."
   (match_operand 0 "flags_reg_operand"))
@@ -160,6 +171,15 @@
   "@internal GOT memory operand."
   (match_operand 0 "GOT_memory_operand"))

+(define_constraint "Bm"
+  "@internal Memory operand when TARGET_HARD_FP_REGS is enabled."
+  (and (match_test "TARGET_HARD_FP_REGS")
+   (match_operand 0 "memory_operand")))
+
+(define_register_constraint "Br"
+ "TARGET_HARD_FP_REGS ? GENERAL_REGS : NO_REGS"
+ "@internal Any integer register when TARGET_HARD_FP_REGS is enabled.")
+

I couldn't find a way to generate "mov $1234, %eax" without a
separate pattern.

-- 
H.J.


Re: [gomp4] New test loop independent clause

2015-07-15 Thread Tom de Vries

On 15/07/15 14:03, Thomas Schwinge wrote:

Hi Tom!

On Wed, 15 Jul 2015 12:43:05 +0200, Tom de Vries  wrote:

On 15/07/15 11:28, Thomas Schwinge wrote:

On Tue, 14 Jul 2015 14:10:01 -0500, James Norris  
wrote:

The attached adds testing for the independent clause
with the loop directive in Fortran.

Committed to gomp-4_0-branch.



--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90


Thanks!  I had a look, and per -O2 -ftree-parallelize-loops=32
-fdump-tree-parloops_oacc_kernels-all -fdump-tree-optimized tree dumps,
indeed I see the anticipated »SUCCESS: may be parallelized« as well as
»#pragma omp target oacc_parallel num_gangs(32)« markers, so I think this
test case is being parallelized (as well as it executes correctly).  So,
I'm correct in assuming that this is not showcasing the Fortran issue
that Tom raised yesterday?


The issue I remember raising is: PR66873 - fortran variant of outer-1.c
not parallelized by autopar.


Ah, I mixed that up, sorry.


Anyway, do we want a compiler test case additional to the libgomp
execution test?



The important thing is to check in C and Fortran for 'SUCCESS: may be 
parallelized, marked independent'.


So I think we want a fortran variant of 
gcc/testsuite/c-c++-common/goacc/kernels-independent.c, in other words, 
an independent clause variant of 
gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95


Both gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 and 
gcc/testsuite/c-c++-common/goacc/kernels-loop.c have libgomp 
counterparts, there's no reason to have libgomp counterparts for the 
respective independent

clause variants.

Thanks,
- Tom


Anyway, assuming that it adds value to the
current set of test cases, should this also be made a compiler test case,
with dg-final directives checking for the anticipated properties, similar
to gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95, for example?


@@ -0,0 +1,43 @@
+! { dg-do run } */
+! { dg-additional-options "-cpp" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+#define N (1024 * 512)
+
+subroutine foo (a,  b,  c)
+  integer, parameter :: n = N
+  integer, dimension (n) :: a
+  integer, dimension (n) :: b
+  integer, dimension (n) :: c
+  integer i, ii
+
+  do i = 1, n
+a(i) = i * 2;
+  end do
+
+  do i = 1, n
+b(i) = i * 4;
+  end do
+
+  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
+!$acc loop independent
+do ii = 1, n
+  c(ii) = a(ii) + b(ii)
+end do
+  !$acc end kernels
+
+  do i = 1, n
+if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end subroutine
+
+program main
+  integer, parameter :: n = N
+  integer :: a(n)
+  integer :: b(n)
+  integer :: c(n)
+
+  call foo (a, b, c)
+
+end program main



Grüße,
  Thomas





[PATCH][AArch64] Use cinc mnemonic for *csinc2_insn

2015-07-15 Thread Kyrill Tkachov

Hi all,
This pattern performs a csinc of the same register in both operands.
This form can be written using the shorter alias CINC.
The ARMv8-A ARM says:

"CINC , , 
is equivalent to
CSINC , , , invert()"

So the patch switches the condition output modifier from 'M' to the inverse 'm'.

I don't think we emit cinc anywhere else in aarch64.md so this will exercise 
the assembler
a tiny bit more (no fallout detected in my testing) and make the output a bit 
more concise.
Again, this is an alias, not a different instruction, so there are no 
performance/behaviour implications

Bootstrapped and tested on aarch64.

Ok for trunk?

Thanks,
Kyrill

2015-07-15  Kyrylo Tkachov  

* config/aarch64/aarch64.md (*csinc2_insn): Use cinc mnemonic.
commit ec05547074dd575471ebafdd10975f438e3361f6
Author: Kyrylo Tkachov 
Date:   Wed Jul 8 11:04:47 2015 +0100

[AArch64] Use cinc mnemonic for *csinc2_insn

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 300537e..57a3360 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3036,7 +3036,7 @@ (define_insn "*csinc2_insn"
 (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
   (match_operand:GPI 1 "register_operand" "r")))]
   ""
-  "csinc\\t%0, %1, %1, %M2"
+  "cinc\\t%0, %1, %m2"
   [(set_attr "type" "csel")]
 )
 


Re: undefined reference to `acc_copyin_array_h_'

2015-07-15 Thread Thomas Schwinge
Hi!

On Wed, 15 Jul 2015 15:25:19 +0200, Sebastian Huber 
 wrote:
> I deleted my build tree and started from scratch with the latest trunk 
> version. Now I don't get errors with --disable-linux-futex:

> # of expected passes5145
> # of expected failures  8
> # of unsupported tests  258
> 
> Sorry for the confusion, I guess that I somehow damaged my previous 
> build tree.

Alright, then there is no inexplicable mystery to be chased after; thanks
for checking.  :-)


Grüße,
 Thomas


pgppwu_1RGq82.pgp
Description: PGP signature


Re: [PATCH][AArch64] Use cinc mnemonic for *csinc2_insn

2015-07-15 Thread James Greenhalgh
On Wed, Jul 15, 2015 at 02:44:40PM +0100, Kyrill Tkachov wrote:
> Hi all,
> This pattern performs a csinc of the same register in both operands.
> This form can be written using the shorter alias CINC.
> The ARMv8-A ARM says:
> 
> "CINC , , 
> is equivalent to
> CSINC , , , invert()"
> 
> So the patch switches the condition output modifier from 'M' to the inverse 
> 'm'.
> 
> I don't think we emit cinc anywhere else in aarch64.md so this will exercise
> the assembler a tiny bit more (no fallout detected in my testing) and make
> the output a bit more concise.

More importantly this puts us in line with the preferred disassembly, which
I think is a desirable position. If we can ever get here using the
zero register you should really put out cset, but having different
alternatives in this pattern to cover the different aliasing conditions
does seem overkill!

> Again, this is an alias, not a different instruction, so there are no
> performance/behaviour implications
> 
> Bootstrapped and tested on aarch64.
> 
> Ok for trunk?

Yup, OK. Watch out for the mangling of the tab in your proposed ChangeLog.

Thanks,
James

> 
> Thanks,
> Kyrill
> 
> 2015-07-15  Kyrylo Tkachov  
> 
>  * config/aarch64/aarch64.md (*csinc2_insn): Use cinc mnemonic.

> commit ec05547074dd575471ebafdd10975f438e3361f6
> Author: Kyrylo Tkachov 
> Date:   Wed Jul 8 11:04:47 2015 +0100
> 
> [AArch64] Use cinc mnemonic for *csinc2_insn
> 
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 300537e..57a3360 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -3036,7 +3036,7 @@ (define_insn "*csinc2_insn"
>  (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
>(match_operand:GPI 1 "register_operand" "r")))]
>""
> -  "csinc\\t%0, %1, %1, %M2"
> +  "cinc\\t%0, %1, %m2"
>[(set_attr "type" "csel")]
>  )
>  



Re: [Patch, fortran] PR 37131, inline matmul

2015-07-15 Thread Thomas Schwinge
Hi!

On Wed, 15 Jul 2015 13:44:38 +0200, Mikael Morin  wrote:
> Le 13/07/2015 21:54, Thomas Schwinge a écrit :
> > Original situation; _gfortran_runtime_error is not being properly
> > declared (invoked via gcc/fortran/frontend-passes.c:runtime_error_ne),
> > but, for example, _gfortran_error_runtime_at is being properly declared
> > (invoked from elsewhere):
> 
> normal (non-frontend-generated) code goes through a 'resolution' step
> which after some checks may resolve procedure names to intrinsic
> procedure symbols (user procedure with same name as an intrinsic are
> allowed).
> So maybe what you miss is such a resolution step.
> Does it work if you add
>   resolve_block_construct (*c);
> at the end of inline_matmul_assign?

Hmm, that doesn't seem to work, or I've done it wrongly.  Any other
ideas?


Grüße,
 Thomas


pgpS5JW4bLvf5.pgp
Description: PGP signature


Re: [PATCH][AArch64] Use cinc mnemonic for *csinc2_insn

2015-07-15 Thread Kyrill Tkachov


On 15/07/15 15:00, James Greenhalgh wrote:

On Wed, Jul 15, 2015 at 02:44:40PM +0100, Kyrill Tkachov wrote:

Hi all,
This pattern performs a csinc of the same register in both operands.
This form can be written using the shorter alias CINC.
The ARMv8-A ARM says:

"CINC , , 
is equivalent to
CSINC , , , invert()"

So the patch switches the condition output modifier from 'M' to the inverse 'm'.

I don't think we emit cinc anywhere else in aarch64.md so this will exercise
the assembler a tiny bit more (no fallout detected in my testing) and make
the output a bit more concise.

More importantly this puts us in line with the preferred disassembly, which
I think is a desirable position. If we can ever get here using the
zero register you should really put out cset, but having different
alternatives in this pattern to cover the different aliasing conditions
does seem overkill!


The predicate here is a register_operand so a (const_int 0)
matching the zero register would never go into this pattern, I think.

I would like it if we output the compact forms in the csneg3, csinv3, csinc3
patterns whenever the register numbers match up, but it would require making
the output templates more verbose, which could hurt the readability of the 
patterns.





Again, this is an alias, not a different instruction, so there are no
performance/behaviour implications

Bootstrapped and tested on aarch64.

Ok for trunk?

Yup, OK. Watch out for the mangling of the tab in your proposed ChangeLog.


Thanks, committed with r225830.

Kyrill



Thanks,
James


Thanks,
Kyrill

2015-07-15  Kyrylo Tkachov  

  * config/aarch64/aarch64.md (*csinc2_insn): Use cinc mnemonic.
commit ec05547074dd575471ebafdd10975f438e3361f6
Author: Kyrylo Tkachov 
Date:   Wed Jul 8 11:04:47 2015 +0100

 [AArch64] Use cinc mnemonic for *csinc2_insn

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 300537e..57a3360 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3036,7 +3036,7 @@ (define_insn "*csinc2_insn"
  (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
(match_operand:GPI 1 "register_operand" "r")))]
""
-  "csinc\\t%0, %1, %1, %M2"
+  "cinc\\t%0, %1, %m2"
[(set_attr "type" "csel")]
  )
  




Re: [PATCH][C++] Fix PR65091

2015-07-15 Thread Jason Merrill

On 07/14/2015 07:29 PM, Paolo Carlini wrote:

I see. At the time I had this second try:

 https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01232.html

which has the action in cp_parser_unqualified_id but unfortunately is
more complex. Certainly we can't just return unconditionally
error_mark_node in that case, we have at least these regressions, a few
accepts-invalid:


I think the problem here was that you can't just return error_mark_node 
without also forcing a parser error, or you get accepts-invalids.  I 
played around with it and came up with the following:


Tested x86_64-pc-linux-gnu, applying to trunk.

commit 25d5216505bfc4b922ce3e9627ed6ad223f348e1
Author: Jason Merrill 
Date:   Tue Jul 14 14:34:00 2015 -0400

	PR c++/65091
	* parser.c (cp_parser_unqualified_id): Don't accept ~x in a
	template if there is no type x in scope.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 574ffba..f1d5656 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -5162,8 +5162,15 @@ cp_parser_unqualified_id (cp_parser* parser,
 	if (processing_template_decl
 		&& ! cp_parser_parse_definitely (parser))
 	  {
-		/* We couldn't find a type with this name, so just accept
-		   it and check for a match at instantiation time.  */
+		/* We couldn't find a type with this name.  If we're parsing
+		   tentatively, fail and try something else.  */
+		if (cp_parser_uncommitted_to_tentative_parse_p (parser))
+		  {
+		cp_parser_simulate_error (parser);
+		return error_mark_node;
+		  }
+		/* Otherwise, accept it and check for a match at instantiation
+		   time.  */
 		type_decl = cp_parser_identifier (parser);
 		if (type_decl != error_mark_node)
 		  type_decl = build_nt (BIT_NOT_EXPR, type_decl);
diff --git a/gcc/testsuite/g++.dg/parse/dtor17.C b/gcc/testsuite/g++.dg/parse/dtor17.C
new file mode 100644
index 000..1fca413
--- /dev/null
+++ b/gcc/testsuite/g++.dg/parse/dtor17.C
@@ -0,0 +1,11 @@
+// PR c++/65091
+// { dg-do compile { target c++11 } }
+
+template
+auto foo(T x) -> decltype(~x) {
+  return ~x;
+}
+
+int bar() {
+  return foo(10);
+}


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Michael Matz
Hi,

On Tue, 14 Jul 2015, Richard Biener wrote:

> For example have those special caches have two marking phases. The first 
> phase marks all non-key edges originating from each entry. The second 
> phase is the same as what we have now - unmarked entries get removed.
> 
> The first phase would go with regular marking, the second when 
> processing caches.
> 
> You'd delay collecting the memory the non-key edges point to
> to the next GC run, but I think that's a fair trade-off.

That's Tom's other approach with supporting multi-step dependencies.  As I 
have tried to argue in the other thread, I think this idea is 
fundamentally broken and just hides real bugs, and I don't see why this 
would be different for this particular hash-map.  If the value of this 
hash refers to a decl that isn't mentioned anywhere else except from this 
hash entry, then it has no meaning anymore, and hence shouldn't itself be 
part of the hash anymore.


Ciao,
Michael.


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Jakub Jelinek
On Wed, Jul 15, 2015 at 04:14:07PM +0200, Michael Matz wrote:
> That's Toms other approach with supporting multi-step dependencies.  As I 
> have tried to argue in the other thread, I think this idea is 
> fundamentally broken and just hides real bugs, and I don't see why this 
> would be different for this particular hash-map.  If the value of this 
> hash refers to a decl that isn't mentioned anywhere else except from this 
> hash entry, then it has no meaning anymore, and hence shouldn't itself be 
> part of the hash anymore.

You mean key or value?  The value of course can mention various trees that
aren't referenced from anywhere else, and it has meaning.

Jakub


Re: [PATCH] Add 'switch' statement to match.pd language

2015-07-15 Thread Michael Matz
Hi,

On Tue, 14 Jul 2015, Richard Biener wrote:

> I know Micha detests the extra 'if' as much as the extra braces thus
> would have prefered
> 
>  (switch
>   (A) B
>   (B) C
>   (C) D
>   E)

The lispy way would have been

   (switch
(A) (B)
(C) (D)
(E) (F)
G)

i.e. parenthesize the result as well, which then would be unambiguously 
parsable.  But hey, it's your language ;)


Ciao,
Michael.


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Michael Matz
Hi,

On Wed, 15 Jul 2015, Jakub Jelinek wrote:

> On Wed, Jul 15, 2015 at 04:14:07PM +0200, Michael Matz wrote:
> > That's Toms other approach with supporting multi-step dependencies.  As I 
> > have tried to argue in the other thread, I think this idea is 
> > fundamentally broken and just hides real bugs, and I don't see why this 
> > would be different for this particular hash-map.  If the value of this 
> > hash refers to a decl that isn't mentioned anywhere else except from this 
> > hash entry, then it has no meaning anymore, and hence shouldn't itself be 
> > part of the hash anymore.
> 
> You mean key or value?  The value of course can mention various trees that
> aren't referenced from anywhere else, and it has meaning.

No, I really meant value.  If you think it has meaning, then tell me what 
it is for DECL_VALUE_EXPR (X) to be 'Y', if Y is nowhere else mentioned, 
neither in code, nor in local-decls, nor in globals, or anywhere else that 
would be reachable by GC.


Ciao,
Michael.


Re: [gomp4] libgomp: Cope with DejaGnu having no mechanism to transfer environment variables to remote boards

2015-07-15 Thread Thomas Schwinge
Hi!

(CCed to gcc-patches in case this is useful for anyone else, too.)

Nathan noticed that the following patch ("hack") no longer applies
cleanly to GCC trunk:

On Thu, 14 May 2015 00:10:50 +0200, I wrote:
> On Fri, 24 Oct 2014 00:18:10 +0200, I wrote:
> > On Thu, 23 Oct 2014 17:02:20 +0200, I wrote:
> > > On Fri, 16 May 2014 20:01:47 +0200, I wrote:
> > > > On Fri, 16 May 2014 19:02:41 +0200, Tom de Vries 
> > > >  wrote:
> > > > > Essentially I want to run the set of testcases in libgomp.oacc-c, 
> > > > > similar to how
> > > > > it is done in libgomp.oacc-c/c.exp, with the environment variable
> > > > > ACC_DEVICE_TYPE set to host for the executable.
> > > > 
> > > > Thanks for working on this; indeed the idea is that we run the libgomp
> > > > execution tests is as many configurations as possible -- sort of how
> > > > certain classes of GCC tests are run for many different compiler options
> > > > (optimization levels).  I suggest this to be implemented in the existing
> > > > libgomp.*/*.exp files, by setting up some suitable looping over all
> > > > supported accelerator targets (currently just the one GCC has been
> > > > configured with) as well as host fallback, with suitable tags being 
> > > > added
> > > > to the PASS/FAIL lines.  It appears to make sense to first separate any
> > > > libgomp tests that test "traditional" OpenMP shared-memory, so don't run
> > > > on acceleration devices; these of course only need to be run once.
> > > > 
> > > > > I'm not sure setenv is guaranteed to work.
> > > 
> > > Indeed that does not work for non-native testing, which is what we're
> > > doing [...]...  That means, we can't do any testing of
> > > offloading -- which is "a bit" unfortunate.
> 
> > > > > I found this related discussion:
> > > > > http://marc.info/?t=12162271871&r=1&w=2 .
> > > > 
> > > > Is it just the shell quoting issue that remains?  That's fixable.
> > > 
> > > Instead of going this route (the approach has later been disputed
> > > upstream), I cooked up something else.  This is clearly a hack, and I
> > > hope to get rid of that as soon as possible.  But it works.
> > 
> > Here it is.  Don't look closely -- you've been warned.  My hope is that
> > we'll be able to get rid of that rather sooner than later:
> > [...] for this use of ACC_DEVICE_TYPE, I hope that soon we'll
> > get a compiler option to specify the default offloading device.  (Intel
> > are working into this direction, upstream, -foffload=[...].)
> 
> (That -foffload=[...] option exists, but we're not yet (able) to use it
> as desired.)  See also the discussion in
> ,
> ,
> and so on.
> 
> > Until then: committed [...], with disgust.
> 
> Until then: committed to gomp-4_0-branch in r223185, with disgust.
> 
> commit 5803a3376041706672baede225a2dc47d70dd9a1
> Author: tschwinge 
> Date:   Wed May 13 22:07:27 2015 +
> 
> libgomp: Cope with DejaGnu having no mechanism to transfer environment 
> variables to remote boards

..., so I resolved the merge conflicts; here is the patch (untested, but
should be fine) rebased onto current trunk r225829:

commit 31d8e05087bf129c1afe7be6593311d204393c09
Author: Thomas Schwinge 
Date:   Wed Jul 15 16:11:23 2015 +0200

libgomp: Cope with DejaGnu having no mechanism to transfer environment 
variables to remote boards

No doubt, looking forward to the day, when this can be reverted.

Port gomp-4_0-branch r223185:

libgomp/
* env.c (initialize_env): Remove static attribute.
* libgomp.map (INTERNAL): Export initialize_env.
* testsuite/lib/libgomp.exp (libgomp_init): Build a few object
files to pre-set environment variables.
(ALWAYS_CFLAGS): Add constructor-setenv-defaults.o to ldflags.
(libgomp_target_compile): Don't set the compiler.
* testsuite/libgomp.c++/c++.exp (GXX_UNDER_TEST): Provide default.
(libgomp_compile_options): Set the compiler.
* testsuite/libgomp.c/c.exp (libgomp_compile_options): Set the
compiler.
* testsuite/libgomp.fortran/fortran.exp (GFORTRAN_UNDER_TEST):
Provide default.
(libgomp_compile_options): Set the compiler.
* testsuite/libgomp.graphite/graphite.exp
(libgomp_compile_options): Set the compiler.
* testsuite/libgomp.oacc-c++/c++.exp (SAVE_GCC_UNDER_TEST)
(GCC_UNDER_TEST): Don't set.
(GXX_UNDER_TEST): Provide default.
(libgomp_compile_options): Set the compiler.
(ALWAYS_CFLAGS): Add the respective
constructor-setenv-ACC_DEVICE_TYPE-$offload_target_openacc.o to
ldflags.
(ACC_DEVICE_TYPE): Don't set.
* testsuite/libgomp.oacc-c/c.exp (libgomp_compile_options): Set
the compiler.
(ALWAYS_CFLAGS): Add the respective
constructor-setenv-ACC_DEVICE_

Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Jakub Jelinek
On Wed, Jul 15, 2015 at 04:25:44PM +0200, Michael Matz wrote:
> On Wed, 15 Jul 2015, Jakub Jelinek wrote:
> 
> > On Wed, Jul 15, 2015 at 04:14:07PM +0200, Michael Matz wrote:
> > > That's Toms other approach with supporting multi-step dependencies.  As I 
> > > have tried to argue in the other thread, I think this idea is 
> > > fundamentally broken and just hides real bugs, and I don't see why this 
> > > would be different for this particular hash-map.  If the value of this 
> > > hash refers to a decl that isn't mentioned anywhere else except from this 
> > > hash entry, then it has no meaning anymore, and hence shouldn't itself be 
> > > part of the hash anymore.
> > 
> > You mean key or value?  The value of course can mention various trees that
> > aren't referenced from anywhere else, and it has meaning.
> 
> No, I really meant value.  If you think it has meaning, then tell me what 
> it is for DECL_VALUE_EXPR (X) to be 'Y', if Y is nowhere else mentioned, 
> neither in code, nor in local-decls, nor in globals, or anywhere else that 
> would be reachable by GC.

Pretty much anything, DECL_VALUE_EXPR (X) is some expression.
It can be some_var[some_other_var], *some_var, ptr->foo, etc.
just to list a few of the ones currently in use.
DECL_DEBUG_EXPR can also be __imag__ somevar, __real__ somevar,
something.field, etc.

Jakub


Re: [PATCH] remove some usage of expr_list from read_rtx

2015-07-15 Thread Trevor Saunders
On Mon, Jul 13, 2015 at 02:43:46PM +0100, Richard Sandiford wrote:
> Nice clean-up :-)
> 
> tbsaunde+...@tbsaunde.org writes:
> > @@ -2248,11 +2249,14 @@ process_define_subst (void)
> >  static void
> >  rtx_handle_directive (int lineno, const char *rtx_name)
> >  {
> > -  rtx queue, x;
> > +  auto_vec subrtxs;
> > +  if (!read_rtx (rtx_name, &subrtxs))
> > +return;
> 
> Very minor, but many iterators are over powers of 2, so it seems unlikely
> that 10 would be a good stack/heap cut-off point.  How about 16 or 32
> instead?
> 
> OK with that change, thanks.

changed to 32 and committed, thanks!

Trev

> 
> Richard
> 


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Michael Matz
Hi,

On Wed, 15 Jul 2015, Jakub Jelinek wrote:

> > No, I really meant value.  If you think it has meaning, then tell me 
> > what it is for DECL_VALUE_EXPR (X) to be 'Y', if Y is nowhere else 
> > mentioned, neither in code, nor in local-decls, nor in globals, or 
> > anywhere else that would be reachable by GC.
> 
> Pretty much anything, DECL_VALUE_EXPR (X) is some expression.
> It can be some_var[some_other_var], *some_var, ptr->foo, etc.
> just to list a few of the ones currently in use.

Yes, I know all that.  I haven't made myself clear: if 'Y' above is a 
decl, and it's mentioned nowhere else (so it has no place and has no 
value), then what meaning could possibly be given to DECL_VALUE_EXPR(X) if 
it were 'Y'?

Similar for "ptr->foo" if "ptr" is nowhere mentioned in code or tables.  
In effect DECL_VALUE_EXPR refers to stale decls that aren't initialized, 
aren't given a place and aren't dealt with in code.

> DECL_DEBUG_EXPR can also be __imag__ somevar, __real__ somevar, 
> something.field, etc.

Sure, and the same applies, if "something" is a stale decl, then what's 
the meaning of "something.field" in DECL_DEBUG_EXPR?


Ciao,
Michael.


[gomp4.1] C++ iterators with #omp ordered depend(sink:)

2015-07-15 Thread Aldy Hernandez
This fixes the problem with C++ iterators not working as sink() iterator 
variables.


OK for branch?

Aldy
commit feb44bd0b32a941092441206af9157cb45995d81
Author: Aldy Hernandez 
Date:   Tue Jul 14 19:23:09 2015 -0700

* gimplify.c (gimplify_omp_for): Use OMP_FOR_ORIG_DECLS.
* tree.def (omp_for): Add new operand.
* tree.h (OMP_FOR_ORIG_DECLS): New macro.
c-family/
* c-common.h (c_finish_omp_for): Add argument.
* c-omp.c (c_finish_omp_for): Set OMP_FOR_ORIG_DECLS.
cp/
* semantics.c (finish_omp_for): Pass original DECLs to
c_finish_omp_for.
c/
* c-parser.c (c_parser_omp_for_loop): Pass new argument to
c_finish_omp_for.

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 202c8f9..7e857c3 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1245,7 +1245,7 @@ extern void c_finish_omp_flush (location_t);
 extern void c_finish_omp_taskwait (location_t);
 extern void c_finish_omp_taskyield (location_t);
 extern tree c_finish_omp_for (location_t, enum tree_code, tree, tree, tree,
- tree, tree, tree);
+ tree, tree, tree, tree);
 extern tree c_finish_oacc_wait (location_t, tree, tree);
 extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
 tree, tree *);
diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index f020a80..81aef7a 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -432,6 +432,10 @@ c_omp_for_incr_canonicalize_ptr (location_t loc, tree 
decl, tree incr)
 
 /* Validate and generate OMP_FOR.
DECLV is a vector of iteration variables, for each collapsed loop.
+
+   ORIG_DECLV, if non-NULL, is a vector with the original iteration
+   variables (prior to any transformations, by say, C++ iterators).
+
INITV, CONDV and INCRV are vectors containing initialization
expressions, controlling predicates and increment expressions.
BODY is the body of the loop and PRE_BODY statements that go before
@@ -439,7 +443,8 @@ c_omp_for_incr_canonicalize_ptr (location_t loc, tree decl, 
tree incr)
 
 tree
 c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
- tree initv, tree condv, tree incrv, tree body, tree pre_body)
+ tree orig_declv, tree initv, tree condv, tree incrv,
+ tree body, tree pre_body)
 {
   location_t elocus;
   bool fail = false;
@@ -678,6 +683,7 @@ c_finish_omp_for (location_t locus, enum tree_code code, 
tree declv,
   OMP_FOR_INCR (t) = incrv;
   OMP_FOR_BODY (t) = body;
   OMP_FOR_PRE_BODY (t) = pre_body;
+  OMP_FOR_ORIG_DECLS (t) = orig_declv;
 
   SET_EXPR_LOCATION (t, locus);
   return add_stmt (t);
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 90486d2..0a42072 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -13696,7 +13696,7 @@ c_parser_omp_for_loop (location_t loc, c_parser 
*parser, enum tree_code code,
  an error from the initialization parsing.  */
   if (!fail)
 {
-  stmt = c_finish_omp_for (loc, code, declv, initv, condv,
+  stmt = c_finish_omp_for (loc, code, declv, NULL, initv, condv,
   incrv, body, NULL);
   if (stmt)
{
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 5e7a94d..5a4638d 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -7333,6 +7333,7 @@ finish_omp_for (location_t locus, enum tree_code code, 
tree declv, tree initv,
   if (processing_template_decl)
 orig_incr = make_tree_vec (TREE_VEC_LENGTH (incrv));
 
+  tree orig_declv = copy_node (declv);
   for (i = 0; i < TREE_VEC_LENGTH (declv); )
 {
   decl = TREE_VEC_ELT (declv, i);
@@ -7430,8 +7431,8 @@ finish_omp_for (location_t locus, enum tree_code code, 
tree declv, tree initv,
   if (code == CILK_FOR && !processing_template_decl)
 block = push_stmt_list ();
 
-  omp_for = c_finish_omp_for (locus, code, declv, initv, condv, incrv,
- body, pre_body);
+  omp_for = c_finish_omp_for (locus, code, declv, orig_declv, initv, condv,
+ incrv, body, pre_body);
 
   if (omp_for == NULL)
 {
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9ba3f37..90df326 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7311,7 +7311,11 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
   gcc_assert (DECL_P (decl));
   gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
  || POINTER_TYPE_P (TREE_TYPE (decl)));
-  gimplify_omp_ctxp->iter_vars.quick_push (decl);
+  if (OMP_FOR_ORIG_DECLS (for_stmt))
+   gimplify_omp_ctxp->iter_vars.quick_push
+ (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+  else
+   gimplify_omp_ctxp->iter_vars.quick_push (decl);
 
   /* Make sure the iteration variable is private.  */
   tree c = NULL_TREE;
diff --git a/gcc/tree.def b/gcc/tree.def
index 6703b

Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Michael Matz
Hi,

On Wed, 15 Jul 2015, Michael Matz wrote:

> Similar for "ptr->foo" if "ptr" is nowhere mentioned in code or tables.  
> In effect DECL_VALUE_EXPR refers to stale decls that aren't initialized, 
> aren't given a place and aren't dealt with in code.

Or, maybe we're talking past each other.  You mean the case where 
complicated-expr-on-Y is the value-expr, and Y is _no_ stale decl, but the 
complicated expr itself nevertheless is mentioned nowhere else?  Yes, 
those trees must be retained, I was only talking about the stale-decl 
cases.


Ciao,
Michael.


Re: [PATCH] Add 'switch' statement to match.pd language

2015-07-15 Thread Richard Biener
On July 15, 2015 4:21:03 PM GMT+02:00, Michael Matz  wrote:
>Hi,
>
>On Tue, 14 Jul 2015, Richard Biener wrote:
>
>> I know Micha detests the extra 'if' as much as the extra braces thus
>> would have prefered
>> 
>>  (switch
>>   (A) B
>>   (B) C
>>   (C) D
>>   E)
>
>The lispy way would have been
>
>   (switch
>(A) (B)
>(C) (D)
>(E) (F)
>G)
>
>i.e. parenthesize the result as well, which then would be unambiguously

That's just atoms vs. Expressions.  Like
(Plus @0 @1) vs. Plain @1.  So you suggest to require ((plus @0 @1)) here to 
make it unambiguous?

>parsable.  But hey, it's your language ;)
>
>
>Ciao,
>Michael.




Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Richard Biener
On July 15, 2015 4:52:41 PM GMT+02:00, Michael Matz  wrote:
>Hi,
>
>On Wed, 15 Jul 2015, Michael Matz wrote:
>
>> Similar for "ptr->foo" if "ptr" is nowhere mentioned in code or
>tables.  
>> In effect DECL_VALUE_EXPR refers to stale decls that aren't
>initialized, 
>> aren't given a place and aren't dealt with in code.
>
>Or, maybe we're talking past each other.  You mean the case where 
>complicated-expr-on-Y is the value-expr, and Y is _no_ stale decl, but
>the 
>complicated expr itself nevertheless is mentioned nowhere else?  Yes, 
>those trees must be retained, I was only talking about the stale-decl 
>cases.

Yes.  And that's the case where we still want to collect the decl  if it is not 
mentioned anywhere else.  Thus use a cache map.  But then we'll ICE on the new 
sanity check.

>
>Ciao,
>Michael.




Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Jakub Jelinek
On Wed, Jul 15, 2015 at 04:52:41PM +0200, Michael Matz wrote:
> On Wed, 15 Jul 2015, Michael Matz wrote:
> 
> > Similar for "ptr->foo" if "ptr" is nowhere mentioned in code or tables.  
> > In effect DECL_VALUE_EXPR refers to stale decls that aren't initialized, 
> > aren't given a place and aren't dealt with in code.
> 
> Or, maybe we're talking past each other.  You mean the case where 
> complicated-expr-on-Y is the value-expr, and Y is _no_ stale decl, but the 
> complicated expr itself nevertheless is mentioned nowhere else?  Yes, 
> those trees must be retained, I was only talking about the stale-decl 
> cases.

I meant primarily all those ADDR_EXPRs, MEM_REFs, INDIRECT_REFs, ARRAY_REFs,
etc.  For referenced decls in there if they aren't referenced from anywhere
else, not 100% sure about it.  DECL_VALUE_EXPR is mainly used during
gimplification, and then for debug info production, DECL_DEBUG_EXPR too.
For debug info, I bet if the underlying decls referenced in there will not
exist, then we'd just give up in debug info generation of the particular
object, but wonder if gcc won't ICE if you have the flag bits like
DECL_HAS_VALUE_EXPR_P set and then DECL_VALUE_EXPR of NULL, if you'd
GC them.

Another option for GC these might be to gc walk DECL_VALUE_EXPR of
decls that have DECL_HAS_VALUE_EXPR_P set, similarly for DECL_DEBUG_EXPR,
and then treat those hash maps as pure caches, entries with unmarked
keys would be removed, and no walking of trees referenced from the hash map
would be performed directly.

Jakub


Re: [PATCH] Add 'switch' statement to match.pd language

2015-07-15 Thread Michael Matz
Hi,

On Wed, 15 Jul 2015, Richard Biener wrote:

> >>  (switch
> >>   (A) B
> >>   (B) C
> >>   (C) D
> >>   E)
> >
> >The lispy way would have been
> >
> >   (switch
> >(A) (B)
> >(C) (D)
> >(E) (F)
> >G)
> >
> >i.e. parenthesize the result as well, which then would be unambiguously
> 
> That's just atoms vs. Expressions.

But if the result is no atom, you'd want parentheses.  Similar if the 
condition is no expression but an atom, you'd want to leave out 
parentheses as well.  My point is, that both condition and result are at 
the same level, and hence should be subject to the same parenthesis rules, 
namely: surrounding parens by default, optional for atoms.

> Like (Plus @0 @1) vs. Plain @1.  So you suggest to require ((plus @0 
> @1)) here to make it unambiguous?

No :)  Just look at your example again:

 (switch
  (A) B
 )

Both A and B are at the same level, and are both expressions, but still 
you parenthesize them differently; that can't be right.  You also don't 
write

 (switch
  ((plus @0 @1))  (@0)
 )

You write

 (switch
  (plus @0 @1) (@0)
 )

And as syntactic sugar you are allowed to leave out the parens around @0 
as it's an atom:

 (switch
  (plus @0 @1) @0
 )

Similar, if the condition is an atom you should be able to leave the 
parens away:

 (switch
  cond (minus @0 @1)
 )

(given a predicate 'cond' defined appropriately).


Ciao,
Michael.


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-15 Thread Michael Matz
Hi,

On Wed, 15 Jul 2015, Richard Biener wrote:

> >Or, maybe we're talking past each other.  You mean the case where 
> >complicated-expr-on-Y is the value-expr, and Y is _no_ stale decl, but 
> >the complicated expr itself nevertheless is mentioned nowhere else?  
> >Yes, those trees must be retained, I was only talking about the 
> >stale-decl cases.
> 
> Yes.  And that's the case where we still want to collect the decl if it 
> is not mentioned anywhere else.  Thus use a cache map.  But then we'll 
> ICE on the new sanity check.

Like Jakub mentioned, that can (and probably should) better be fixed by 
walking DECL_VALUE_EXPR from the decl walker, as if it were a normal 
member (likewise for the other maps, if necessary).  It's not necessary to 
fiddle with the hash-table markers itself.


Ciao,
Michael.


Re: [gomp4.1] C++ iterators with #omp ordered depend(sink:)

2015-07-15 Thread Jakub Jelinek
On Wed, Jul 15, 2015 at 07:47:12AM -0700, Aldy Hernandez wrote:
> This fixes the problem with C++ iterators not working as sink() iterator
> variables.
> 
> OK for branch?

I wonder (though I'm not 100% sure about it) if, because we really need the
OMP_FOR_ORIG_DECLS on OMP_FOR only, no other construct can be ordered,
it wouldn't make more sense to make
#define OMP_FOR_ORIG_DECLS(NODE)   TREE_OPERAND (OMP_FOR_CHECK (NODE), 6)
(note, FOR rather than LOOP) and only use 7 arguments on OMP_FOR and
not all the other loop constructs.  That would of course mean
guarding all setters and users of OMP_FOR_ORIG_DECLS (x) with
if (TREE_CODE (x) == OMP_FOR), but that is just 2 locations.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -7311,7 +7311,11 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
>gcc_assert (DECL_P (decl));
>gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
> || POINTER_TYPE_P (TREE_TYPE (decl)));
> -  gimplify_omp_ctxp->iter_vars.quick_push (decl);
> +  if (OMP_FOR_ORIG_DECLS (for_stmt))
> + gimplify_omp_ctxp->iter_vars.quick_push
> +   (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
> +  else
> + gimplify_omp_ctxp->iter_vars.quick_push (decl);

No matter the decision about above, I think you want to quick_push
here twice for each decl (or have two vectors, but a single one is probably
better), and rewrite all uses of the orig decl to the new decl.
Note, gimplify_omp_for can also do similar replacement of an orig decl
with a new decl (even for OMP_FOR_ORIG_DECLS == NULL case aka C),
when the iterator var is addressable.

> +#pragma omp parallel for ordered(1)
> +  for (it = v.begin(); it < v.end(); ++it)
> +{
> +#pragma omp ordered depend(sink:it)
> +std::cout << *it << '\n';
> +}
> +}

I'd try to avoid adding such blatantly wrong testcases, even just
for parsing, perhaps one day we'll want to diagnose it.
Depending on the current iteration is just wrong (like on a future one).
And with no depend(source) it will just hang.
So, can you make it depend(sink:it-1) and add
#pragma omp ordered depend(source) after it?

Jakub


Re: [gomp4.1] Support C++ "this" in OpenMP directives

2015-07-15 Thread Jakub Jelinek
On Wed, Jul 15, 2015 at 03:47:54PM +0300, Ilya Verbin wrote:
> On Thu, Jul 09, 2015 at 10:50:14 +0200, Jakub Jelinek wrote:
> > * parser.c (cp_parser_omp_var_list_no_open): Parse this.
> > * cp-tree.h (finish_omp_declare_simd_methods): New prototype.
> > * semantics.c (handle_omp_array_sections_1): Disallow this based
> > array sections for OpenMP.
> > (finish_omp_declare_simd_methods): New function.
> > (finish_omp_clauses): Don't attempt to adjust linear step of
> > this if it points to TYPE_BEING_DEFINED.  Disallow this in
> > all clauses expecting variable lists, except for declare simd
> > linear/uniform/aligned clauses.
> > (finish_struct_1): Call finish_omp_declare_simd_methods.
> > 
> > * g++.dg/vect/simd-clone-2.cc: New test.
> > * g++.dg/vect/simd-clone-2.h: New file.
> > * g++.dg/vect/simd-clone-3.cc: New test.
> > * g++.dg/vect/simd-clone-4.cc: New test.
> > * g++.dg/vect/simd-clone-4.h: New file.
> > * g++.dg/vect/simd-clone-5.cc: New test.
> > * g++.dg/gomp/this-1.C: New test.
> > * g++.dg/gomp/this-2.C: New test.
> 
> One more warning:
> 
> gcc/cp/parser.c: In function ‘tree_node* 
> cp_parser_omp_var_list_no_open(cp_parser*, omp_clause_code, tree, bool*)’:
> gcc/cp/parser.c:27931:26: error: ‘name’ may be used uninitialized in this 
> function [-Werror=maybe-uninitialized]
>   token->location);
>   ^
> cc1plus: all warnings being treated as errors
> make[4]: *** [cp/parser.o] Error 1

Thanks, here is a fix:

2015-07-15  Jakub Jelinek  

* parser.c (cp_parser_omp_var_list_no_open): Don't process
RID_THIS for kind == 0.  Don't call cp_parser_name_lookup_error
if finish_this_expr returned error_mark_node.

--- gcc/parser.c(revision 225817)
+++ gcc/parser.c(working copy)
@@ -27910,7 +27910,9 @@ cp_parser_omp_var_list_no_open (cp_parse
   tree name, decl;
 
   token = cp_lexer_peek_token (parser->lexer);
-  if (current_class_ptr && cp_parser_is_keyword (token, RID_THIS))
+  if (kind != 0
+ && current_class_ptr
+ && cp_parser_is_keyword (token, RID_THIS))
{
  decl = finish_this_expr ();
  if (TREE_CODE (decl) == NON_LVALUE_EXPR
@@ -27929,10 +27931,12 @@ cp_parser_omp_var_list_no_open (cp_parse
goto skip_comma;
 
  decl = cp_parser_lookup_name_simple (parser, name, token->location);
+ if (decl == error_mark_node)
+   cp_parser_name_lookup_error (parser, name, decl, NLE_NULL,
+token->location);
}
   if (decl == error_mark_node)
-   cp_parser_name_lookup_error (parser, name, decl, NLE_NULL,
-token->location);
+   ;
   else if (kind != 0)
{
  switch (kind)


Jakub


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-15 Thread Jim Wilson
On Wed, Jul 15, 2015 at 6:04 AM, Michael Matz  wrote:
> Hi,
>
> On Tue, 14 Jul 2015, Jim Wilson wrote:
>
>> Now that we do have the problem, we can't fix it without an ARM port ABI
>> change, which is undesirable, so we may have to fix it with a MI change.
>
> What's the ABI implication of fixing the inconsistency?

Currently signed chars and signed shorts are passed sign-extended.  If
we make TARGET_PROMOTE_FUNCTION_MODE work the same as PROMOTE_MODE,
then they will be passed zero-extended.

Given the testcase:

int sub (int) __attribute__ ((noinline));
int sub2 (signed char) __attribute__ ((noinline));
int sub (int i) { return sub2 (i); }
int sub2 (signed char c) { return c & 0xff; }

Currently sub will do a char sign-extend to convert the int to signed
char, and sub2 will do a char zero-extend for the and.  With the
change, sub will do a char zero-extend to convert the int to unsigned
char, and sub2 will do nothing.  If you compile sub without the change
and sub2 with the change, then you lose the and operation and get a
sign-extended char at the end.

>> There were two MI changes suggested, one was fixing the out-of-ssa pass
>> to handle SUBREG_PROMOTED_P promotions.  The other was to disallow
>> creating PHI nodes between parms and locals.  I haven't had a chance to
>> try implementing the second one yet; I hope to work on that today.
>
> Don't bother with the latter, it doesn't have a chance of being accepted.

I tried looking at it anyways, as I need to learn more about this
stuff.  It didn't seem feasible without changing a lot of optimization
passes which doesn't seem reasonable.

> If the terrible hack in outof-ssa really will be necessary (and I really
> really hope it won't) then I think I prefer the approach you partly tried
> in comment #12 of PR 65932 already.  Let partition_to_pseudo[] refer to
> the promoted subreg and deal with that situation in emit_partition_copy;
> I'd then hope that the unsignedsrcp parameter could go away (unfortunately
> the sizeexp will have to stay).

Yes, I think that is a cleaner way to do it, but I had trouble getting
that to work as I don't know enough about the code yet.  Doing it
directly in emit_partition_copy was easier, just to prove it could
work.  I can go back and try to make this work again.

Jim


[genmatch] reject empty c_expr

2015-07-15 Thread Prathamesh Kulkarni
Hi,
We allow c_expr to be empty which accepts cases like the following:

(simplify
  match-operand
  (if ()
result-operand))

(simplify
  match-operand
  {})

The attached patch rejects empty c_expr.
Ok for trunk after bootstrap + test ?

Thank you,
Prathamesh
2015-07-15  Prathamesh Kulkarni  

* genmatch.c (parse_c_expr): Reject empty c_expr.
Index: genmatch.c
===
--- genmatch.c  (revision 225834)
+++ genmatch.c  (working copy)
@@ -3375,6 +3375,7 @@
   unsigned opencnt;
   vec code = vNULL;
   unsigned nr_stmts = 0;
+  bool empty = true;
   eat_token (start);
   if (start == CPP_OPEN_PAREN)
 end = CPP_CLOSE_PAREN;
@@ -3394,6 +3395,7 @@
   && --opencnt == 0)
break;
 
+  empty = false;
   /* This is a lame way of counting the number of statements.  */
   if (token->type == CPP_SEMICOLON)
nr_stmts++;
@@ -3412,6 +3414,10 @@
   code.safe_push (*token);
 }
   while (1);
+
+  if (empty)
+fatal_at (token, "c_expr cannot be empty");
+
   return new c_expr (r, code, nr_stmts, vNULL, capture_ids);
 }
 


Re: [PATCH] PR/66760, ipa-inline-analysis.c compile-time hog

2015-07-15 Thread Martin Jambor
Hi,

On Mon, Jul 13, 2015 at 03:49:05PM +0200, Richard Biener wrote:
> On Mon, Jul 13, 2015 at 3:46 PM, Paolo Bonzini  wrote:
> >
> >
> > On 13/07/2015 15:45, Richard Biener wrote:
> >> It would be nice to have a patch that can be backported to the GCC 5 branch
> >> as well.  We can improve this on trunk as followup,no?
> >
> > The patch I've already posted can be backported. O:-)
> 
> So unless Martin objects consider the patch approved for trunk and for
> backporting
> after 5.2 is released and trunk shows no issues.
> 
> Martin - can you take care of committing if you are fine with it?
> 

I have committed the patch to trunk (and hopefully will not forget to
backport it once the 5 branch reopens).  I am testing the following
hopefully obvious followup, which I plan to commit tomorrow.  It is
just a mechanical renaming of the newly exported structures to give them
the ipa prefix.

Thanks,

Martin


2015-07-15  Martin Jambor  

* ipa-prop.h (param_aa_status): Rename to ipa_param_aa_status.  Adjust
all uses.  Fix two typos in its general comment.
(func_body_info): Rename to ipa_func_body_info.  Adjust all uses.

diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
index 81a6860..b79ef14 100644
--- a/gcc/ipa-inline-analysis.c
+++ b/gcc/ipa-inline-analysis.c
@@ -1574,7 +1574,7 @@ unmodified_parm (gimple stmt, tree op)
loaded.  */
 
 static bool
-unmodified_parm_or_parm_agg_item (struct func_body_info *fbi,
+unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
  gimple stmt, tree op, int *index_p,
  struct agg_position_info *aggpos)
 {
@@ -1745,7 +1745,7 @@ eliminated_by_inlining_prob (gimple stmt)
predicates to the CFG edges.   */
 
 static void
-set_cond_stmt_execution_predicate (struct func_body_info *fbi,
+set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   struct inline_summary *summary,
   basic_block bb)
 {
@@ -1827,7 +1827,7 @@ set_cond_stmt_execution_predicate (struct func_body_info 
*fbi,
predicates to the CFG edges.   */
 
 static void
-set_switch_stmt_execution_predicate (struct func_body_info *fbi,
+set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
 struct inline_summary *summary,
 basic_block bb)
 {
@@ -1888,7 +1888,7 @@ set_switch_stmt_execution_predicate (struct 
func_body_info *fbi,
which it is executable.  */
 
 static void
-compute_bb_predicates (struct func_body_info *fbi,
+compute_bb_predicates (struct ipa_func_body_info *fbi,
   struct cgraph_node *node,
   struct inline_summary *summary)
 {
@@ -2031,7 +2031,7 @@ will_be_nonconstant_expr_predicate (struct 
ipa_node_params *info,
a compile time constant.  */
 
 static struct predicate
-will_be_nonconstant_predicate (struct func_body_info *fbi,
+will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
   struct inline_summary *summary,
   gimple stmt,
   vec nonconstant_names)
@@ -2481,7 +2481,7 @@ estimate_function_body_sizes (struct cgraph_node *node, 
bool early)
   int freq;
   struct inline_summary *info = inline_summaries->get (node);
   struct predicate bb_predicate;
-  struct func_body_info fbi;
+  struct ipa_func_body_info fbi;
   vec nonconstant_names = vNULL;
   int nblocks, n;
   int *order;
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 615f749..2815ca6 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -481,7 +481,7 @@ ipa_set_ancestor_jf (struct ipa_jump_func *jfunc, 
HOST_WIDE_INT offset,
of this function body.  */
 
 static struct ipa_bb_info *
-ipa_get_bb_info (struct func_body_info *fbi, basic_block bb)
+ipa_get_bb_info (struct ipa_func_body_info *fbi, basic_block bb)
 {
   gcc_checking_assert (fbi);
   return &fbi->bb_infos[bb->index];
@@ -756,7 +756,7 @@ mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef 
ATTRIBUTE_UNUSED,
should really just start giving up.  */
 
 static bool
-aa_overwalked (struct func_body_info *fbi)
+aa_overwalked (struct ipa_func_body_info *fbi)
 {
   gcc_checking_assert (fbi);
   return fbi->aa_walked > (unsigned) PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS);
@@ -765,8 +765,8 @@ aa_overwalked (struct func_body_info *fbi)
 /* Find the nearest valid aa status for parameter specified by INDEX that
dominates BB.  */
 
-static struct param_aa_status *
-find_dominating_aa_status (struct func_body_info *fbi, basic_block bb,
+static struct ipa_param_aa_status *
+find_dominating_aa_status (struct ipa_func_body_info *fbi, basic_block bb,
   int index)
 {
   while (true)
@@ -785,21 +785,21 @@ find_dominating_aa_status (struct func_body_info *fbi, 
basic_block bb,
structures and/or intialize the result with a dominating description as
necessary.

Re: [PATCH] PR/66760, ipa-inline-analysis.c compile-time hog

2015-07-15 Thread Paolo Bonzini


On 15/07/2015 18:01, Martin Jambor wrote:
> > So unless Martin objects consider the patch approved for trunk and for
> > backporting after 5.2 is released and trunk shows no issues.
> > 
> > Martin - can you take care of committing if you are fine with it?
> 
> I have committed the patch to trunk (and hopefully will not forget to
> backport it once 5 branch reopens).  I am testing the following
> hopefully obvious followup, which I plan to commit tomorrow.  It is
> just mechanical renaming of the newly exported structures to give them
> ipa prefix.

Thanks Martin!

Paolo


[gomp4] Additional tests

2015-07-15 Thread James Norris

Hi,

The attached patch adds tests for the combined directives, i.e.,
the parallel + loop directive and the kernels + loop directive,
for C/C++/Fortran. Tests are also added for the private, num_gangs,
num_workers, and vector_length clauses with the parallel
directive in Fortran.

Jim
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/combdir-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/combdir-1.c
new file mode 100644
index 000..a7def92
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/combdir-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+
+#include 
+
+int
+main (int argc, char **argv)
+{
+  const int N = 32;
+  float a[N], b[N];
+  int i;
+
+  for (i = 0; i < N; i++)
+{
+  a[i] = 1.0;
+  b[i] = 0.0;
+}
+
+#pragma acc parallel loop copy (a[0:N]) copy (b[0:N])
+  for (i = 0; i < N; i++)
+{
+  b[i] = 2.0;
+  a[i] = a[i] + b[i];
+}
+
+  for (i = 0; i < N; i++)
+{
+  if (a[i] != 3.0)
+	abort ();
+
+  if (b[i] != 2.0)
+	abort ();
+}
+
+#pragma acc kernels loop copy (a[0:N]) copy (b[0:N])
+  for (i = 0; i < N; i++)
+{
+  b[i] = 3.0;
+  a[i] = a[i] + b[i];
+}
+
+  for (i = 0; i < N; i++)
+{
+  if (a[i] != 6.0)
+	abort ();
+
+  if (b[i] != 3.0)
+	abort ();
+}
+
+  return 0;
+
+} 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/combdir-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/combdir-1.f90
new file mode 100644
index 000..0cd8a67
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/combdir-1.f90
@@ -0,0 +1,37 @@
+! { dg-do run }
+
+program main
+  integer, parameter :: n = 32
+  real :: a(n), b(n);
+  integer :: i
+
+  do i = 1, n
+a(i) = 1.0
+b(i) = 0.0
+  end do
+
+  !$acc parallel loop copy (a(1:n)) copy (b(1:n))
+  do i = 1, n
+b(i) = 2.0
+a(i) = a(i) + b(i)
+  end do
+
+  do i = 1, n
+if (a(i) .ne. 3.0) call abort
+
+if (b(i) .ne. 2.0) call abort
+  end do
+
+  !$acc kernels loop copy (a(1:n)) copy (b(1:n))
+  do i = 1, n
+b(i) = 3.0;
+a(i) = a(i) + b(i)
+  end do
+
+  do i = 1, n
+if (a(i) .ne. 6.0) call abort
+
+if (b(i) .ne. 3.0) call abort
+  end do
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-par-gang-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-par-gang-2.f90
new file mode 100644
index 000..bb0891c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-par-gang-2.f90
@@ -0,0 +1,37 @@
+
+! { dg-additional-options "-cpp" }
+
+! Test of gang-private variables declared on the parallel directive.
+
+#if defined(ACC_DEVICE_TYPE_host) || defined(ACC_DEVICE_TYPE_host_nonshm)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 32
+#endif
+
+program main
+  integer :: x = 5
+  integer, parameter :: n = ACTUAL_GANGS
+  integer :: arr(n)
+
+  do i = 1, n
+arr(i) = 3
+  end do
+
+  !$acc parallel private(x) copy(arr) num_gangs(n) num_workers(8) vector_length(32)
+!$acc loop gang(static:1)
+do i = 1, n
+  x = i * 2;
+end do
+
+   !$acc loop gang(static:1)
+do i = 1, n
+  arr(i) = arr(i) + x
+end do
+  !$acc end parallel
+
+  do i = 1, n
+if (arr(i) .ne. (3 + i * 2)) call abort
+  end do
+
+end program main


Re: [PATCH] fix in-tree-binutils builds

2015-07-15 Thread H.J. Lu
On Wed, Jul 15, 2015 at 1:03 AM, Jan Beulich  wrote:
> Quite a while ago configure.in got renamed to configure.ac in all of
> binutils' subtrees - allow for both when checking for that case while
> configuring.
>
> gcc/
> 2015-07-15  Jan Beulich  
>
> * acinclude.m4: Replace configure.in by configure.[ai][cn].
> * configure.ac: Likewise.
> * configure: Re-generate.
>
> --- a/gcc/acinclude.m4
> +++ b/gcc/acinclude.m4
> @@ -390,7 +390,7 @@ AC_DEFUN([_gcc_COMPUTE_GAS_VERSION],
>  [gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
>  for f in $gcc_cv_as_bfd_srcdir/configure \
>   $gcc_cv_as_gas_srcdir/configure \
> - $gcc_cv_as_gas_srcdir/configure.in \
> + $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
>   $gcc_cv_as_gas_srcdir/Makefile.in ; do
>gcc_cv_gas_version=`sed -n -e 's/^[[ 
> ]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`

How portable is [ai][cn]?

-- 
H.J.


Re: [PATCH] fix in-tree-binutils builds

2015-07-15 Thread Jan Beulich
>>> On 15.07.15 at 18:07,  wrote:
> On Wed, Jul 15, 2015 at 1:03 AM, Jan Beulich  wrote:
>> --- a/gcc/acinclude.m4
>> +++ b/gcc/acinclude.m4
>> @@ -390,7 +390,7 @@ AC_DEFUN([_gcc_COMPUTE_GAS_VERSION],
>>  [gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
>>  for f in $gcc_cv_as_bfd_srcdir/configure \
>>   $gcc_cv_as_gas_srcdir/configure \
>> - $gcc_cv_as_gas_srcdir/configure.in \
>> + $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
>>   $gcc_cv_as_gas_srcdir/Makefile.in ; do
>>gcc_cv_gas_version=`sed -n -e 's/^[[ 
>> ]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`
> 
> How portable is [ai][cn]?

I'm not sure - that's why I asked yesterday whether that approach
would be acceptable.

Jan



Re: [PATCH] fix in-tree-binutils builds

2015-07-15 Thread H.J. Lu
On Wed, Jul 15, 2015 at 9:11 AM, Jan Beulich  wrote:
 On 15.07.15 at 18:07,  wrote:
>> On Wed, Jul 15, 2015 at 1:03 AM, Jan Beulich  wrote:
>>> --- a/gcc/acinclude.m4
>>> +++ b/gcc/acinclude.m4
>>> @@ -390,7 +390,7 @@ AC_DEFUN([_gcc_COMPUTE_GAS_VERSION],
>>>  [gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
>>>  for f in $gcc_cv_as_bfd_srcdir/configure \
>>>   $gcc_cv_as_gas_srcdir/configure \
>>> - $gcc_cv_as_gas_srcdir/configure.in \
>>> + $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
>>>   $gcc_cv_as_gas_srcdir/Makefile.in ; do
>>>gcc_cv_gas_version=`sed -n -e 's/^[[ 
>>> ]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`
>>
>> How portable is [ai][cn]?
>
> I'm not sure - that's why I asked yesterday whether that approach
> would be acceptable.
>

I updated my patch to check $gcc_cv_as_gas_srcdir/configure.ac
in _gcc_COMPUTE_GAS_VERSION.

My last patch was sent to gcc, not gcc-patches.

-- 
H.J.
From 27bdd94b4b351f677eccf0b03737e61bc5335f0f Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 15 Jul 2015 08:07:04 -0700
Subject: [PATCH] Also check configure.ac in binutils source tree

The configure.in files in binutils source tree have been renamed to
configure.ac.  This patch checks both configure.in and configure.ac in
binutils source tree.

	* acinclude.m4 (_gcc_COMPUTE_GAS_VERSION): Also check
	$gcc_cv_as_gas_srcdir/configure.ac.
	* configure.ac (gcc_cv_as_gas_configure): New.  Set if
	configure.in or configure.ac exist.
	(gcc_cv_ld_gld_configure): Likewise.
	(gcc_cv_binutils_configure): Likewise.
	(gcc_cv_as): Check $gcc_cv_as_gas_configure instead of
	$gcc_cv_as_gas_srcdir/configure.in.
	(gcc_cv_ld): Check $gcc_cv_ld_gld_configure instead of
	$gcc_cv_ld_gld_srcdir/configure.in.
	(gcc_cv_nm): Check $gcc_cv_binutils_configure instead of
	$gcc_cv_binutils_srcdir/configure.in.
	(gcc_cv_objdump): Likewise.
	(gcc_cv_readelf): Likewise.
	* configure: Regenerated.

_gcc_COMPUTE_GAS_VERSION
---
 gcc/acinclude.m4 |  1 +
 gcc/configure| 31 +--
 gcc/configure.ac | 30 --
 3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
index 94da88e..3d30b0f 100644
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -390,6 +390,7 @@ AC_DEFUN([_gcc_COMPUTE_GAS_VERSION],
 [gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
 for f in $gcc_cv_as_bfd_srcdir/configure \
  $gcc_cv_as_gas_srcdir/configure \
+ $gcc_cv_as_gas_srcdir/configure.ac \
  $gcc_cv_as_gas_srcdir/configure.in \
  $gcc_cv_as_gas_srcdir/Makefile.in ; do
   gcc_cv_gas_version=`sed -n -e 's/^[[ 	]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`
diff --git a/gcc/configure b/gcc/configure
index e0755f9..a8eac95 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -21668,6 +21668,12 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
 gcc_cv_gas_major_version=
 gcc_cv_gas_minor_version=
 gcc_cv_as_gas_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/gas
+gcc_cv_as_gas_configure=
+if test -f $gcc_cv_as_gas_srcdir/configure.in; then
+	gcc_cv_as_gas_configure=$gcc_cv_as_gas_srcdir/configure.in;
+elif test -f $gcc_cv_as_gas_srcdir/configure.ac; then
+	gcc_cv_as_gas_configure=$gcc_cv_as_gas_srcdir/configure.ac
+fi
 
 if test "${gcc_cv_as+set}" = set; then :
 
@@ -21675,7 +21681,7 @@ else
 
 if test -x "$DEFAULT_ASSEMBLER"; then
 	gcc_cv_as="$DEFAULT_ASSEMBLER"
-elif test -f $gcc_cv_as_gas_srcdir/configure.in \
+elif test -n "$gcc_cv_as_gas_configure" \
  && test -f ../gas/Makefile \
  && test x$build = x$host; then
 	gcc_cv_as=../gas/as-new$build_exeext
@@ -21748,6 +21754,7 @@ $as_echo "newly built gas" >&6; }
   gcc_cv_as_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
 for f in $gcc_cv_as_bfd_srcdir/configure \
  $gcc_cv_as_gas_srcdir/configure \
+ $gcc_cv_as_gas_srcdir/configure.ac \
  $gcc_cv_as_gas_srcdir/configure.in \
  $gcc_cv_as_gas_srcdir/Makefile.in ; do
   gcc_cv_gas_version=`sed -n -e 's/^[ 	]*VERSION=[^0-9A-Za-z_]*\([0-9]*\.[0-9]*.*\)/VERSION=\1/p' < $f`
@@ -21826,6 +21833,12 @@ gcc_cv_gld_minor_version=
 gcc_cv_ld_gld_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/ld
 gcc_cv_ld_gold_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/gold
 gcc_cv_ld_bfd_srcdir=`echo $srcdir | sed -e 's,/gcc$,,'`/bfd
+gcc_cv_ld_gld_configure=
+if test -f $gcc_cv_ld_gld_srcdir/configure.in; then
+	gcc_cv_ld_gld_configure=gcc_cv_ld_gld_srcdir/configure.in
+elif test -f $gcc_cv_ld_gld_srcdir/configure.ac; then
+	gcc_cv_ld_gld_configure=gcc_cv_ld_gld_srcdir/configure.ac
+fi
 
 if test "${gcc_cv_ld+set}" = set; then :
 
@@ -21838,7 +21851,7 @@ elif test $install_gold_as_default = yes \
  && test -f ../gold/Makefile \
  && test x$build = x$host; then
 	gcc_cv_ld=../gold/ld-new$build_exeext
-elif test -f $gcc_cv_ld_gld_srcdir/configure.in \
+elif test -n "$gcc_cv_ld_gld_configure" \
  && test -f ../ld/Makefile \

[committed] check_GNU_style.sh: Fix quoting in cat_with_prefix

2015-07-15 Thread Tom de Vries

Hi,

I've committed this obvious fix.

Thanks,
- Tom
check_GNU_style.sh: Fix quoting in cat_with_prefix

2015-07-15  Tom de Vries  

	* check_GNU_style.sh (cat_with_prefix): Fix quoting.

diff --git a/contrib/check_GNU_style.sh b/contrib/check_GNU_style.sh
index 033a2c9..ac54ed0 100755
--- a/contrib/check_GNU_style.sh
+++ b/contrib/check_GNU_style.sh
@@ -89,7 +89,7 @@ cat_with_prefix ()
 if [ "$prefix" = "" ]; then
 	cat "$f"
 else
-	awk "{printf "%s%s\n", $prefix, \$0}" $f
+	awk "{printf \"%s%s\n\", \"$prefix\", \$0}" $f
 fi
 }
 
-- 
1.9.1



[patch] options.h and comments on a couple of tm.h #include's

2015-07-15 Thread Andrew MacLeod
I think this is the last patch before I start include reductions and 
cleanups.


options.h is generated with the following snippet:

/* Anything that includes tm.h, does not necessarily need this.  */"
#if !defined(GCC_TM_H)
#include "input.h" /* for location_t */
<...>
// bunch of prototypes which use location_t
<...>
#endif

This is primarily so that the generator files, which don't include
input.h and thus have no location_t defined, can compile.
options.h is usually included by tm.h, but some front end files include
just options.h, and the inclusion of input.h is so that they don't have
to, I guess.


input.h is now included by coretypes.h, so any of these files that care
about the prototypes already have input.h.  It seems to make more sense
to simply check if input.h has been included instead of a weird
contortion on tm.h, which is really unrelated in many ways.  (I'm simply
checking if UNKNOWN_LOCATION is defined.)  In fact, the current approach
introduces a latent ordering issue in a number of front end files:
they include options.h because they don't need tm.h, but then include
tm.h indirectly through target.h.  If the include order is changed such
that target.h is included *before* options.h (or instead of it), some
files fail to compile because they no longer see these prototypes.
This just cleans that crud up.


The other 4 files in the patch include tm.h on a line with a multi-line
comment following it.  The comment is supposed to reflect what macros the
include is for, but at least some of these are out of date; no one
updates them.  The information they convey is easily determined in an
up-to-date way by simply removing the include from the list and compiling
the .o.  Plus many other front end files don't bother with this, so it's
inconsistent.  And my real motivation is that it plays havoc with my
automatic include manipulations :-)  So I propose just removing the
comments.


I didn't bother trying to merge options.h and tm.h in various files,
mostly because for the include reduction, I'm going to flatten options.h
out of tm.h and tm.h/hard-reg-set.h out of target.h.  I won't bother
submitting that to mainline since it's going to affect all the same files
I'll be checking in later.  When the reduction is done, I'll look at who
actually needed options.h, tm.h, target.h and they'll probably get
mashed back to the way they are now (or maybe they won't... :-)  It'll
depend on the usage patterns.  In any case, I don't need to pollute trunk
until I know for sure.


This bootstraps on x86_64-unknown-linux-gnu with no new regressions.  ok 
for trunk?


Andrew






	* opth-gen.awk: Check for UNKNOWN_LOCATION rather than GCC_TM_H, don't
	include input.h.
	* opts.c: Remove multiline #include comment.
	* java/expr.c: Remove multiline #include comment.
	* fortran/trans-types.c: Remove multiline #include comment.
	* c-family/c-opts.c: Remove multiline #include comment.



Index: opth-gen.awk
===
*** opth-gen.awk	(revision 225789)
--- opth-gen.awk	(working copy)
*** print "";
*** 308,316 
  print "/* Hash optimization from a structure.  */";
  print "extern hashval_t cl_optimization_hash (const struct cl_optimization *);";
  print "";
! print "/* Anything that includes tm.h, does not necessarily need this.  */"
! print "#if !defined(GCC_TM_H)"
! print "#include \"input.h\" /* for location_t */"
  print "bool  "
  print "common_handle_option_auto (struct gcc_options *opts,  "
  print "   struct gcc_options *opts_set,  "
--- 308,315 
  print "/* Hash optimization from a structure.  */";
  print "extern hashval_t cl_optimization_hash (const struct cl_optimization *);";
  print "";
! print "/* Generator files may not have access to location_t, and don't need these.  */"
! print "#if defined(UNKNOWN_LOCATION)"
  print "bool  "
  print "common_handle_option_auto (struct gcc_options *opts,  "
  print "   struct gcc_options *opts_set,  "
Index: java/expr.c
===
*** java/expr.c	(revision 225789)
--- java/expr.c	(working copy)
*** The Free Software Foundation is independ
*** 26,37 
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
! #include "tm.h"			/* For INT_TYPE_SIZE,
!    TARGET_VTABLE_USES_DESCRIPTORS,
!    BITS_PER_UNIT,
!    MODIFY_JNI_METHOD_CALL and
!    PARM_BOUNDARY.  */
!    
  #include "alias.h"
  #include "tree.h"
  #include "fold-const.h"
--- 26,32 
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
! #include "tm.h"
  #include "alias.h"
  #include "tree.h"
  #include "fold-const.h"
Index: fortran/trans-types.c
=

Re: [patch] Adjust *-streamer.h

2015-07-15 Thread Jeff Law

On 07/14/2015 02:25 PM, Andrew MacLeod wrote:

This patch just does a minor cleanup.

gimple-pretty-print.h doesn't need to include pretty-print.h since
tree-pretty-print.h already does.
I also cleaned up the 4 streamer files, such that each one includes just
the previous one, and then made each source file include just the one it
needs.

so lto-streamer.h <- data-streamer.h <- tree-streamer.h  <-
gimple-streamer.h
and tree-streamer.h also includes streamer-hooks.h

This makes these files match their compilation requirements, and only
one ever needs to be included. I also added them to each source file and
ran include reduction to ensure they each had the exact one they needed,
which they did.

The rest of the patch is simply removing headers that are redundant
now (i.e., if tree-streamer.h is included, you don't need to include
lto-streamer.h or streamer-hooks.h).

bootstraps on  x86_64-unknown-linux-gnu, and no new regressions

  OK for trunk?

OK.
jeff


Re: [PATCH] fix in-tree-binutils builds

2015-07-15 Thread Mike Stump
On Jul 15, 2015, at 9:07 AM, H.J. Lu  wrote:
> On Wed, Jul 15, 2015 at 1:03 AM, Jan Beulich  wrote:
>> 
>> - $gcc_cv_as_gas_srcdir/configure.in \
>> + $gcc_cv_as_gas_srcdir/configure.[ai][cn] \
>>  $gcc_cv_as_gas_srcdir/Makefile.in ; do
>>   gcc_cv_gas_version=`sed -n -e 's/^[[ 
>> ]]*VERSION=[[^0-9A-Za-z_]]*\([[0-9]]*\.[[0-9]]*.*\)/VERSION=\1/p' < $f`
> 
> How portable is [ai][cn]?

Should be portable enough.

Re: [patch] options.h and comments on a couple of tm.h #include's

2015-07-15 Thread Jeff Law

On 07/15/2015 10:32 AM, Andrew MacLeod wrote:

I think this is the last patch before I start include reductions and
cleanups.

options.h is generated with the following snippet:

/* Anything that includes tm.h, does not necessarily need this.  */"
#if !defined(GCC_TM_H)
#include "input.h" /* for location_t */
<...>
// bunch of prototypes which use location_t
<...>
#endif

This is primarily so that the generator files, which don't include
input.h and thus have no location_t defined, can compile.
options.h is usually included by tm.h, but some front end files include
just options.h, and the inclusion of input.h is so that they don't have
to, I guess.

input.h is now included by coretypes.h, so any of these files that care
about the prototypes already have input.h.  It seems to make more sense
to simply check if input.h has been included instead of a weird
contortion on tm.h, which is really unrelated in many ways.  (I'm simply
checking if UNKNOWN_LOCATION is defined.)  In fact, the current approach
introduces a latent ordering issue in a number of front end files:
they include options.h because they don't need tm.h, but then include
tm.h indirectly through target.h.  If the include order is changed such
that target.h is included *before* options.h (or instead of it), some
files fail to compile because they no longer see these prototypes.
This just cleans that crud up.

The other 4 files in the patch include tm.h on a line with a multi-line
comment following it.  The comment is supposed to reflect what macros the
include is for, but at least some of these are out of date; no one
updates them.  The information they convey is easily determined in an
up-to-date way by simply removing the include from the list and compiling
the .o.  Plus many other front end files don't bother with this, so it's
inconsistent.  And my real motivation is that it plays havoc with my
automatic include manipulations :-)  So I propose just removing the
comments.

I didn't bother trying to merge options.h and tm.h in various files,
mostly because for the include reduction, I'm going to flatten options.h
out of tm.h and tm.h/hard-reg-set.h out of target.h.  I won't bother
submitting that to mainline since it's going to affect all the same files
I'll be checking in later.  When the reduction is done, I'll look at who
actually needed options.h, tm.h, target.h and they'll probably get
mashed back to the way they are now (or maybe they won't... :-)  It'll
depend on the usage patterns.  In any case, I don't need to pollute trunk
until I know for sure.

This bootstraps on x86_64-unknown-linux-gnu with no new regressions.  ok
for trunk?

OK.
jeff


[Patch ARM] Switch to unified asm for ARM state in the compiler.

2015-07-15 Thread Ramana Radhakrishnan
Hi, 

This patch switches A32 (ARM) state code generation to unified syntax. The 
backend already generates unified syntax in Thumb state and for the floating 
point / SIMD instruction set. The backend still continues to use divided syntax 
for inline assembler.

This is beneficial for a few reasons.

1. Assembler output from the compiler is more in line with the documentation 
for the ISA.
2. Removing special casing for various instructions where unified asm went one 
way and divided asm the other.
3. Possible sharing of more patterns between arm.md and thumb2.md - I've not 
addressed that in this patch though.
4. Frees up a few punctuation characters if we ever needed them.

This patch does the following - (some minor follow-ups are required)

- Remove use of TARGET_UNIFIED_ASM
- Consolidate all uses into unified asm and remove all old support for the
same.
- Remove support for %( and %) punctuation characters. I do not expect these
characters to be used in inline assembler.
- Remove all use of the %. punctuation character - however, its definition
remains as an oversight. I will deal with this in a followup patch.
- Need to clean up the definition of ARM_LSL_NAME and remove that in a future
patch.
- Adjust testsuite.



Tested with bootstrap and regression run on armhf with and without thumb on 
Cortex-A15.


arm-none-eabi with the following multilibs.

   
"arm-eabi{-marm/-march=armv7-a/-mfpu=vfpv3-d16/-mfloat-abi=softfp}" \
   
"arm-eabi{-mthumb/-march=armv8-a/-mfpu=crypto-neon-fp-armv8/-mfloat-abi=hard}" \
   "arm-eabi{-marm/-mcpu=arm7tdmi/-mfloat-abi=soft}" \
   "arm-eabi{-mthumb/-mcpu=arm7tdmi/-mfloat-abi=soft}"

with no regressions.


I will apply this to trunk in a couple of days if folks don't have any comments 
and try out a few more multilibs in order to stress this a bit.


regards
Ramana

  Ramana Radhakrishnan  

* config/arm/arm-ldmstm.ml: Rewrite to using unified syntax.
* config/arm/ldmstm.md: Regenerate.
* config/arm/arm.c (arm_asm_trampoline_template): Use unified syntax.
(arm_output_multireg_pop): Use unified syntax.
(output_move_double): Likewise.
(output_move_quad): Likewise.
(output_return_instruction): Likewise
(arm_print_operand): Remove support for '(' and ')'
(arm_output_shift):  Use unified syntax.
(arm_declare_function_name): Likewise.
* config/arm/arm.h (TARGET_UNIFIED_ASM): Delete.
* config/arm/arm.md: Rewrite to generate unified syntax.
* config/arm/sync.md: Likewise.
* config/arm/thumb2.md: Likewise.

gcc/testsuite

  Ramana Radhakrishnan  

* gcc.target/arm/combine-movs.c: Adjust.
* gcc.target/arm/interrupt-1.c: Likewise.
* gcc.target/arm/interrupt-2.c: Likewise.
* gcc.target/arm/unaligned-memcpy-4.c: Likewise.


commit ff5c72b06b7aea8b01af25ffe9cbc0154322f614
Author: Ramana Radhakrishnan 
Date:   Thu Jul 9 14:37:55 2015 +0100

Remove TARGET_UNIFIED_ASM

2015-07-09  Ramana Radhakrishnan  

* config/arm/arm-ldmstm.ml: Rewrite to using unified syntax.
* config/arm/ldmstm.md: Regenerate.
* config/arm/arm.c (arm_asm_trampoline_template): Use unified syntax.
(arm_output_multireg_pop): Use unified syntax.
(output_move_double): Likewise.
(output_move_quad): Likewise.
(output_return_instruction): Likewise
(arm_print_operand): Remove support for '(' and ')'
(arm_output_shift):  Use unified syntax.
(arm_declare_function_name): Likewise.
* config/arm/arm.h (TARGET_UNIFIED_ASM): Delete.
* config/arm/arm.md: Rewrite to generate unified syntax.
* config/arm/sync.md: Likewise.
* config/arm/thumb2.md: Likewise.

gcc/testsuite/ChangeLog:

2015-07-10  Ramana Radhakrishnan  

* gcc.target/arm/combine-movs.c: Adjust.
* gcc.target/arm/interrupt-1.c: Likewise.
* gcc.target/arm/interrupt-2.c: Likewise.
* gcc.target/arm/unaligned-memcpy-4.c: Likewise.

diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
index bb90192..e88a51c 100644
--- a/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -33,9 +33,20 @@ type amode = IA | IB | DA | DB
 
 type optype = IN | OUT | INOUT
 
-let rec string_of_addrmode addrmode =
+let rec string_of_addrmode addrmode thumb update =
+  if thumb || update
+then
   match addrmode with
-IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db"
+IA -> "ia"
+  | IB -> "ib"
+  | DA -> "da"
+  | DB -> "db"
+else
+  match addrmode with
+IA -> ""
+  | IB -> "ib"
+  | DA -> "da"
+  | DB -> "db"
 
 let rec initial_offset addrmode nregs =
   match addrmode with
@@ -160,7 +171,7 @@ let target addrmode thumb =
   | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for 
Thumb1.")
 
 let write_pattern_1 name ls

Re: [PATCH] rs6000: Revamp rotate-and-mask and insert

2015-07-15 Thread Segher Boessenkool
On Wed, Jul 15, 2015 at 12:10:51PM +0930, Alan Modra wrote:
> There are one or two regressions related to a TODO that Segher added.
> The following produces poorer code than mainline.

Yes, that's why it says TODO :-)

> Also, rs6000.md patterns uses SImode for the rotate/shift count.
> Segher has added some new insns that use DImode when 64-bit.

"Some"?  Looks like all even.  &($^&*()%$&.  Would be nice if gen*
could warn, hrm.

> I think that inconsistency ought to be fixed.

Yeah; patch in progress.  Thanks for spotting it.

> (I haven't completely analysed this) but won't
> 
> (define_insn_and_split "*and3_imm_dot_shifted"
> [snip]
>   (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
> (match_operand:GPR 4 "const_int_operand" "n,n"))
>^^^this
> fail to match combined patterns generated from other rs6000.md
> patterns like
> 
> (define_insn "lshr3"

This pattern isn't generated from that; it's generated from and_imm.
Combine thinks many "dot1" patterns should look different than the
"base" and dot2 patterns; this is an important case (it happens when
testing a single bit).


Segher


[committed] Add kernels-loop-nest-independent.f95

2015-07-15 Thread Tom de Vries

[ was: Re: [gomp4] New test loop independent clause ]

On 15/07/15 12:43, Tom de Vries wrote:

I'm not sure if I mentioned it at the meeting, but marking the outer
loop of that example as independent does not result in parallelization
either. So that's something to be investigated.


I've got it working now.

Committed to gomp-4_0-branch.

Thanks,
- Tom

Add kernels-loop-nest-independent.f95

2015-07-15  Tom de Vries  

	* gfortran.dg/goacc/kernels-loop-nest-independent.f95: New test.

	* testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95: New
	test.

diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95
new file mode 100644
index 000..436048e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95
@@ -0,0 +1,41 @@
+! { dg-do compile }
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+! Based on autopar/outer-1.c.
+
+program main
+  implicit none
+  integer, parameter :: n = 500
+  integer, dimension (0:n-1, 0:n-1) :: x
+  integer:: i, j, ii, jj
+
+
+  !$acc kernels copyout (x)
+  !$acc loop independent
+  do ii = 0, n - 1
+ do jj = 0, n - 1
+x(jj, ii) = ii + jj + 3
+ end do
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+ do j = 0, n - 1
+if (x(i, j) .ne. i + j + 3) call abort
+ end do
+  end do
+
+end program main
+
+! Check that only one loop is analyzed, and that it can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+
+! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95
new file mode 100644
index 000..87a3d23
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95
@@ -0,0 +1,28 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+! Based on autopar/outer-1.c.
+
+program main
+  implicit none
+  integer, parameter :: n = 500
+  integer, dimension (0:n-1, 0:n-1) :: x
+  integer:: i, j, ii, jj
+
+
+  !$acc kernels copyout (x)
+  !$acc loop independent
+  do ii = 0, n - 1
+ do jj = 0, n - 1
+x(jj, ii) = ii + jj + 3
+ end do
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+ do j = 0, n - 1
+if (x(i, j) .ne. i + j + 3) call abort
+ end do
+  end do
+
+end program main
-- 
1.9.1



[gomp4, committed] Add c-c++-common/goacc/kernels-loop-nest-independent.c

2015-07-15 Thread Tom de Vries

Hi,

This patch adds a testcase that tests the independent clause on the outer
loop of an oacc kernels region.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Add c-c++-common/goacc/kernels-loop-nest-independent.c

2015-07-15  Tom de Vries  

	* c-c++-common/goacc/kernels-loop-nest-independent.c: New test.

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c
new file mode 100644
index 000..4486b6a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+/* Based on autopar/outer-1.c.  */
+
+#include 
+
+#define N 1000
+
+int
+main (void)
+{
+  int x[N][N];
+
+#pragma acc kernels copyout (x)
+  {
+#pragma acc loop independent
+for (int ii = 0; ii < N; ii++)
+  for (int jj = 0; jj < N; jj++)
+	x[ii][jj] = ii + jj + 3;
+  }
+
+  for (int i = 0; i < N; i++)
+for (int j = 0; j < N; j++)
+  if (x[i][j] != i + j + 3)
+	abort ();
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
-- 
1.9.1



PATCH trunk GCCJIT: adding gcc_jit_context_new_rvalue_from_long_long, etc...

2015-07-15 Thread Basile Starynkevitch

Hello All and David Malcolm

The attached patch (relative to trunk r225842) adds
gcc_jit_context_new_rvalue_from_long_long and similar functions to GCCJIT.


It bootstraps, but I don't have any test cases.

## gcc/jit/ChangeLog entry:

2015-07-15  Basile Starynkevitch  

* libgccjit.h (gcc_jit_context_new_rvalue_from_long_long)
(gcc_jit_context_new_rvalue_from_int32)
(gcc_jit_context_new_rvalue_from_int64)
(gcc_jit_context_new_rvalue_from_intptr): New function
declarations.

* libgccjit.map: New entries for above functions.

* libgccjit.c (gcc_jit_context_new_rvalue_from_long_long)
(gcc_jit_context_new_rvalue_from_int32)
(gcc_jit_context_new_rvalue_from_int64)
(gcc_jit_context_new_rvalue_from_intptr): New functions.

###

Comments are welcome. Ok for trunk?

see https://gcc.gnu.org/ml/jit/2015-q3/msg00085.html

Regards.

--
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basilestarynkevitchnet mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mine, sont seulement les miennes} ***

Index: gcc/jit/libgccjit.c
===
--- gcc/jit/libgccjit.c	(revision 225842)
+++ gcc/jit/libgccjit.c	(working copy)
@@ -1154,6 +1154,70 @@ gcc_jit_context_new_rvalue_from_long (gcc_jit_cont
 	  ->new_rvalue_from_const  (numeric_type, value));
 }
 
+/* Public entrypoint.  See description in libgccjit.h.  */
+
+gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_long_long (gcc_jit_context *ctxt,
+ gcc_jit_type *numeric_type,
+ long long value)
+{
+  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
+  JIT_LOG_FUNC (ctxt->get_logger ());
+  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
+
+  return ((gcc_jit_rvalue *)ctxt
+	  ->new_rvalue_from_const  (numeric_type, value));
+}
+
+
+/* Public entrypoint.  See description in libgccjit.h.  */
+
+gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_int32 (gcc_jit_context *ctxt,
+ gcc_jit_type *numeric_type,
+ int32_t value)
+{
+  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
+  JIT_LOG_FUNC (ctxt->get_logger ());
+  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
+
+  return ((gcc_jit_rvalue *)ctxt
+	  ->new_rvalue_from_const  (numeric_type, value));
+}
+
+
+/* Public entrypoint.  See description in libgccjit.h.  */
+
+gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_int64 (gcc_jit_context *ctxt,
+ gcc_jit_type *numeric_type,
+ int64_t value)
+{
+  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
+  JIT_LOG_FUNC (ctxt->get_logger ());
+  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
+
+  return ((gcc_jit_rvalue *)ctxt
+	  ->new_rvalue_from_const  (numeric_type, value));
+}
+
+
+/* Public entrypoint.  See description in libgccjit.h.  */
+
+gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_intptr (gcc_jit_context *ctxt,
+ gcc_jit_type *numeric_type,
+ intptr_t value)
+{
+  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
+  JIT_LOG_FUNC (ctxt->get_logger ());
+  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
+
+  return ((gcc_jit_rvalue *)ctxt
+	  ->new_rvalue_from_const  (numeric_type, value));
+}
+
+
 /* Public entrypoint.  See description in libgccjit.h.
 
This is essentially equivalent to:
Index: gcc/jit/libgccjit.h
===
--- gcc/jit/libgccjit.h	(revision 225842)
+++ gcc/jit/libgccjit.h	(working copy)
@@ -752,6 +752,26 @@ gcc_jit_context_new_rvalue_from_long (gcc_jit_cont
   long value);
 
 extern gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_long_long (gcc_jit_context *ctxt,
+  gcc_jit_type *numeric_type,
+  long long value);
+
+extern gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_int32 (gcc_jit_context *ctxt,
+  gcc_jit_type *numeric_type,
+  int32_t value);
+
+extern gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_int64 (gcc_jit_context *ctxt,
+  gcc_jit_type *numeric_type,
+  int64_t value);
+
+extern gcc_jit_rvalue *
+gcc_jit_context_new_rvalue_from_intptr (gcc_jit_context *ctxt,
+  gcc_jit_type *numeric_type,
+  intptr_t value);
+
+extern gcc_jit_rvalue *
 gcc_jit_context_zero (gcc_jit_context *ctxt,
 		  gcc_jit_type *numeric_type);
 
Index: gcc/jit/libgccjit.map
===
--- gcc/jit/libgccjit.map	(revision 225842)
+++ gcc/jit/libgccjit.map	(working copy)
@@ -61,7 +61,10 @@ LIBGCCJIT_ABI_0
 gcc_jit_context_new_param;
 gcc_jit_context_new_rvalue_from_double;
 gcc_jit_context_new_rvalue_from_int;
+gcc_jit_context_new_rvalue_from_int32;
+gcc_jit_context_new_rvalue_from_int64;
 gcc_jit_context_new_rvalue_from_long;
+gcc_jit_context_new_rvalue_from_long_long;	
 gcc_jit_context_new_rvalue_from_ptr;
 gcc_j

[gomp] Fix PTX unloading

2015-07-15 Thread Nathan Sidwell
This patch reworks my previous one to just deal with the PTX-specific unloading
breakage.  I don't change the API between libgomp and the plugins; instead I fix
up the PTX plugin to have per-device instance lists of loaded programs.  As with
the previous patch, we fix an ordering problem by unloading the target images
before destroying the memory maps containing said images.  Mkoffloads now emits
a static destructor call.


Ok for gomp4?

nathan
2015-07-15  Nathan Sidwell  

	libgomp/
	* target.c (gomp_offload_image_to_device): Rename to ...
	(gomp_load_image_to_device): ... here.
	(GOMP_offload_register): Adjust call.
	(gomp_init_device): Likewise.
	(gomp_unload_image_from_device): New.  Broken out of ...
	(GOMP_offload_unregister): ... here.  Call it.
	(gomp_unload_device): New.
	* libgomp.h (gomp_unload_device): Declare.
	* plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later.
	(struct ptx_image_data): Move earlier, add fns field.
	(struct ptx_device): Add images and image_lock fields.
	(ptx_images, ptx_image_lock): Delete.
	(nvptx_open_device): Initialize images and image_lock fields.
	(GOMP_OFFLOAD_load_image): Register image to device-specific fields.
	(GOMP_OFFLOAD_unload_image): Unregister from device-specific.
	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
	mem maps.

	gcc/
	* config/nvptx/mkoffload.c (process): Reformat printing.
	Add destructor call.

Index: libgomp/target.c
===
--- libgomp/target.c	(revision 225829)
+++ libgomp/target.c	(working copy)
@@ -647,12 +647,13 @@ gomp_update (struct gomp_device_descr *d
 
 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
And insert to splay tree the mapping between addresses from HOST_TABLE and
-   from loaded target image.  */
+   from loaded target image.  We rely in the host and device compiler
+   emitting variable and functions in the same order.  */
 
 static void
-gomp_offload_image_to_device (struct gomp_device_descr *devicep,
-			  void *host_table, void *target_data,
-			  bool is_register_lock)
+gomp_load_image_to_device (struct gomp_device_descr *devicep,
+			   void *host_table, void *target_data,
+			   bool is_register_lock)
 {
   void **host_func_table = ((void ***) host_table)[0];
   void **host_funcs_end  = ((void ***) host_table)[1];
@@ -667,7 +668,8 @@ gomp_offload_image_to_device (struct gom
   /* Load image to device and get target addresses for the image.  */
   struct addr_pair *target_table = NULL;
   int i, num_target_entries
-= devicep->load_image_func (devicep->target_id, target_data, &target_table);
+= devicep->load_image_func (devicep->target_id, target_data,
+&target_table);
 
   if (num_target_entries != num_funcs + num_vars)
 {
@@ -736,6 +738,59 @@ gomp_offload_image_to_device (struct gom
   free (target_table);
 }
 
+/* Unload the mappings described by target_data from device DEVICE_P.
+   The device must be locked.   */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+			   void *host_table, void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+ and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  unsigned j;
+  struct splay_tree_key_s k;
+  splay_tree_key node = NULL;
+
+  /* Find mapping at start of node array */
+  if (num_funcs || num_vars)
+{
+  k.host_start = num_funcs ? (uintptr_t) host_func_table[0] : (uintptr_t) host_var_table[0];
+  k.host_end = k.host_start + 1;
+  node = splay_tree_lookup (&devicep->mem_map, &k);
+}
+  
+  devicep->unload_image_func (devicep->target_id, target_data);
+
+  /* Remove mappings from splay tree.  */
+  for (j = 0; j < num_funcs; j++)
+{
+  k.host_start = (uintptr_t) host_func_table[j];
+  k.host_end = k.host_start + 1;
+  splay_tree_remove (&devicep->mem_map, &k);
+}
+
+  for (j = 0; j < num_vars; j++)
+{
+  k.host_start = (uintptr_t) host_var_table[j * 2];
+  k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+  splay_tree_remove (&devicep->mem_map, &k);
+}
+
+  if (node)
+{
+  free (node->tgt);
+  free (node);
+}
+}
+
 /* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin.  */
@@ -753,7 +808,7 @@ GOMP_offload_register (void *host_table,
   struct gomp_device_descr *devicep = &devices[i];
   gomp_mutex_lock (&devicep->lock);
   if (devicep->type == target_type && devicep->is_initi

Re: [PATCH] Add 'switch' statement to match.pd language

2015-07-15 Thread Richard Sandiford
Michael Matz  writes:
>> >>  (switch
>> >>   (A) B
>> >>   (B) C
>> >>   (C) D
>> >>   E)
>> >
>> >The lispy way would have been
>> >
>> >   (switch
>> >(A) (B)
>> >(C) (D)
>> >(E) (F)
>> >G)
>> >
>> >i.e. parenthesize the result as well, which then would be unambiguously
>> 
>> That's just atoms vs. Expressions.
>
> But if the result is no atom, you'd want parentheses.  Similar if the 
> condition is no expression but an atom, you'd want to leave out 
> parentheses as well.  My point is, that both condition and result are at 
> the same level, and hence should be subject to the same parenthesis rules, 
> namely: surrounding parens by default, optional for atoms.
>
>> Like (Plus @0 @1) vs. Plain @1.  So you suggest to require ((plus @0 
>> @1)) here to make it unambiguous?
>
> No :)  Just look at your example again:
>
>  (switch
>   (A) B
>  )
>
> Both A and B are at the same level, and are both expressions, but still 
> you parenthesize them differently; that can't be right.  You also don't 
> write
>
>  (switch
>   ((plus @0 @1))  (@0)
>  )
>
> You write
>
>  (switch
>   (plus @0 @1) (@0)
>  )
>
> And as syntactic sugar you are allowed to leave out the parens around @0 
> as it's an atom:
>
>  (switch
>   (plus @0 @1) @0
>  )
>
> Similar, if the condition is an atom you should be able to leave the 
> parens away:
>
>  (switch
>   cond (minus @0 @1)
>  )
>
> (given a predicate 'cond' defined appropriately).

Agreed FWIW.  The rtx equivalent (unfortunately called "cond",
so the clash with "cond"==COND_EXPR prevents naming consistency)
uses the lispy syntax without any ambiguity.

Thanks,
Richard


Re: [PR66726] Factor conversion out of COND_EXPR

2015-07-15 Thread Jeff Law

On 07/15/2015 01:09 AM, Kugan wrote:


2015-07-15  Kugan Vivekanandarajah

PR middle-end/66726
* tree-ssa-reassoc.c (optimize_range_tests): Handle sinking the cast
after PHI.
(final_range_test_p): Detect sinking the cast after PHI.
(maybe_optimize_range_tests): Handle sinking the cast after PHI.

Can we tweak



p.txt


diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 932c83a..3058eb5 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c



  return false;
bb = gimple_bb (stmt);
if (!single_succ_p (bb))
@@ -2729,9 +2743,8 @@ final_range_test_p (gimple stmt)

lhs = gimple_assign_lhs (stmt);
rhs = gimple_assign_rhs1 (stmt);
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  || TREE_CODE (rhs) != SSA_NAME
-  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
+  if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE
+  && TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
  return false;
So you're ensuring that one of the two is a boolean...  Note that 
previously we ensured that the rhs was a boolean and the lhs was an 
integral type (which I believe is true for booleans).


Thus if we had
bool x;
int y;

x = (bool) y;

The old code would have rejected that case.  But I think it gets through 
now, right?


I think once that issue is addressed, this will be good for the trunk.

jeff




Re: [PATCH][DRIVER] Wrong C++ include paths when configuring with "--with-sysroot=/"

2015-07-15 Thread Jeff Law

On 07/15/2015 02:34 AM, Yvan Roux wrote:

Hi,

(Sorry for the delay I'm just back from a long sick leave)


There is this old patch submitted by Matthias on that same issue; if
its logic is the right one for you, Joseph, I can rebase/validate it.

https://gcc.gnu.org/ml/gcc-patches/2012-02/msg00320.html


Yes, that seems better.


I've rebased the patch on trunk, bootstrap is ok and when configuring
with options:
"--with-sysroot=/ --with-gxx-include-dir=/usr/include/c++/4.9.2"
gcc_gxx_include_dir keeps its leading slash.

Is it ok for trunk ?

Thanks,
Yvan


2015-07-15  Yvan Roux  
   Matthias Klose  

* configure.ac: Move AC_ARG_WITH checks for native-system-header-dir,
build-sysroot, sysroot from the `Miscenalleous configure options'
to the `Directories' section and strip trailing `/' from with_sysroot.
(gcc_gxx_include_dir): Don't strip a `/' sysroot value.
* configure: Regenerated.

OK.
jeff



[nvptx] C++ify mkoffloads

2015-07-15 Thread Nathan Sidwell
I've applied this obvious patch to trunk to make mkoffloads work for C++.  The 
equivalent is already on the gomp4 branch.


nathan
2015-07-15  Nathan Sidwell  

	* config/nvptx/mkoffload.c (process): Add C++ protection to
	emitted code.

Index: config/nvptx/mkoffload.c
===
--- config/nvptx/mkoffload.c	(revision 225843)
+++ config/nvptx/mkoffload.c	(working copy)
@@ -867,7 +867,13 @@ process (FILE *in, FILE *out)
 		"func_mappings\n", nvars, nfuncs);
   fprintf (out, "};\n\n");
 
-  fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
+  fprintf (out, "#ifdef __cplusplus\n"
+	   "extern \"C\" {\n"
+	   "#endif\n");
+  fprintf (out, "extern void GOMP_offload_register (void *, int, void *);\n");
+  fprintf (out, "#ifdef __cplusplus\n"
+	   "}\n"
+	   "#endif\n");
 
   fprintf (out, "extern void *__OFFLOAD_TABLE__[];\n\n");
   fprintf (out, "static __attribute__((constructor)) void init (void)\n{\n");


Re: [gomp] Fix PTX unloading

2015-07-15 Thread Ilya Verbin
On Wed, Jul 15, 2015 at 14:36:45 -0400, Nathan Sidwell wrote:
> -= devicep->load_image_func (devicep->target_id, target_data, 
> &target_table);
> += devicep->load_image_func (devicep->target_id, target_data,
> + &target_table);

It was exactly 80 chars long :)

  -- Ilya


Re: PATCH trunk GCCJIT: adding gcc_jit_context_new_rvalue_from_long_long, etc...

2015-07-15 Thread David Malcolm
On Wed, 2015-07-15 at 20:19 +0200, Basile Starynkevitch wrote:
> Hello All and David Malcolm
> 
> The attached patch (relative to trunk r224842) is adding 
> gcc_jit_context_new_rvalue_from_long_long and similar functions to
> GCCJIT.

Thanks.

[CCing the jit mailing list; please CC patches affecting the jit there.]

Various comments inline throughout.

> It is bootstrapping, but I don't have any test cases 

You don't need to do a full bootstrap for code that just touches the
"jit" subdirectory.

You can use "make check-jit" to run just the jit test suite.  It's
mostly parallelized, so
   make -jN check-jit
for some N is worthwhile.

Currently you ought to get about 8000 "PASS" results in jit.sum, and no
failures.

Please add test coverage for the new API entrypoints to
gcc/testsuite/jit.dg/test-constants.c (which is what tests the analogous
pre-existing entrypoints).

You can run just this one testcase by running:

 make check-jit RUNTESTFLAGS="-v -v -v jit.exp=test-constants.c"

Aside: this isn't the case here, but if you were adding an entirely new
testcase, here are some notes: jit.exp expects jit testcases to begin
with "test-" or "test-error-" (for a testcase that generates an error
on a gcc_jit_context).   New testcases that don't generate errors should
ideally be added to the "testcases" array in
testsuite/jit.dg/all-non-failing-tests.h; this means that, in addition
to being run standalone, they also get run within test-combination.c
(which runs all successful tests inside one big gcc_jit_context), and
test-threads.c (which runs all successful tests in one process, each one
running in a different thread on a different gcc_jit_context).


> ## gcc/jit/ChangeLog entry:
> 
> 2015-07-15  Basile Starynkevitch  
> 
>  * libgccjit.h (gcc_jit_context_new_rvalue_from_long_long)
>  (gcc_jit_context_new_rvalue_from_int32)
>  (gcc_jit_context_new_rvalue_from_int64)
>  (gcc_jit_context_new_rvalue_from_intptr): New function
>  declarations.
> 
>  * libgccjit.map: New entries for above functions.
> 
>  * libgccjit.c (gcc_jit_context_new_rvalue_from_long_long)
>  (gcc_jit_context_new_rvalue_from_int32)
>  (gcc_jit_context_new_rvalue_from_int64)
>  (gcc_jit_context_new_rvalue_from_intptr): New functions.
> 
> ###
> 
> Comments are welcome. Ok for trunk?
> see https://gcc.gnu.org/ml/jit/2015-q3/msg00085.html


Note that these are *host* types; the target type is expressed by the
(gcc_jit_type *) parameter.

Do we actually need all of them?   I suspect that these:

  gcc_jit_context_new_rvalue_from_long_long
  gcc_jit_context_new_rvalue_from_unsigned_long_long

ought to suffice, assuming we can guarantee that
  sizeof (long long) >= sizeof (int64_t)
and
  sizeof (long long) >= sizeof (intptr_t)
on every host that we care about.

[snip]


> Index: gcc/jit/libgccjit.c
> ===
> --- gcc/jit/libgccjit.c (revision 225842)
> +++ gcc/jit/libgccjit.c (working copy)
> @@ -1154,6 +1154,70 @@ gcc_jit_context_new_rvalue_from_long
> (gcc_jit_cont
>   ->new_rvalue_from_const  (numeric_type, value));
>  }
>  
> +/* Public entrypoint.  See description in libgccjit.h.  */
> +
> +gcc_jit_rvalue *
> +gcc_jit_context_new_rvalue_from_long_long (gcc_jit_context *ctxt,
> +gcc_jit_type *numeric_type,
> +long long value)
> +{
> +  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
> +  JIT_LOG_FUNC (ctxt->get_logger ());
> +  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
> +
> +  return ((gcc_jit_rvalue *)ctxt
> + ->new_rvalue_from_const  (numeric_type, value));
> +}
> +
> +
> +/* Public entrypoint.  See description in libgccjit.h.  */
> +
> +gcc_jit_rvalue *
> +gcc_jit_context_new_rvalue_from_int32 (gcc_jit_context *ctxt,
> +gcc_jit_type *numeric_type,
> +int32_t value)
> +{
> +  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
> +  JIT_LOG_FUNC (ctxt->get_logger ());
> +  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
> +
> +  return ((gcc_jit_rvalue *)ctxt
> + ->new_rvalue_from_const  (numeric_type, value));
> +}
> +
> +
> +/* Public entrypoint.  See description in libgccjit.h.  */
> +
> +gcc_jit_rvalue *
> +gcc_jit_context_new_rvalue_from_int64 (gcc_jit_context *ctxt,
> +gcc_jit_type *numeric_type,
> +int64_t value)
> +{
> +  RETURN_NULL_IF_FAIL (ctxt, NULL, NULL, "NULL context");
> +  JIT_LOG_FUNC (ctxt->get_logger ());
> +  RETURN_NULL_IF_FAIL_NONNULL_NUMERIC_TYPE (ctxt, numeric_type);
> +
> +  return ((gcc_jit_rvalue *)ctxt
> + ->new_rvalue_from_const  (numeric_type, value));
> +}
> +
> +
> +/* Public entrypoint.  See description in libgccjit.h.  */
> +
> +gcc_jit_rvalue *
> +gcc_jit_context_new_rvalue_from_intptr (gcc_jit_context *ctxt,
> +  

Re: [PATCH][4/n] Remove GENERIC stmt combining from SCCVN

2015-07-15 Thread Jeff Law

On 07/14/2015 05:37 AM, Richard Biener wrote:

On Tue, 14 Jul 2015, Richard Biener wrote:


On Mon, 13 Jul 2015, Jeff Law wrote:


2015-07-13  Richard Biener  

* tree-ssa-dom.c (record_temporary_equivalences): Merge
widening type conversion case from
record_equivalences_from_incoming_edge
and use record_equality to record equivalences.
(record_equivalences_from_incoming_edge): Call
record_temporary_equivalences.

Yea, if testing is clean, that's OK.  Ought to be easier to then add code to
handle looking at the uses of X to see if they create an equivalence for the
destination of those use statements.


Applied.  The following patch adds the equivalences for the destination
of use stmts if they simplify.


Actually I can't use FOR_EACH_IMM_USE_STMT any longer because
record_equivalence ends up calling has_single_use which doesn't
handle the special marker FOR_EACH_IMM_USE_STMT inserts.

Thus the following - bootstrapped and tested on x86_64-unknown-linux-gnu.

Ok?

Thanks,
Richard.

2015-07-14  Richard Biener  

* tree-ssa-dom.c (dom_valueize): New function.
(record_temporary_equivalences): Also record equivalences
for dominating stmts that have uses of equivalences we are
about to record.

OK.
jeff


fixup gomp register/unregister prototypes

2015-07-15 Thread Nathan Sidwell
I'm almost tempted to commit as obvious.  I noticed that the callers of these 
functions from code generated by mkoffload declare the 2nd arg as an int, 
because they have no visibility of the enum.  I thought it wise to make the 
definitions match.


ok for trunk?

nathan
2015-07-15  Nathan Sidwell  

	* target.c (GOMP_offload_register): Use int for device type arg.
	(GOMP_offload_unregister): Likewise.

Index: target.c
===
--- target.c	(revision 225843)
+++ target.c	(working copy)
@@ -727,10 +727,11 @@ gomp_offload_image_to_device (struct gom
 
 /* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
-   the target, and TARGET_DATA needed by target plugin.  */
+   the target, and TARGET_DATA needed by target plugin.  TYPE is an
+   int because the caller is not exposed to the enum definition.   */
 
 void
-GOMP_offload_register (void *host_table, enum offload_target_type target_type,
+GOMP_offload_register (void *host_table, int target_type,
 		   void *target_data)
 {
   int i;
@@ -764,7 +765,7 @@ GOMP_offload_register (void *host_table,
the target, and TARGET_DATA needed by target plugin.  */
 
 void
-GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
+GOMP_offload_unregister (void *host_table, int target_type,
 			 void *target_data)
 {
   void **host_func_table = ((void ***) host_table)[0];


Re: [PATCH][4/n] Remove GENERIC stmt combining from SCCVN

2015-07-15 Thread Andrew MacLeod

On 07/15/2015 03:01 PM, Jeff Law wrote:

On 07/14/2015 05:37 AM, Richard Biener wrote:

On Tue, 14 Jul 2015, Richard Biener wrote:



Applied.  The following patch adds the equivalences for the destination
of use stmts if they simplify.


Actually I can't use FOR_EACH_IMM_USE_STMT any longer because
record_equivalence ends up calling has_single_use, which doesn't
handle the special marker FOR_EACH_IMM_USE_STMT inserts.


hmm, that is unfortunate... and seems almost like a  bug to me... Maybe 
that should be fixed?


And doesn't that also mean num_imm_uses() has a latent bug if used 
during a FOR_EACH_IMM_USE_STMT ?


Admittedly, neither situation is very common, I suspect, but it does seem
like a hidden gotcha waiting to happen.


Andrew




Re: PATCH trunk GCCJIT: adding gcc_jit_context_new_rvalue_from_long_long, etc...

2015-07-15 Thread Basile Starynkevitch

On 07/15/2015 20:52, David Malcolm wrote:

On Wed, 2015-07-15 at 20:19 +0200, Basile Starynkevitch wrote:

Hello All and David Malcolm

The attached patch (relative to trunk r224842) is adding
gcc_jit_context_new_rvalue_from_long_long and similar functions to
GCCJIT.
Does this actually link and run? This appears to be missing some 
implementations of the template specializations in jit/jit-recording.c 
for the new specializations of new_rvalue_from_const. If these are 
missing, I'd expect to see a linker error at run-time when attempting 
to run client code that links against such a libgccjit.so. 


It does bootstrap (in the GCC sense). I suspect that C++ integral 
promotion or casting rules are enough to have something being linked, 
but probably not what is really needed. And I'm testing that on 
x86-64/Linux where the patch is almost useless.


Thanks for your other comments. I'm trying to understand them and I am 
working on that.


Cheers

--
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basilestarynkevitchnet mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mine, sont seulement les miennes} ***



Re: PATCH trunk GCCJIT: adding gcc_jit_context_new_rvalue_from_long_long, etc...

2015-07-15 Thread David Malcolm
On Wed, 2015-07-15 at 21:15 +0200, Basile Starynkevitch wrote:
> On 07/15/2015 20:52, David Malcolm wrote:
> > On Wed, 2015-07-15 at 20:19 +0200, Basile Starynkevitch wrote:
> >> Hello All and David Malcolm
> >>
> >> The attached patch (relative to trunk r224842) is adding
> >> gcc_jit_context_new_rvalue_from_long_long and similar functions to
> >> GCCJIT.
> > Does this actually link and run? This appears to be missing some 
> > implementations of the template specializations in jit/jit-recording.c 
> > for the new specializations of new_rvalue_from_const. If these are 
> > missing, I'd expect to see a linker error at run-time when attempting 
> > to run client code that links against such a libgccjit.so. 
> 
> It does bootstrap (in the GCC sense). I suspect that C++ integral 
> promotion or casting rules are enough to have something being linked, 
> but probably not what is really needed. 

Perhaps, but note that nothing in a regular gcc bootstrap uses
libgccjit, so you *might* still have a latent linking error that shows
up only at run time.   Running the jit testsuite is the best way to be
sure.

> And I'm testing that on 
> x86-64/Linux where the patch is almost useless.
> 
> Thanks for your other comments. I'm trying to understand them and I am 
> working on that.
> 
> Cheers
> 




[PATCH, PR66846] Mark inner loop for fixup in parloops

2015-07-15 Thread Tom de Vries

Hi,

I.

In OpenMP expansion of loops, we make some effort to create
matching loops in the loop state of the child function, for instance in
expand_omp_for_generic:

...
  struct loop *outer_loop;
  if (seq_loop)
outer_loop = l0_bb->loop_father;
  else
{
  outer_loop = alloc_loop ();
  outer_loop->header = l0_bb;
  outer_loop->latch = l2_bb;
  add_loop (outer_loop, l0_bb->loop_father);
}

  if (!gimple_omp_for_combined_p (fd->for_stmt))
{
  struct loop *loop = alloc_loop ();
  loop->header = l1_bb;
  /* The loop may have multiple latches.  */
  add_loop (loop, outer_loop);
}
...

And if that doesn't work out, we try to mark the loop state for fixup, 
in expand_omp_taskreg and expand_omp_target:

...
  /* When the OMP expansion process cannot guarantee an up-to-date
 loop tree arrange for the child function to fixup loops.  */
  if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
...

and expand_omp_for:
...
  else
/* If there isn't a continue then this is a degerate case where
   the introduction of abnormal edges during lowering will prevent
   original loops from being detected.  Fix that up.  */
loops_state_set (LOOPS_NEED_FIXUP);
...

However, loops are fixed up anyway, because the first pass we execute 
with the new child function is pass_fixup_cfg.


The new child function contains a function call to 
__builtin_omp_get_num_threads, which is marked with ECF_CONST, so 
execute_fixup_cfg marks the function for TODO_cleanup_cfg, and 
subsequently the loops with LOOPS_NEED_FIXUP.



II.

This patch adds a verification that at the end of the omp-expand 
processing of the child function, either the loop structure is ok, or 
marked for fixup.


This verification triggered a failure in parloops. When an outer loop is
being parallelized, both the outer and inner loop are cancelled. Then
during omp-expansion, we create a loop in the loop state for the outer
loop (the one that is transformed), but not for the inner, which causes
the verification failure:

...
outer-1.c:11:3: error: loop with header 5 not in loop tree
...

[ I ran into this verification failure with an openacc kernels testcase 
on the gomp-4_0-branch, where parloops is called additionally from a 
different location, and pass_fixup_cfg is not the first pass that the 
child function is processed by. ]


The patch contains a bit that makes sure that the loop state of the
child function is marked for fixup in parloops. The bit is non-trivial
since it creates a loop state and sets the fixup flag on the loop state,
but postpones the init_loops_structure call till move_sese_region_to_fn,
where it can succeed.



III.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Mark inner loop for fixup in parloops

2015-07-13  Tom de Vries  

	PR tree-optimization/66846
	* omp-low.c (expand_omp_taskreg) [ENABLE_CHECKING]: If
	!LOOPS_NEED_FIXUP, verify_loop_structure in child_fn.
	(expand_omp_target) [ENABLE_CHECKING]: Same.
	* tree-cfg.c (move_sese_region_to_fn): Only allocate struct loops if
	necessary.  Preserve dest_cfun->x_current_loops->state while calling
	init_loops_structure.
	* tree-parloops.c (gen_parallel_loop): If inner loop is cancelled,
	allocate struct loop and mark child_fn for loop fixup.
---
 gcc/omp-low.c   |  8 
 gcc/tree-cfg.c  | 26 +-
 gcc/tree-parloops.c | 10 +-
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 0e69bc2..64d9742 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5603,6 +5603,10 @@ expand_omp_taskreg (struct omp_region *region)
 	}
   if (gimple_in_ssa_p (cfun))
 	update_ssa (TODO_update_ssa);
+#ifdef ENABLE_CHECKING
+  if (!loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+	verify_loop_structure ();
+#endif
   pop_cfun ();
 }
 
@@ -8983,6 +8987,10 @@ expand_omp_target (struct omp_region *region)
 	  if (changed)
 	cleanup_tree_cfg ();
 	}
+#ifdef ENABLE_CHECKING
+  if (!loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+	verify_loop_structure ();
+#endif
   pop_cfun ();
 }
 
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d97b824..6b415fe 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -7126,11 +7126,27 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
 	}
 }
 
-  /* Initialize an empty loop tree.  */
-  struct loops *loops = ggc_cleared_alloc ();
-  init_loops_structure (dest_cfun, loops, 1);
-  loops->state = LOOPS_MAY_HAVE_MULTIPLE_LATCHES;
-  set_loops_for_fn (dest_cfun, loops);
+  struct loops *loops;
+  int loop_state_flags = 0;
+  if (dest_cfun->x_current_loops == NULL)
+{
+  /* Initialize an empty loop tree.  */
+  loops = ggc_cleared_alloc ();
+  set_loops_for_fn (dest_cfun, loops);
+}
+  else
+{
+  loo

  1   2   >