[PATCH i386 AVX512] [28/n] Add si/di masked interleave.

2014-08-28 Thread Kirill Yukhin
Hello,
This patch adds support for SI/DI masked interleaves.

Bootstrapped.
AVX-512* tests on top of patch-set all pass
under simulator.

Is it ok for trunk?

gcc/
* config/i386/sse.md
(define_insn "avx2_interleave_highv4di"): Add masking.
(define_insn "vec_interleave_highv2di"): Ditto.
(define_insn "avx2_interleave_lowv4di"): Ditto.
(define_insn "vec_interleave_lowv2di"): Ditto.

--
Thanks, K

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c602eeb..c9931b4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7515,18 +7515,18 @@
 })
 
 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
-(define_insn "avx2_interleave_highv4di"
-  [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_highv4di"
+  [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
  (vec_concat:V8DI
-   (match_operand:V4DI 1 "register_operand" "x")
-   (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+   (match_operand:V4DI 1 "register_operand" "v")
+   (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
  (parallel [(const_int 1)
 (const_int 5)
 (const_int 3)
 (const_int 7)])))]
-  "TARGET_AVX2"
-  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+  "TARGET_AVX2 && "
+  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7547,36 +7547,36 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
 
-(define_insn "vec_interleave_highv2di"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv2di"
+  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
  (vec_concat:V4DI
-   (match_operand:V2DI 1 "register_operand" "0,x")
-   (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+   (match_operand:V2DI 1 "register_operand" "0,v")
+   (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
  (parallel [(const_int 1)
 (const_int 3)])))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && "
   "@
punpckhqdq\t{%2, %0|%0, %2}
-   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,")
(set_attr "mode" "TI")])
 
-(define_insn "avx2_interleave_lowv4di"
-  [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_lowv4di"
+  [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
  (vec_concat:V8DI
-   (match_operand:V4DI 1 "register_operand" "x")
-   (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+   (match_operand:V4DI 1 "register_operand" "v")
+   (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
  (parallel [(const_int 0)
 (const_int 4)
 (const_int 2)
 (const_int 6)])))]
-  "TARGET_AVX2"
-  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+  "TARGET_AVX2 && "
+  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7597,18 +7597,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
 
-(define_insn "vec_interleave_lowv2di"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv2di"
+  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
  (vec_concat:V4DI
-   (match_operand:V2DI 1 "register_operand" "0,x")
-   (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+   (match_operand:V2DI 1 "register_operand" "0,v")
+   (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
  (parallel [(const_int 0)
 (const_int 2)])))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && "
   "@
punpcklqdq\t{%2, %0|%0, %2}
-   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")


Re: [PATCH 2/2] Enable elimination of zext/sext

2014-08-28 Thread Kugan


On 28/08/14 16:44, Marc Glisse wrote:
> On Thu, 28 Aug 2014, Kugan wrote:
> 
>> On 27/08/14 23:02, Kugan wrote:
>>> On 27/08/14 20:01, Uros Bizjak wrote:
 Hello!

> 2014-08-07  Kugan Vivekanandarajah  
>
> * calls.c (precompute_arguments): Check
> promoted_for_signed_and_unsigned_p and set the promoted mode.
> (promoted_for_signed_and_unsigned_p): New function.
> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
> and set the promoted mode.
> * expr.h (promoted_for_signed_and_unsigned_p): New function
> definition.
> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

 This patch regresses:

 Running target unix
 FAIL: libgomp.fortran/simd7.f90   -O2  execution test
 FAIL: libgomp.fortran/simd7.f90   -Os  execution test

>>>
>>> [snip]
>>>
 When compiling this code, we have:

 lhs = _63
 target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
 temp = (subreg:SI (reg:DI 540) 0)

 So, the code assumes that it is possible to copy (reg:DI 540) directly
 to (reg:DI 154). However, this is not the case, since we still have
 garbage in the top 32bits.

 Reverting the part above fixes the runtime failure, since (insn 599)
 is now:

 (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
 (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
  (nil))

 It looks to me that we have also to check the temp with
 SUBREG_PROMOTED_*.
>>>
>>> Sorry for the breakage. I am looking into this now and I can reproduce
>>> it on qemu-alpha.
>>>
>>> I have noticed the following VRP data which is used in deciding this
>>> erroneous removal. It seems suspicious to me.
>>>
>>> _343: [2147483652, 2147483715]
>>> _344: [8, 134]
>>> _345: [8, 134]
>>>
>>> _343 = ivtmp.179_52 + 2147483645;
>>> _344 = _343 * 2;
>>> _345 = (integer(kind=4)) _344;
>>>
>>> Error comes from the third statement.
>>
>> In tree-vrp.c, in extract_range_from_binary_expr_1, there is a loss of
>> precision and the value_range is truncated. For the test-case provided
>> by Uros, it is
>>
>> _344 = _343 * 2;
>> [...,0x10008], precision = 384
>> [...,0x10086], precision = 384
>>
>> and it is converted to following when it goes from wide_int to tree.
>> [8, 134]
> 
> Why do you believe that is wrong? Assuming _344 has a 32 bit type with
> wrapping overflow, this is just doing the wrapping modulo 2^32.
> 

Indeed. I missed the TYPE_OVERFLOW_WRAPS check earlier. Thanks for
pointing me to that.

Kugan


[Ping v2][PATCH] Add patch for debugging compiler ICEs.

2014-08-28 Thread Maxim Ostapenko

Ping. Add Joseph S. Myers as driver maintainer.

-Maxim
 Original Message 
Subject:Fwd: [PATCH] Add patch for debugging compiler ICEs.
Date:   Tue, 19 Aug 2014 17:57:51 +0400
From:   Maxim Ostapenko 
To: Jeff Law , GCC Patches 
CC: 	tsaund...@mozilla.com, Yury Gribov , Slava 
Garbuzov , Maxim Ostapenko 




Ping.

-Maxim
 Original Message 
Subject:[PATCH] Add patch for debugging compiler ICEs.
Date:   Mon, 04 Aug 2014 21:03:22 +0400
From:   Maxim Ostapenko 
To: GCC Patches 
CC: Jeff Law , Jakub Jelinek ,
tsaund...@mozilla.com, Yury Gribov , Slava
Garbuzov 



Hi,

Years ago there was a discussion
(https://gcc.gnu.org/ml/gcc-patches/2004-01/msg02437.html) about
debugging compiler ICEs that resulted in a patch from Jakub, which dumps
useful information into a temporary file, but for some reason this patch
wasn't applied to trunk.

This is the resurrected patch with added GCC version information into
generated repro file.

I've updated the patch that I've posted earlier
(https://gcc.gnu.org/ml/gcc-patches/2014-07/msg01649.html ) according to
recent upstream discussion
(https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00020.html).

The debugging functionality is disabled by default and can be enabled
by adding -freport-bug to the compile options. It can also be enabled by
default with
--with-spec during the GCC build.

There are several directions in which this can be improved e.g:

1) more user-friendly ways to report bugs (autosubmitting to Bugzilla, etc.)

2) generate repro in case of segfault.

but having basic functionality (autogenerating a repro case in a temporary
file) already seems quite useful.

-Maxim






2014-08-04  Jakub Jelinek  
	  Max Ostapenko  

	* common.opt: New option.
	* doc/invoke.texi: Describe new option.
	* diagnostic.c (diagnostic_action_after_output): Exit with
	ICE_EXIT_CODE instead of FATAL_EXIT_CODE.
	* gcc.c (execute): Don't free first string early, but at the end
	of the function.  Call retry_ice if compiler exited with
	ICE_EXIT_CODE.
	(main): Factor out common code.
	(print_configuration): New function.
	(try_fork): Likewise.
	(redirect_stdout_stderr): Likewise.
	(files_equal_p): Likewise.
	(check_repro): Likewise.
	(run_attempt): Likewise.
	(do_report_bug): Likewise.
	(append_text): Likewise.
	(try_generate_repro): Likewise

diff --git a/gcc/common.opt b/gcc/common.opt
index 0c4f86b..aa79250 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1120,6 +1120,11 @@ fdump-noaddr
 Common Report Var(flag_dump_noaddr)
 Suppress output of addresses in debugging dumps
 
+freport-bug
+Common Driver Var(flag_report_bug)
+Collect and dump debug information into temporary file if ICE in C/C++
+compiler occured.
+
 fdump-passes
 Common Var(flag_dump_passes) Init(0)
 Dump optimization passes
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 0cc7593..67b8c5b 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -492,7 +492,7 @@ diagnostic_action_after_output (diagnostic_context *context,
 	real_abort ();
   diagnostic_finish (context);
   fnotice (stderr, "compilation terminated.\n");
-  exit (FATAL_EXIT_CODE);
+  exit (ICE_EXIT_CODE);
 
 default:
   gcc_unreachable ();
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4f327df..dafb573 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6271,6 +6271,11 @@ feasible to use diff on debugging dumps for compiler invocations with
 different compiler binaries and/or different
 text / bss / data / heap / stack / dso start locations.
 
+@item -freport-bug
+@opindex freport-bug
+Collect and dump debug information into temporary file if ICE in C/C++
+compiler occured.
+
 @item -fdump-unnumbered
 @opindex fdump-unnumbered
 When doing debugging dumps, suppress instruction numbers and address output.
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 44d0416..f7a56d1 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -43,6 +43,13 @@ compilation is specified by a string called a "spec".  */
 #include "params.h"
 #include "vec.h"
 #include "filenames.h"
+#ifdef HAVE_UNISTD_H
+#include 
+#endif
+
+#if !(defined (__MSDOS__) || defined (OS2) || defined (VMS))
+#define RETRY_ICE_SUPPORTED
+#endif
 
 /* By default there is no special suffix for target executables.  */
 /* FIXME: when autoconf is fixed, remove the host check - dj */
@@ -253,6 +260,9 @@ static void init_gcc_specs (struct obstack *, const char *, const char *,
 static const char *convert_filename (const char *, int, int);
 #endif
 
+#ifdef RETRY_ICE_SUPPORTED
+static void try_generate_repro (const char *prog, const char **argv);
+#endif
 static const char *getenv_spec_function (int, const char **);
 static const char *if_exists_spec_function (int, const char **);
 static const char *if_exists_else_spec_function (int, const char **);
@@ -2849,7 +2859,7 @@ execute (void)
 	}
 	}
 
-  if (string != commands[i].prog)
+  if (i && string != commands[i].prog)
 	free (CONST_CAST (char *, string));
 

Re: [PATCH 2/2] Enable elimination of zext/sext

2014-08-28 Thread Kugan


On 27/08/14 20:07, Richard Biener wrote:
> On Wed, Aug 27, 2014 at 12:01 PM, Uros Bizjak  wrote:
>> Hello!
>>
>>> 2014-08-07  Kugan Vivekanandarajah  
>>>
>>> * calls.c (precompute_arguments): Check
>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>> (promoted_for_signed_and_unsigned_p): New function.
>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>> and set the promoted mode.
>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>
>> This patch regresses:
>>
>> Running target unix
>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>
>> on alphaev6-linux-gnu.
>>
>> The problem can be illustrated with attached testcase with a
>> crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
>> missing SImode extension after DImode shift of SImode subregs for this
>> part:
>>
>> --cut here--
>>   # test.23_12 = PHI <0(37), 1(36)>
>>   _242 = ivtmp.181_73 + 2147483645;
>>   _240 = _242 * 2;
>>   _63 = (integer(kind=4)) _240;
>>   if (ubound.6_99 <= 2)
>> goto ;
>>   else
>> goto ;
>> ;;succ:   39
>> ;;40
>>
>> ;;   basic block 39, loop depth 1
>> ;;pred:   38
>>   pretmp_337 = test.23_12 | l_76;
>>   goto ;
>> ;;succ:   45
>>
>> ;;   basic block 40, loop depth 1
>> ;;pred:   38
>>   _11 = *c_208[0];
>>   if (_11 != _63)
>> goto ;
>>   else
>> goto ;
>> --cut here--
>>
>> this expands to:
>>
>> (code_label 592 591 593 35 "" [0 uses])
>>
>> (note 593 592 0 NOTE_INSN_BASIC_BLOCK)
>>
>> ;; _63 = (integer(kind=4)) _240;
>>
>> (insn 594 593 595 (set (reg:SI 538)
>> (const_int 1073741824 [0x4000])) -1
>>  (nil))
>>
>> (insn 595 594 596 (set (reg:SI 539)
>> (plus:SI (reg:SI 538)
>> (const_int 1073741824 [0x4000]))) -1
>>  (nil))
>>
>> (insn 596 595 597 (set (reg:SI 537)
>> (plus:SI (reg:SI 539)
>> (const_int -3 [0xfffd]))) -1
>>  (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffd])
>> (nil)))
>>
>> (insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
>> (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
>> (reg:SI 537))) -1
>>  (nil))
>>
>> (insn 598 597 599 (set (reg:DI 540)
>> (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
>> (const_int 1 [0x1]))) -1
>>  (nil))
>>
>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>> (reg:DI 540)) -1
>>  (nil))
>>
>> ...
>>
>> (note 610 609 0 NOTE_INSN_BASIC_BLOCK)
>>
>> ;; _11 = *c_208[0];
>>
>> (insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
>> (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
>> A128]))) simd7.f90:12 -1
>>  (nil))
>>
>> ;; if (_11 != _63)
>>
>> (insn 612 611 613 40 (set (reg:DI 545)
>> (eq:DI (reg:DI 120 [ D.1694 ])
>> (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
>>  (nil))
>>
>> (jump_insn 613 612 616 40 (set (pc)
>> (if_then_else (eq (reg:DI 545)
>> (const_int 0 [0]))
>> (label_ref 0)
>> (pc))) simd7.f90:12 -1
>>  (int_list:REG_BR_PROB 450 (nil)))
>>
>> which results in following asm:
>>
>> $L35:
>> addl $25,$7,$2 # 597addsi3/1[length = 4]
>> addq $2,$2,$2 # 598ashldi3/1[length = 4] <-- here
>> bne $24,$L145 # 601*bcc_normal[length = 4]
>> lda $4,4($20) # 627*adddi_internal/2[length = 4]
>> ldl $8,0($20) # 611*extendsidi2_1/2[length = 4]
>> lda $3,3($31) # 74*movdi/2[length = 4]
>> cmpeq $8,$2,$2 # 612*setcc_internal[length = 4]  <-- compare
>> bne $2,$L40 # 613*bcc_normal[length = 4]
>> br $31,$L88 # 2403jump[length = 4]
>> .align 4
>> ...
>>
>> Tracking the values with the debugger shows wrong calculation:
>>
>>0x00012000108c <+1788>:  addlt10,t12,t1
>>0x000120001090 <+1792>:  addqt1,t1,t1
>>...
>>0x0001200010a4 <+1812>:  cmpeq   t6,t1,t1
>>0x0001200010a8 <+1816>:  bne t1,0x1200010c0 
>>
>> (gdb) si
>> 0x00012000108c  17  l = l .or. any (b /= 7 + i)
>> (gdb) i r t10 t12
>> t100x7  7
>> t120x7ffd   2147483645
>>
>> (gdb) si
>> 0x000120001090  17  l = l .or. any (b /= 7 + i)
>> (gdb) i r t1
>> t1 0x8004   -2147483644
>>
>> (gdb) si
>> 18  l = l .or. any (c /= 8 + 2 * i)
>> (gdb) i r t1
>> t1 0x0008   -4294967288
>>
>> At this point, the calculation should zero-extend SImode value to full
>> DImode, since compare operates on DImode values. The problematic insn
>> is (insn 599), which is now a DImode assignment instead of
>> zero-extend, due to:
>>
>> --- a/gcc/cfgexpand.c
>> +++ b

Re: [PATCH i386 AVX512] [28/n] Add si/di masked interleave.

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 9:05 AM, Kirill Yukhin  wrote:
> Hello,
> This patch adds support for SI/DI masked interleaves.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
> * config/i386/sse.md
> (define_insn "avx2_interleave_highv4di"): Add masking.
> (define_insn "vec_interleave_highv2di"): Ditto.
> (define_insn "avx2_interleave_lowv4di"): Ditto.
> (define_insn "vec_interleave_lowv2di"): Ditto.

It looks like the latest patches all extend patterns in the same way. I
have looked briefly through the patch and rubberstamped it as OK.

OK.

Thanks,
Uros.


Re: [PING^3] Re: [PATCH 1/2] Add -B support to gcc-ar/ranlib/nm

2014-08-28 Thread Richard Biener
On Wed, Aug 27, 2014 at 3:45 PM, Andi Kleen  wrote:
> Andi Kleen  writes:
>
> PING!
>
>> Andi Kleen  writes:
>>
>> PING^2 !
>>
>> Would be nice to make slim bootstrap work, it really speeds it up quite
>> a bit.
>>
>>> From: Andi Kleen 
>>>
>>> To use gcc-{ar,ranlib} for boot strap we need to add a -B option
>>> to the tool. Since ar has weird and unusual argument conventions
>>> implement the code by hand instead of using any libraries.
>>>
>>> v2: Fix typo
>>>
>>> gcc/:
>>>
>>> 2014-08-04  Andi Kleen  
>>>
>>>  * gcc-ar.c (main): Support -B option.
>>> ---
>>>  gcc/gcc-ar.c | 41 +
>>>  1 file changed, 41 insertions(+)
>>>
>>> diff --git a/gcc/gcc-ar.c b/gcc/gcc-ar.c
>>> index aebaa92..70bf222 100644
>>> --- a/gcc/gcc-ar.c
>>> +++ b/gcc/gcc-ar.c
>>> @@ -132,9 +132,50 @@ main (int ac, char **av)
>>>const char **nargv;
>>>bool is_ar = !strcmp (PERSONALITY, "ar");
>>>int exit_code = FATAL_EXIT_CODE;
>>> +  int i;
>>>
>>>setup_prefixes (av[0]);
>>>
>>> +  /* Not using getopt for now.  */
>>> +  for (i = 0; i < ac; i++)
>>> +  if (!strncmp (av[i], "-B", 2))

This also matches joined -B/foo

>>> +{
>>> +  const char *arg = av[i] + 2;
>>> +  const char *end;
>>> +
>>> +  memmove (av + i, av + i + 1, sizeof (char *) * ((ac + 1) - i));
>>> +  ac--;
>>> +  if (*arg == 0)
>>> +{
>>> +  arg = av[i + 1];
>>> +  if (!arg)
>>> +{

But this doesn't handle it?  common.opt has -B as Joined Separate option
thus allowing both.

>>> +  fprintf (stderr, "Usage: gcc-ar [-B prefix] ar arguments 
>>> ...\n");
>>> +  exit (EXIT_FAILURE);
>>> +}
>>> +  memmove (av + i, av + i + 1, sizeof (char *) * ((ac + 1) - i));
>>> +  ac--;
>>> +  i++;
>>> +}
>>> +
>>> +  for (end = arg; *end; end++)
>>> +;
>>> +  end--;
>>> +  if (end > arg && *end != '/')
>>> +{
>>> +  char *newarg = (char *)xmalloc (strlen(arg) + 2);
>>> +
>>> +  strcpy (newarg, arg);
>>> +  strcat (newarg, "/");
>>> +  arg = newarg;
>>> +}

Why the above?  And why open-coded instead of using strlen?
Also instead of testing for '/' this should test for IS_DIR_SEPARATOR.

Without comments all this code is hard to decipher.

>>> +
>>> +  add_prefix (&path, arg);
>>> +  add_prefix (&target_path, arg);

This adds the -B path to the _end_ of the prefix list.  Does that match
gcc driver behavior?  The gcc driver uses PREFIX_PRIORITY_B_OPT
as argument to add_prefix which ends up adding -B prefixes to the
beginning of the prefix list.

Thanks,
Richard.

>>> +  break;
>>> +}
>>> +
>>> +
>>>/* Find the GCC LTO plugin */
>>>plugin = find_a_file (&target_path, LTOPLUGINSONAME, R_OK);
>>>if (!plugin)
>
> --
> a...@linux.intel.com -- Speaking for myself only


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 0:19 GMT+04:00 Vladimir Makarov :
> On 2014-08-26 5:42 PM, Ilya Enkovich wrote:
>>
>> Hi,
>>
>> Here is a patch I tried.  I apply it over revision 214215.  Unfortunately
>> I do not have a small reproducer but the problem can be easily reproduced on
>> SPEC2000 benchmark 175.vpr.  The problem is in read_arch.c:701 where float
>> value is compared with float constant 1.0.  It is inlined into read_arch
>> function and can be easily found in RTL dump of function read_arch as a
>> float comparison with 1.0 after the first call to strtod function.
>>
>> Here is a compilation string I use:
>>
>> gcc -m32 -mno-movbe -g3 -fdump-rtl-all-details -O2 -ffast-math
>> -mfpmath=sse -m32  -march=slm -fPIE -pie -c -o read_arch.o
>> -DSPEC_CPU2000read_arch.c
>>
>> In my final assembler comparison with 1.0 looks like:
>>
>> comiss  .LC11@GOTOFF(%ebp), %xmm0   # 1101  *cmpisf_sse [length =
>> 7]
>>
>> and %ebp here doesn't have a proper value.
>>
>> I'll try to make a smaller reproducer if these instructions don't help.
>
>
> I've managed to reproduce it.  Although it would be better to send the patch
> as an attachment.
>
> The problem is actually in IRA, not LRA.  IRA splits the pseudo used for PIC.
> Then, in a region where a *new* pseudo is used as PIC, we rematerialize a
> constant which is transformed into memory addressed through the *original*
> PIC pseudo.
>
> To solve the problem we should prevent such splitting and guarantee that PIC
> pseudo allocnos in different regions get the same hard reg.
>
> The following patch should solve the problem.
>

Thanks for the patch! I'll try it and be back with results.

Ilya
>


[PATCH][match-and-simplify] Fix some testcases

2014-08-28 Thread Richard Biener

The following fixes a few ICEs and C++ testsuite fails.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2014-08-28  Richard Biener  

* match-constant-folding.pd (x + 0 -> x): Wrap result in
a NON_LVALUE_EXPR to account for the C++ frontend folding
too early, avoiding some spurious testsuite FAILs.
* match-conversions.pd: Use convert where that's unconditionally
required.

Index: gcc/match-constant-folding.pd
===
--- gcc/match-constant-folding.pd   (revision 214519)
+++ gcc/match-constant-folding.pd   (working copy)
@@ -20,6 +20,13 @@ along with GCC; see the file COPYING3.
 (for op in plus pointer_plus minus bit_ior bit_xor
   (simplify
 (op @0 integer_zerop)
+(if (!in_gimple_form)
+  /* ???  fold_binary adds non_lvalue here and "fixes" the C++
+run of Wsizeof-pointer-memaccess1.c, preserving enough of
+sizeof (&a) + 0 because sizeof (&a) is maybe_lvalue_p ()
+for no good reason.  The C frontend is fine as it doesn't
+fold too early.  */
+ (non_lvalue @0))
 @0))
 
 (simplify
Index: gcc/match-conversions.pd
===
--- gcc/match-conversions.pd(revision 214567)
+++ gcc/match-conversions.pd(working copy)
@@ -101,7 +101,7 @@
&& ((inside_prec < inter_prec && inter_prec < final_prec
 && inside_unsignedp && !inter_unsignedp)
|| final_prec == inter_prec))
-(ocvt @0))
+(convert @0))
 
/* Two conversions in a row are not needed unless:
- some conversion is floating-point (overstrict for now), or
@@ -134,10 +134,10 @@
&& final_prec == inside_prec
&& final_prec > inter_prec
&& inter_unsignedp)
-(ocvt (bit_and @0 { wide_int_to_tree
- (inside_type,
-  wi::mask (inter_prec, false,
-TYPE_PRECISION (inside_type))); })))
+(convert (bit_and @0 { wide_int_to_tree
+(inside_type,
+ wi::mask (inter_prec, false,
+   TYPE_PRECISION (inside_type))); })))
 
/* If we are converting an integer to a floating-point that can
   represent it exactly and back to an integer, we can skip the
@@ -145,4 +145,4 @@
(if (inside_int && inter_float && final_int &&
(unsigned) significand_size (TYPE_MODE (inter_type))
>= inside_prec - !inside_unsignedp)
-(ocvt @0))
+(convert @0))


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 1:39 GMT+04:00 Jeff Law :
> On 08/26/14 15:42, Ilya Enkovich wrote:
>>
>> diff --git a/gcc/calls.c b/gcc/calls.c
>> index 4285ec1..85dae6b 100644
>> --- a/gcc/calls.c
>> +++ b/gcc/calls.c
>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>> ATTRIBUTE_UNUSED,
>>   call_expr_arg_iterator iter;
>>   tree arg;
>>
>> +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>> +  {
>> +   gcc_assert (pic_offset_table_rtx);
>> +   args[j].tree_value = make_tree (ptr_type_node,
>> +   pic_offset_table_rtx);
>> +   j--;
>> +  }
>> +
>>   if (struct_value_addr_value)
>> {
>> args[j].tree_value = struct_value_addr_value;
>
> So why do you need this?  Can't this be handled in the call/call_value
> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
> inside ix86_expand_call?  Basically I'm not seeing the need for another
> target hook here.  I think that would significantly simplify the patch as
> well.

The GOT base address becomes an additional implicit arg with EBX relaxed,
and I handled it like all other args. I can move EBX initialization into
ix86_expand_call. I would still need some hint from the target to init
pic_offset_table_rtx with the proper value at the beginning of function
expand.

Thanks,
Ilya

>
>
> Jeff


Re: [PATCH] Steam out non-explicit -fno-tree-loop-distribute-patterns for LTO options

2014-08-28 Thread Richard Biener
On Wed, Aug 27, 2014 at 4:42 PM, Kito Cheng  wrote:
> Hi all:
>
> This patch basically is extension for r210100[1], stream out
> non-explicit -fno-tree-loop-distribute-patterns since compile with
> `-flto -O3 -fno-builtin` still may gen builtin function call during
> LTO phase.
>
> LTO bootstrapped and tested on x86_64-unknown-linux-gnu.

Hmm, but that will for two units, one compiled with -O2 and one
compiled with -O3, produce -O3 -fno-tree-loop-distribute-patterns.
The previous patch restricted this to -ffreestanding / -fno-builtin.

I think it makes more sense to preserve -fno-builtin, that is,
add 'LTO' to the list of FEs supporting 'fbuiltin' (the non-joined version)
and stream '-fno-builtin' if set, doing similar option post-processing
in the LTO frontend as I added to the C family frontends.

Richard.

> 2014-09-27  Kito Cheng  
>
> * lto-opts.c (lto_write_options): Output non-explicit
> -fno-tree-loop-distribute-patterns.
> * lto-wrapper.c (merge_and_complain): Merge
> -fno-tree-loop-distribute-patterns conservatively.
> (run_gcc): Pass through -fno-tree-loop-distribute-patterns.
>
>
> [1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=210100


Re: [PATCH 2/2] Enable elimination of zext/sext

2014-08-28 Thread Richard Biener
On Thu, Aug 28, 2014 at 9:50 AM, Kugan
 wrote:
>
>
> On 27/08/14 20:07, Richard Biener wrote:
>> On Wed, Aug 27, 2014 at 12:01 PM, Uros Bizjak  wrote:
>>> Hello!
>>>
 2014-08-07  Kugan Vivekanandarajah  

 * calls.c (precompute_arguments): Check
 promoted_for_signed_and_unsigned_p and set the promoted mode.
 (promoted_for_signed_and_unsigned_p): New function.
 (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
 and set the promoted mode.
 * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
 * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
 SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>>
>>> This patch regresses:
>>>
>>> Running target unix
>>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>>
>>> on alphaev6-linux-gnu.
>>>
>>> The problem can be illustrated with attached testcase with a
>>> crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
>>> missing SImode extension after DImode shift of SImode subregs for this
>>> part:
>>>
>>> --cut here--
>>>   # test.23_12 = PHI <0(37), 1(36)>
>>>   _242 = ivtmp.181_73 + 2147483645;
>>>   _240 = _242 * 2;
>>>   _63 = (integer(kind=4)) _240;
>>>   if (ubound.6_99 <= 2)
>>> goto ;
>>>   else
>>> goto ;
>>> ;;succ:   39
>>> ;;40
>>>
>>> ;;   basic block 39, loop depth 1
>>> ;;pred:   38
>>>   pretmp_337 = test.23_12 | l_76;
>>>   goto ;
>>> ;;succ:   45
>>>
>>> ;;   basic block 40, loop depth 1
>>> ;;pred:   38
>>>   _11 = *c_208[0];
>>>   if (_11 != _63)
>>> goto ;
>>>   else
>>> goto ;
>>> --cut here--
>>>
>>> this expands to:
>>>
>>> (code_label 592 591 593 35 "" [0 uses])
>>>
>>> (note 593 592 0 NOTE_INSN_BASIC_BLOCK)
>>>
>>> ;; _63 = (integer(kind=4)) _240;
>>>
>>> (insn 594 593 595 (set (reg:SI 538)
>>> (const_int 1073741824 [0x4000])) -1
>>>  (nil))
>>>
>>> (insn 595 594 596 (set (reg:SI 539)
>>> (plus:SI (reg:SI 538)
>>> (const_int 1073741824 [0x4000]))) -1
>>>  (nil))
>>>
>>> (insn 596 595 597 (set (reg:SI 537)
>>> (plus:SI (reg:SI 539)
>>> (const_int -3 [0xfffd]))) -1
>>>  (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffd])
>>> (nil)))
>>>
>>> (insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
>>> (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
>>> (reg:SI 537))) -1
>>>  (nil))
>>>
>>> (insn 598 597 599 (set (reg:DI 540)
>>> (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
>>> (const_int 1 [0x1]))) -1
>>>  (nil))
>>>
>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>> (reg:DI 540)) -1
>>>  (nil))
>>>
>>> ...
>>>
>>> (note 610 609 0 NOTE_INSN_BASIC_BLOCK)
>>>
>>> ;; _11 = *c_208[0];
>>>
>>> (insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
>>> (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
>>> A128]))) simd7.f90:12 -1
>>>  (nil))
>>>
>>> ;; if (_11 != _63)
>>>
>>> (insn 612 611 613 40 (set (reg:DI 545)
>>> (eq:DI (reg:DI 120 [ D.1694 ])
>>> (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
>>>  (nil))
>>>
>>> (jump_insn 613 612 616 40 (set (pc)
>>> (if_then_else (eq (reg:DI 545)
>>> (const_int 0 [0]))
>>> (label_ref 0)
>>> (pc))) simd7.f90:12 -1
>>>  (int_list:REG_BR_PROB 450 (nil)))
>>>
>>> which results in following asm:
>>>
>>> $L35:
>>> addl $25,$7,$2 # 597addsi3/1[length = 4]
>>> addq $2,$2,$2 # 598ashldi3/1[length = 4] <-- here
>>> bne $24,$L145 # 601*bcc_normal[length = 4]
>>> lda $4,4($20) # 627*adddi_internal/2[length = 4]
>>> ldl $8,0($20) # 611*extendsidi2_1/2[length = 4]
>>> lda $3,3($31) # 74*movdi/2[length = 4]
>>> cmpeq $8,$2,$2 # 612*setcc_internal[length = 4]  <-- compare
>>> bne $2,$L40 # 613*bcc_normal[length = 4]
>>> br $31,$L88 # 2403jump[length = 4]
>>> .align 4
>>> ...
>>>
>>> Tracking the values with the debugger shows wrong calculation:
>>>
>>>0x00012000108c <+1788>:  addlt10,t12,t1
>>>0x000120001090 <+1792>:  addqt1,t1,t1
>>>...
>>>0x0001200010a4 <+1812>:  cmpeq   t6,t1,t1
>>>0x0001200010a8 <+1816>:  bne t1,0x1200010c0 
>>>
>>> (gdb) si
>>> 0x00012000108c  17  l = l .or. any (b /= 7 + i)
>>> (gdb) i r t10 t12
>>> t100x7  7
>>> t120x7ffd   2147483645
>>>
>>> (gdb) si
>>> 0x000120001090  17  l = l .or. any (b /= 7 + i)
>>> (gdb) i r t1
>>> t1 0x8004   -2147483644
>>>
>>> (gdb) si
>>> 18  l = l .or. any (c /= 8 + 2 * i)
>>> (gdb) i r t1
>>> t1 0x0008   -4294967288
>>>
>>> At this point, the calculation should zero-extend SImode va

Re: [PATCH] Steam out non-explicit -fno-tree-loop-distribute-patterns for LTO options

2014-08-28 Thread Kito Cheng
Hi Richard:

I think preserving -fno-builtin is better than
-fno-tree-loop-distribute-patterns too.

However, if we preserve -fno-builtin, the next question is whether we
should preserve all -fbuiltin-* options and do the check logic[1] in
common code in gcc?

btw, in our internal gcc 4.9.x tree we moved -fno-builtin to the common
group (CL_COMMON) (it is preserved in LTO since it's in CL_COMMON) and check
it in tree-loop-distribution.c as a temporary workaround.

[1] gcc/c-family/c-common.c:disable_builtin_function

On Thu, Aug 28, 2014 at 4:45 PM, Richard Biener
 wrote:
> On Wed, Aug 27, 2014 at 4:42 PM, Kito Cheng  wrote:
>> Hi all:
>>
>> This patch basically is extension for r210100[1], stream out
>> non-explicit -fno-tree-loop-distribute-patterns since compile with
>> `-flto -O3 -fno-builtin` still may gen builtin function call during
>> LTO phase.
>>
>> LTO bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> Hmm, but that will for two units, one compiled with -O2 and one
> compiled with -O3, produce -O3 -fno-tree-loop-distribute-patterns.
> The previous patch restricted this to -ffreestanding / -fno-builtin.
>
> I think it makes more sense to preserve -fno-builtin, that is,
> add 'LTO' to the list of FEs supporting 'fbuiltin' (the non-joined version)
> and stream '-fno-builtin' if set, doing similar option post-processing
> in the LTO frontend as I added to the C family frontends.
>
> Richard.
>
>> 2014-09-27  Kito Cheng  
>>
>> * lto-opts.c (lto_write_options): Output non-explicit
>> -fno-tree-loop-distribute-patterns.
>> * lto-wrapper.c (merge_and_complain): Merge
>> -fno-tree-loop-distribute-patterns conservatively.
>> (run_gcc): Pass through -fno-tree-loop-distribute-patterns.
>>
>>
>> [1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=210100


Re: [PATCH] Steam out non-explicit -fno-tree-loop-distribute-patterns for LTO options

2014-08-28 Thread Richard Biener
On Thu, 28 Aug 2014, Kito Cheng wrote:

> Hi Richard:
> 
> I think preserve -fno-builtin is better than
> -fno-tree-loop-distribute-patterns too,
> 
> However if we preserve -fno-builtin, the coming problem is should we
> preserve all -fbuiltin-* and do the check logic[1] in common code in
> gcc?

No, just -fno-builtin.
 
> btw, in our internal gcc 4.9.x tree is move -fno-builtin to common
> group (CL_COMMON) (preserve in lto since it's in CL_COMMON)  and check
> it in tree-loop-distribution.c for temporary workaround .
> 
> [1] gcc/c-family/c-common.c:disable_builtin_function
> 
> On Thu, Aug 28, 2014 at 4:45 PM, Richard Biener
>  wrote:
> > On Wed, Aug 27, 2014 at 4:42 PM, Kito Cheng  wrote:
> >> Hi all:
> >>
> >> This patch basically is extension for r210100[1], stream out
> >> non-explicit -fno-tree-loop-distribute-patterns since compile with
> >> `-flto -O3 -fno-builtin` still may gen builtin function call during
> >> LTO phase.
> >>
> >> LTO bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > Hmm, but that will for two units, one compiled with -O2 and one
> > compiled with -O3, produce -O3 -fno-tree-loop-distribute-patterns.
> > The previous patch restricted this to -ffreestanding / -fno-builtin.
> >
> > I think it makes more sense to preserve -fno-builtin, that is,
> > add 'LTO' to the list of FEs supporting 'fbuiltin' (the non-joined version)
> > and stream '-fno-builtin' if set, doing similar option post-processing
> > in the LTO frontend as I added to the C family frontends.
> >
> > Richard.
> >
> >> 2014-09-27  Kito Cheng  
> >>
> >> * lto-opts.c (lto_write_options): Output non-explicit
> >> -fno-tree-loop-distribute-patterns.
> >> * lto-wrapper.c (merge_and_complain): Merge
> >> -fno-tree-loop-distribute-patterns conservatively.
> >> (run_gcc): Pass through -fno-tree-loop-distribute-patterns.
> >>
> >>
> >> [1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=210100
> 
> 

-- 
Richard Biener 
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer


[match-and-simplify] Merge from trunk

2014-08-28 Thread Richard Biener

This merges from trunk the fold_stmt re-org changes so I can
go forward with some builtin simplification stuff.

Committed.

Richard.

2014-08-28  Richard Biener  

Merge from trunk r214266 through r214675.



[PATCH] Move -fbuiltin from c.opt to common.opt and change it to common group

2014-08-28 Thread Kito Cheng
Hi all:

-fno-builtin seems to be not only for the C family front ends, but is also
used in LTO now, so move it to common.opt and change it to `Common`.
From 47552b58a09ac9d944be1c35bb5c938f4cb8ec0f Mon Sep 17 00:00:00 2001
From: Kito Cheng 
Date: Thu, 14 Aug 2014 11:34:26 +0800
Subject: [PATCH 1/2] Move -fbuiltin from c.opt to common.opt and change it to
 common group

ChangeLog

2014-09-28  Kito Cheng  

	c-family/
	* c.opt (fbuiltin): Move to gcc/common.opt

	gcc/
	* common.opt (fbuiltin): Add.
---
 gcc/c-family/c.opt | 4 
 gcc/common.opt | 4 
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index d619250..ae04114 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -912,10 +912,6 @@ Recognize the \"asm\" keyword
 fbuilding-libgcc
 C ObjC C++ ObjC++ Undocumented Var(flag_building_libgcc)
 
-fbuiltin
-C ObjC C++ ObjC++ Var(flag_no_builtin, 0)
-Recognize built-in functions
-
 fbuiltin-
 C ObjC C++ ObjC++ Joined
 
diff --git a/gcc/common.opt b/gcc/common.opt
index f7021102..607799d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -923,6 +923,10 @@ fbtr-bb-exclusive
 Common Report Var(flag_btr_bb_exclusive) Optimization
 Restrict target load migration not to re-use registers in any basic block
 
+fbuiltin
+Common Var(flag_no_builtin, 0)
+Recognize built-in functions
+
 fcall-saved-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -fcall-saved-	Mark  as being preserved across functions
-- 
1.9.3



[PATCH] For -fno-builtin disable pattern recognition if not enabled explicitly in lto.

2014-08-28 Thread Kito Cheng
Hi all:
The purpose of this patch is to prevent `-flto -O3 -fno-builtin` from
generating builtin function calls. It checks for -fno-builtin and disables
-ftree-loop-distribute-patterns if -fno-builtin is enabled, unless
-ftree-loop-distribute-patterns was set explicitly. The previous
discussion is in [1], and this patch depends on [2].

[1] https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02483.html
[2] https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02555.html
From 80922e53ff41b1c08322a0d0ae5b6d947cb39353 Mon Sep 17 00:00:00 2001
From: Kito Cheng 
Date: Thu, 28 Aug 2014 18:06:48 +0800
Subject: [PATCH 2/2] For -fno-builtin disable pattern recognition if not
 enabled explicitly. 2014-09-28  Kito Cheng  

	lto/
	* lto-lang.c (lto_post_options): For -fno-builtin disable
	pattern recognition if not enabled explicitly.
---
 gcc/lto/lto-lang.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c
index 9e8524a..ae022cc 100644
--- a/gcc/lto/lto-lang.c
+++ b/gcc/lto/lto-lang.c
@@ -794,6 +794,11 @@ lto_post_options (const char **pfilename ATTRIBUTE_UNUSED)
  support.  */
   flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
 
+  /* If -fno-builtin then disable pattern recognition.  */
+  if (!global_options_set.x_flag_tree_loop_distribute_patterns
+  && flag_no_builtin)
+flag_tree_loop_distribute_patterns = 0;
+
   /* Initialize the compiler back end.  */
   return false;
 }
-- 
1.9.3



Re: [PATCH] Stream out non-explicit -fno-tree-loop-distribute-patterns for LTO options

2014-08-28 Thread Kito Cheng
Hi Richard:

Thanks for your comment :)

I have sent new patches that move -fno-builtin to common.opt and check it in
lto-lang.c (lto_post_options).
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02555.html
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02556.html

On Thu, Aug 28, 2014 at 5:29 PM, Richard Biener  wrote:
> On Thu, 28 Aug 2014, Kito Cheng wrote:
>
>> Hi Richard:
>>
>> I think preserve -fno-builtin is better than
>> -fno-tree-loop-distribute-patterns too,
>>
>> However if we preserve -fno-builtin, the coming problem is should we
>> preserve all -fbuiltin-* and do the check logic[1] in common code in
>> gcc?
>
> No, just -fno-builtin.
>
>> btw, in our internal gcc 4.9.x tree is move -fno-builtin to common
>> group (CL_COMMON) (preserve in lto since it's in CL_COMMON)  and check
>> it in tree-loop-distribution.c for temporary workaround .
>>
>> [1] gcc/c-family/c-common.c:disable_builtin_function
>>
>> On Thu, Aug 28, 2014 at 4:45 PM, Richard Biener
>>  wrote:
>> > On Wed, Aug 27, 2014 at 4:42 PM, Kito Cheng  wrote:
>> >> Hi all:
>> >>
>> >> This patch basically is extension for r210100[1], stream out
>> >> non-explicit -fno-tree-loop-distribute-patterns since compile with
>> >> `-flto -O3 -fno-builtin` still may gen builtin function call during
>> >> LTO phase.
>> >>
>> >> LTO bootstrapped and tested on x86_64-unknown-linux-gnu.
>> >
>> > Hmm, but that will for two units, one compiled with -O2 and one
>> > compiled with -O3, produce -O3 -fno-tree-loop-distribute-patterns.
>> > The previous patch restricted this to -ffreestanding / -fno-builtin.
>> >
>> > I think it makes more sense to preserve -fno-builtin, that is,
>> > add 'LTO' to the list of FEs supporting 'fbuiltin' (the non-joined version)
>> > and stream '-fno-builtin' if set, doing similar option post-processing
>> > in the LTO frontend as I added to the C family frontends.
>> >
>> > Richard.
>> >
>> >> 2014-09-27  Kito Cheng  
>> >>
>> >> * lto-opts.c (lto_write_options): Output non-explicit
>> >> -fno-tree-loop-distribute-patterns.
>> >> * lto-wrapper.c (merge_and_complain): Merge
>> >> -fno-tree-loop-distribute-patterns conservatively.
>> >> (run_gcc): Pass through -fno-tree-loop-distribute-patterns.
>> >>
>> >>
>> >> [1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=210100
>>
>>
>
> --
> Richard Biener 
> SUSE / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer


Re: [PATCH] Move -fbuiltin from c.opt to common.opt and change it to common group

2014-08-28 Thread Richard Biener
On Thu, 28 Aug 2014, Kito Cheng wrote:

> Hi all:
> 
> -fno-builtin is seem not only for the c family front-end, but also
> used in LTO now, so move it to common.opt and change it to `Common`.

Please leave it in c-family and just add LTO to the set of supported
languages.  -fno-builtin isn't meaningful for other frontends
and we just happen to use the flag.

If anything, it makes more sense to move -fhosted and -ffreestanding,
though I don't know how meaningful those are for other frontends.

Or create a "proper" flag to communicate that the middle-end
should avoid creating new calls to builtins at all cost
(well, that's really what -ffreestanding is about).

Richard.


Port of VTV for Cygwin and MinGW

2014-08-28 Thread Patrick Wollgast
This patch contains a port of VTV -fvtable-verify=std for Cygwin and MinGW.

Since weak symbols on Windows and Linux are implemented differently, and
VTV should have the possibility to be switched on and off, the structure
of the feature had to be modified.
On Linux libstdc++ contains the weak stub functions of VTV. For Cygwin
and MinGW they have been removed, due to the difference of weak symbols.
On Linux and on Windows libstdc++ itself gets built with
-fvtable-verify=std. Since libvtv gets built after libstdc++, and
libstdc++ doesn't contain the stub functions any more, 'undefined
reference' errors are thrown during linking of libstdc++. To prevent
these errors during the linking process a libvtv-0.dll gets built from
the stub functions before libstdc++-6.dll is linked.
At the end of the build process two VTV dlls have been built. One is
called libvtv-0.dll, containing the real functions, the other is called
libvtv_stubs-0.dll, containing the stub functions. Depending on whether
libvtv-0.dll is first found in the dll search path or
libvtv_stubs-0.dll, renamed to libvtv-0.dll, the real functions or the
stub functions are used.

Testing:
The test builds were configured the following way:
Linux 64bit (from patched and unpatched trunk):
/path/to/configure --prefix=/prefix/gcc-vtv-bin-64
--enable-libstdcxx-threads --enable-vtable-verify=yes
MinGW 32bit cross compiled:
/path/to/configure --target=i686-w64-mingw32
--prefix=/prefix/mingw-vtv-bin-32 --with-gnu-ld --with-gnu-as
--enable-fully-dynamic-string --disable-multilib
--enable-libstdcxx-threads --enable-vtable-verify=yes
MinGW 64bit cross compiled:
/path/to/configure --target=x86_64-w64-mingw32
--prefix=/prefix/mingw-vtv-bin-64 --with-gnu-ld --with-gnu-as
--enable-fully-dynamic-string --disable-multilib
--enable-libstdcxx-threads --enable-vtable-verify=yes
Cygwin 64bit:
/path/to/configure --enable-languages=c,c++ --enable-libstdcxx-threads
--enable-vtable-verify=yes

On Linux the patched and unpatched version resulted in the same number
of passed tests with 'make check-target-libvtv'.

Since MinGW was cross compiled the test cases couldn't be built and run
with 'make check-target-libvtv'. Therefore they were built with the
attached makefiles and tested afterwards on Windows 7 64bit. Some test
cases contain Linux specific parts and weren't tested. See the makefiles
for further information. Additionally virtual_func_test_min_UAF.cpp was
also built and tested. All built tests passed.

Cygwin was just tested on gcc 4.9.0, because the current trunk isn't
building for me. Even the clean trunk without the patch attached to this
mail. On Cygwin with gcc 4.9.0 VTV worked.

Besides the test cases Botan was also built and tested (gcc 4.9.0) with
MinGW 32bit and VTV.

regards
* config/i386/cygwin.h (STARTFILE_SPEC): Add vtv_start.o,
if -fvtable-verify=std is used.
* config/i386/mingw-w64.h (STARTFILE_SPEC): Likewise.
* config/i386/mingw32.h (STARTFILE_SPEC): Likewise.
* config/i386/cygwin.h (ENDFILE_SPEC): Add vtv_end.o,
if -fvtable-verify=std is used.
* config/i386/mingw32.h (ENDFILE_SPEC): Likewise.
* config/i386/cygwin.h (LIB_SPEC): Pass -lvtv and -lpsapi,
if -fvtable-verify=std is used.
* config/i386/mingw-w64.h (LIB_SPEC): Likewise.
* config/i386/mingw32.h (LIB_SPEC): Likewise.


* gcc/varasm.c (assemble_variable): Add code to properly set the comdat
section and name for the .vtable_map_vars section in case the
target is PE or COFF.


* libgcc/Makefile.in: Move rules to build vtv_*.o out of the check
for CUSTOM_CRTSTUFF.
* libgcc/config.host (i[34567]86-*-cygwin*, x86_64-*-cygwin*, 
i[34567]86-*-mingw*)
(x86_64-*-mingw*): Only add vtv_*.o to extra_parts if enable_vtable_verify.


* libstdc++-v3/acinclude.m4: Define VTV_CYGMIN.
* libstdc++-v3/configure: Regenerate.

* libstdc++-v3/libsupc++/Makefile.am: Add vtv_sources only to
libsupc___la_SOURCES and libsupc__convenience_la_SOURCES if VTV_CYGMIN is
not set.
* libstdc++-v3/libsupc++/Makefile.in: Regenerated.
* libstdc++-v3/libsupc++/vtv_stubs.cc: Add non-weak declaration of every
function for Cygwin and MinGW.

* libstdc++-v3/src/Makefile.am: Add libvtv.la to toolexeclib_LTLIBRARIES,
if VTV_CYGMIN is set. Define libvtv_la_SOURCES, libvtv_la_LDFLAGS,
libvtv_la_AM_CXXFLAGS and libvtv_la_LINK if VTV_CYGMIN is set.
* libstdc++-v3/src/Makefile.in: Regenerate.


* libvtv/Makefile.am : Add libvtv.la to toolexeclib_LTLIBRARIES, if VTV_CYGMIN
is set. Define libvtv_la_LIBADD, libvtv_la_LDFLAGS, libvtv_stubs_la_LDFLAGS
and libvtv_stubs_la_SOURCES if VTV_CYGMIN is set. Add obstack.c to
libvtv_la_SOURCES if VTV_CYGMIN is set.
* libvtv/Makefile.in : Regenerate.
* libvtv/aclocal.m4 : Regenerate.
* libvtv/configure : Regenerate.
* libvtv/configure.ac : Add ACX_LT_HOST_FLAGS. Define VTV_CYGMIN.
* libvtv/configure.tgt : (x86_64-*-cygwin*, i?86-*-cygwin*, x86_64-*-mingw*)
(i?86-*-mingw*): Add to supported targets.
* libvtv/obstack.c : New file.
* libvtv/vtv_fail.cc : S

Re: [PATCH 1/4] aarch64: Improve epilogue unwind info

2014-08-28 Thread Jiong Wang

On 26/08/14 14:37, Jiong Wang wrote:

thanks,

verified no regression on aarch64-none-elf bare-metal check-gcc/check-gdb.

-- Jiong

On 22/08/14 23:05, Richard Henderson wrote:

Delay cfi restore opcodes until the stack frame is deallocated.
This reduces the number of cfi advance opcodes required.

We perform a similar optimization in the x86_64 epilogue.


  * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
  (aarch64_popwb_pair_reg): Remove.
  (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
  the restore ops performed by the insns generated.
  (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
  insn.  Perform the calls_eh_return addition later; do not attempt to
  preserve the CFA in that case.  Don't use aarch64_set_frame_expr.
---
   gcc/config/aarch64/aarch64.c | 177 
+--
   1 file changed, 52 insertions(+), 125 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c3c871e..9a11e05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1964,23 +1964,6 @@ aarch64_pushwb_single_reg (enum machine_mode mode, 
unsigned regno,
 RTX_FRAME_RELATED_P (insn) = 1;
   }

-static void
-aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
- HOST_WIDE_INT adjustment)
-{
-  rtx base_rtx = stack_pointer_rtx;
-  rtx insn, reg, mem;
-
-  reg = gen_rtx_REG (mode, regno);
-  mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
-plus_constant (Pmode, base_rtx, adjustment));
-  mem = gen_rtx_MEM (mode, mem);
-
-  insn = emit_move_insn (reg, mem);
-  add_reg_note (insn, REG_CFA_RESTORE, reg);
-  RTX_FRAME_RELATED_P (insn) = 1;
-}


This also fixes a hidden bug: POST_MODIFY also implies a REG_CFA_ADJUST_CFA,
which is missing when "aarch64_popwb_single_reg" is invoked.

I am curious why "dwarf2out_frame_debug_expr" only handles PRE/POST_MODIFY
on the destination, with no handling on the source?

for example the following rule:
(set reg (mem_post_modify sp offset))

thanks.

Regards,
Jiong


-
   static rtx
   aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx 
reg2,
HOST_WIDE_INT adjustment)
@@ -2011,7 +1994,6 @@ aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned 
regno1,
 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
reg2, adjustment));
 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-
 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
 RTX_FRAME_RELATED_P (insn) = 1;
   }
@@ -2033,29 +2015,6 @@ aarch64_gen_loadwb_pair (enum machine_mode mode, rtx 
base, rtx reg, rtx reg2,
   }
   }

-static void
-aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
-   unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
-{
-  rtx insn;
-  rtx reg1 = gen_rtx_REG (mode, regno1);
-  rtx reg2 = gen_rtx_REG (mode, regno2);
-
-  insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
-reg2, adjustment));
-  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-  RTX_FRAME_RELATED_P (insn) = 1;
-
-  if (cfa)
-add_reg_note (insn, REG_CFA_ADJUST_CFA,
- (gen_rtx_SET (Pmode, stack_pointer_rtx,
-   plus_constant (Pmode, cfa, adjustment;
-
-  add_reg_note (insn, REG_CFA_RESTORE, reg1);
-  add_reg_note (insn, REG_CFA_RESTORE, reg2);
-}
-
   static rtx
   aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
  rtx reg2)
@@ -2151,9 +2110,8 @@ aarch64_save_callee_saves (enum machine_mode mode, 
HOST_WIDE_INT start_offset,
   static void
   aarch64_restore_callee_saves (enum machine_mode mode,
HOST_WIDE_INT start_offset, unsigned start,
- unsigned limit, bool skip_wb)
+ unsigned limit, bool skip_wb, rtx *cfi_ops)
   {
-  rtx insn;
 rtx base_rtx = stack_pointer_rtx;
 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
   ? gen_frame_mem : 
gen_rtx_MEM);
@@ -2187,25 +2145,14 @@ aarch64_restore_callee_saves (enum machine_mode mode,

offset = start_offset + cfun->machine->frame.reg_offset[regno2];
mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
- insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
-  mem2));
- add_reg_note (insn, REG_CFA_RESTORE, reg);
- add_reg_note (insn, REG_CFA_RESTORE, reg2);
+ emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

- /* The first part of a frame-

Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 10:37 AM, Ilya Enkovich  wrote:
> 2014-08-28 1:39 GMT+04:00 Jeff Law :
>> On 08/26/14 15:42, Ilya Enkovich wrote:
>>>
>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>> index 4285ec1..85dae6b 100644
>>> --- a/gcc/calls.c
>>> +++ b/gcc/calls.c
>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>> ATTRIBUTE_UNUSED,
>>>   call_expr_arg_iterator iter;
>>>   tree arg;
>>>
>>> +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>> +  {
>>> +   gcc_assert (pic_offset_table_rtx);
>>> +   args[j].tree_value = make_tree (ptr_type_node,
>>> +   pic_offset_table_rtx);
>>> +   j--;
>>> +  }
>>> +
>>>   if (struct_value_addr_value)
>>> {
>>> args[j].tree_value = struct_value_addr_value;
>>
>> So why do you need this?  Can't this be handled in the call/call_value
>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>> target hook here.  I think that would significantly simply the patch as
>> well.
>
> GOT base address become an additional implicit arg with EBX relaxed
> and I handled it as all other args. I can move EBX initialization into
> ix86_expand_call. Would still need some hint from target to init
> pic_offset_table_rtx with proper value in the beginning of function
> expand.

Maybe you can use get_hard_reg_initial_val for this?

Uros.


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 16:42 GMT+04:00 Uros Bizjak :
> On Thu, Aug 28, 2014 at 10:37 AM, Ilya Enkovich  
> wrote:
>> 2014-08-28 1:39 GMT+04:00 Jeff Law :
>>> On 08/26/14 15:42, Ilya Enkovich wrote:

 diff --git a/gcc/calls.c b/gcc/calls.c
 index 4285ec1..85dae6b 100644
 --- a/gcc/calls.c
 +++ b/gcc/calls.c
 @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
 ATTRIBUTE_UNUSED,
   call_expr_arg_iterator iter;
   tree arg;

 +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
 +  {
 +   gcc_assert (pic_offset_table_rtx);
 +   args[j].tree_value = make_tree (ptr_type_node,
 +   pic_offset_table_rtx);
 +   j--;
 +  }
 +
   if (struct_value_addr_value)
 {
 args[j].tree_value = struct_value_addr_value;
>>>
>>> So why do you need this?  Can't this be handled in the call/call_value
>>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>>> target hook here.  I think that would significantly simply the patch as
>>> well.
>>
>> GOT base address become an additional implicit arg with EBX relaxed
>> and I handled it as all other args. I can move EBX initialization into
>> ix86_expand_call. Would still need some hint from target to init
>> pic_offset_table_rtx with proper value in the beginning of function
>> expand.
>
> Maybe you can you use get_hard_reg_initial_val for this?

Actually there is no input hard reg holding GOT address.  Currently I
use initialization with ebx with following ebx initialization in
prolog_epilog pass.  But this is a temporary workaround.  It is
inefficient because always uses callee save reg to get GOT address.  I
suppose we should generate pseudo reg for pic_offset_table_rtx and
also set_got with this register as a destination in expand pass.
After register allocation set_got may be transformed into get_pc_thunk
call with proper hard reg.  But some target hook has to be used for
this.

Ilya

>
> Uros.


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Uros Bizjak
On Fri, Aug 22, 2014 at 2:21 PM, Ilya Enkovich  wrote:
> Hi,
>
> On Cauldron 2014 we had a couple of talks about relaxation of ebx usage in 
> 32bit PIC mode.  It was decided that the best approach would be to not fix 
> ebx register, use speudo register for GOT base address and let allocator do 
> the rest.  This should be similar to how clang and icc work with GOT base 
> address.  I've been working for some time on such patch and now want to share 
> my results.

+#define PIC_OFFSET_TABLE_REGNUM
 \
+  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC   \
+ || TARGET_PECOFF))
 \
+   || !flag_pic ? INVALID_REGNUM   \
+   : X86_TUNE_RELAX_PIC_REG ? (pic_offset_table_rtx ? INVALID_REGNUM   \
+  : REAL_PIC_OFFSET_TABLE_REGNUM)  \
+   : reload_completed ? REGNO (pic_offset_table_rtx)   \
: REAL_PIC_OFFSET_TABLE_REGNUM)

I'd like to avoid X86_TUNE_RELAX_PIC_REG and always treat EBX as an
allocatable register. This way, we can avoid all the mess with implicit
xchgs in atomic_compare_and_swap_doubleword. Also, having
allocatable EBX would allow us to introduce a __builtin_cpuid builtin
and clean up cpuid.h.


[PATCH] Fix peeling issue in PR62283

2014-08-28 Thread Richard Biener

The following fixes two testcases in PR62283 which are not
vectorized because they are deemed not profitable to vectorize.  They
are in fact not if we employ peeling for alignment but they
are (according to the cost model and my naive thinking) if
we do not do that.

And it's trivial to see that peeling a loop running 4 times
for alignment isn't a very bright idea if the vectorization
factor is 4 as well...

Thus the following patch adjusts heuristics deciding whether
to do peeling to avoid doing that if it surely (or likely)
results in a vector loop with zero iterations.  I made
that likely case make sure we will always at least end
up with one vectorized iteration to be profitable to
peel (which means at least 2 * VF - 1 iterations as if
peeling for unknown alignment we also need an epilogue
loop for the remaining iterations).

Unsurprisingly this requires some fiddling with existing
testcases that we now vectorize.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied
to trunk.

Richard.

2014-08-28  Richard Biener  

PR tree-optimization/62283
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Do not peel loops for alignment where the vector loop likely
doesn't run at least VF times.

* gfortran.dg/vect/pr62283.f: New testcase.
* gcc.dg/tree-ssa/cunroll-5.c: Adjust.
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
* gcc.dg/vect/vect-33.c: Likewise.

Index: gcc/tree-vect-data-refs.c
===
*** gcc/tree-vect-data-refs.c.orig  2014-08-28 14:31:22.991260098 +0200
--- gcc/tree-vect-data-refs.c   2014-08-28 14:31:31.943259482 +0200
*** vect_enhance_data_refs_alignment (loop_v
*** 1537,1546 
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
  do_peeling = false;
  
!   if (do_peeling && all_misalignments_unknown
&& vect_supportable_dr_alignment (dr0, false))
  {
- 
/* Check if the target requires to prefer stores over loads, i.e., if
   misaligned stores are more expensive than misaligned loads (taking
   drs with same alignment into account).  */
--- 1537,1556 
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
  do_peeling = false;
  
!   /* If we don't know how many times the peeling loop will run
!  assume it will run VF-1 times and disable peeling if the remaining
!  iters are less than the vectorization factor.  */
!   if (do_peeling
!   && all_misalignments_unknown
!   && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!   && (LOOP_VINFO_INT_NITERS (loop_vinfo)
! < 2 * (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1))
! do_peeling = false;
! 
!   if (do_peeling
!   && all_misalignments_unknown
&& vect_supportable_dr_alignment (dr0, false))
  {
/* Check if the target requires to prefer stores over loads, i.e., if
   misaligned stores are more expensive than misaligned loads (taking
   drs with same alignment into account).  */
*** vect_enhance_data_refs_alignment (loop_v
*** 1627,1632 
--- 1637,1650 
   &body_cost_vec);
if (!dr0 || !npeel)
  do_peeling = false;
+ 
+   /* If peeling by npeel will result in a remaining loop not iterating
+  enough to be vectorized then do not peel.  */
+   if (do_peeling
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + npeel))
+   do_peeling = false;
  }
  
if (do_peeling)
Index: gcc/testsuite/gfortran.dg/vect/pr62283.f
===
*** /dev/null   1970-01-01 00:00:00.0 +
--- gcc/testsuite/gfortran.dg/vect/pr62283.f2014-08-28 14:31:31.968259480 
+0200
***
*** 0 
--- 1,17 
+ C { dg-do compile }
+ C { dg-additional-options "-fvect-cost-model=dynamic" }
+   subroutine test2(x,y)
+   real x(4),y(4)
+   beta=3.141593
+   do i=1,4
+ y(i)=y(i)+beta*x(i)
+   end do
+   end
+ 
+   subroutine test3(x,y)
+   real x(4),y(4)
+   beta=3.141593
+   y=y+beta*x
+   end
+ C { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { 
vect_hw_misalign } } } }
+ C { dg-final { cleanup-tree-dump "vect" } }
Index: gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c
===
*** gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c.orig  2013-08-30 
09:55:26.721775507 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c   2014-08-28 14:44:39.436205264 
+0200
***

Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 2:54 PM, Ilya Enkovich  wrote:

> diff --git a/gcc/calls.c b/gcc/calls.c
> index 4285ec1..85dae6b 100644
> --- a/gcc/calls.c
> +++ b/gcc/calls.c
> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
> ATTRIBUTE_UNUSED,
>   call_expr_arg_iterator iter;
>   tree arg;
>
> +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
> +  {
> +   gcc_assert (pic_offset_table_rtx);
> +   args[j].tree_value = make_tree (ptr_type_node,
> +   pic_offset_table_rtx);
> +   j--;
> +  }
> +
>   if (struct_value_addr_value)
> {
> args[j].tree_value = struct_value_addr_value;

 So why do you need this?  Can't this be handled in the call/call_value
 expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
 inside ix86_expand_call?  Basically I'm not seeing the need for another
 target hook here.  I think that would significantly simply the patch as
 well.
>>>
>>> GOT base address become an additional implicit arg with EBX relaxed
>>> and I handled it as all other args. I can move EBX initialization into
>>> ix86_expand_call. Would still need some hint from target to init
>>> pic_offset_table_rtx with proper value in the beginning of function
>>> expand.
>>
>> Maybe you can you use get_hard_reg_initial_val for this?
>
> Actually there is no input hard reg holding GOT address.  Currently I
> use initialization with ebx with following ebx initialization in
> prolog_epilog pass.  But this is a temporary workaround.  It is
> inefficient because always uses callee save reg to get GOT address.  I
> suppose we should generate pseudo reg for pic_offset_table_rtx and
> also set_got with this register as a destination in expand pass.
> After register allocation set_got may be transformed into get_pc_thunk
> call with proper hard reg.  But some target hook has to be used for
> this.

Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
friends will automatically emit initialization of a pseudo from
pic_offset_table_rtx hard reg. After reload, real initialization of
pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
don't know if this works with current implementation of dynamic
pic_offset_table_rtx selection, though.

Uros.


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 17:01 GMT+04:00 Uros Bizjak :
> On Fri, Aug 22, 2014 at 2:21 PM, Ilya Enkovich  wrote:
>> Hi,
>>
>> On Cauldron 2014 we had a couple of talks about relaxation of ebx usage in 
>> 32bit PIC mode.  It was decided that the best approach would be to not fix 
>> ebx register, use speudo register for GOT base address and let allocator do 
>> the rest.  This should be similar to how clang and icc work with GOT base 
>> address.  I've been working for some time on such patch and now want to 
>> share my results.
>
> +#define PIC_OFFSET_TABLE_REGNUM
>  \
> +  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC   \
> + || TARGET_PECOFF))
>  \
> +   || !flag_pic ? INVALID_REGNUM   \
> +   : X86_TUNE_RELAX_PIC_REG ? (pic_offset_table_rtx ? INVALID_REGNUM   \
> +  : REAL_PIC_OFFSET_TABLE_REGNUM)  \
> +   : reload_completed ? REGNO (pic_offset_table_rtx)   \
> : REAL_PIC_OFFSET_TABLE_REGNUM)
>
> I'd like to avoid X86_TUNE_RELAX_PIC_REG and always treat EBX as an
> allocatable register. This way, we can avoid all mess with implicit
> xchgs in atomic_compare_and_swap_doubleword. Also, having
> allocatable EBX would allow us to introduce __builtin_cpuid builtin
> and cleanup cpiud.h.

We should show nice performance to have this feature enabled by
default.  Currently patch causes a set of performance losses. I have a
version of this patch where EBX is relaxed by a compiler flag, not
tune flag.

Ilya


[PATCH i386 AVX512] [29/n] Add narrowing vpmov.

2014-08-28 Thread Kirill Yukhin
Hello,
This patch introduces AVX-512 narrowing moves.

Bootstrapped.
AVX-512* tests on top of patch-set all pass
under simulator.

Is it ok for trunk?

gcc/
* config/i386/i386-modes.def: Add V12QI, V14QI, V6HI modes.
* config/i386/sse.md
(define_mode_iterator VI4_128_8_256): New.
(define_mode_iterator VI2_128_4_256): New.
(define_mode_iterator PMOV_DST_MODE): Rename to
(define_mode_iterator PMOV_DST_MODE_1): this.
(define_insn "*avx512bw_v32hiv32qi2"): New.
(define_insn "avx512bw_v32hiv32qi2_mask"): Ditto.
(define_expand "avx512bw_v32hiv32qi2_store_mask"): Ditto.
(define_mode_iterator PMOV_DST_MODE_2): Ditto.
(define_insn "*avx512vl_2"): Ditto.
(define_insn "_2_mask"): Ditto.
(define_expand "_2_store_mask"):
Ditto.
(define_mode_iterator PMOV_SRC_MODE_3): New.
(define_mode_attr pmov_dst_3): New.
(define_mode_attr pmov_dst_zeroed_3): New.
(define_mode_attr pmov_suff_3): New.
(define_insn "*avx512vl_vqi2"): New.
(define_insn "*avx512vl_v2div2qi2_store"): New.
(define_insn "avx512vl_v2div2qi2_mask"): New.
(define_insn "avx512vl_v2div2qi2_store_mask"): New.
(define_insn "*avx512vl_v4qi2_store"): New.
(define_insn "avx512vl_v4qi2_mask"): New.
(define_insn "avx512vl_v4qi2_store_mask"): New.
(define_insn "*avx512vl_v8qi2_store"): New.
(define_insn "avx512vl_v8qi2_mask"): New.
(define_insn "avx512vl_v8qi2_store_mask"): New.
(define_mode_iterator PMOV_SRC_MODE_4): New.
(define_mode_attr pmov_dst_4): New.
(define_mode_attr pmov_dst_zeroed_4): New.
(define_mode_attr pmov_suff_4): New.
(define_insn "*avx512vl_vhi2"): New.
(define_insn "*avx512vl_v4hi2_store"): New.
(define_insn "avx512vl_v4hi2_mask"): New.
(define_insn "avx512vl_v4hi2_store_mask"): New.
(define_insn "*avx512vl_v2div2hi2_store"): New.
(define_insn "avx512vl_v2div2hi2_mask"): New.
(define_insn "avx512vl_v2div2hi2_store_mask"): New.
(define_insn "*avx512vl_v2div2si2"): New.
(define_insn "*avx512vl_v2div2si2_store"): New.
(define_insn "avx512vl_v2div2si2_mask"): New.
(define_insn "avx512vl_v2div2si2_store_mask"): New.

--
Thanks, K

diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 07e5720..c24abe6 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -86,6 +86,9 @@ VECTOR_MODE (INT, TI, 1); /*   V1TI */
 VECTOR_MODE (INT, DI, 1); /*   V1DI */
 VECTOR_MODE (INT, SI, 1); /*   V1SI */
 VECTOR_MODE (INT, QI, 2); /*   V2QI */
+VECTOR_MODE (INT, QI, 12);/*  V12QI */
+VECTOR_MODE (INT, QI, 14);/*  V14QI */
+VECTOR_MODE (INT, HI, 6); /*   V6HI */
 
 INT_MODE (OI, 32);
 INT_MODE (XI, 64);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c9931b4..afdca58 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -321,6 +321,9 @@
 (define_mode_iterator VI8_AVX2_AVX512F
   [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
 
+(define_mode_iterator VI4_128_8_256
+  [V4SI V4DI])
+
 ;; All V8D* modes
 (define_mode_iterator V8FI
   [V8DF V8DI])
@@ -7948,48 +7951,549 @@
 ;;
 ;
 
-(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
+(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
 (define_mode_attr pmov_src_mode
   [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
 (define_mode_attr pmov_src_lower
   [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
-(define_mode_attr pmov_suff
+(define_mode_attr pmov_suff_1
   [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
 
 (define_insn "*avx512f_2"
-  [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
-   (any_truncate:PMOV_DST_MODE
+  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+   (any_truncate:PMOV_DST_MODE_1
  (match_operand: 1 "register_operand" "v,v")))]
   "TARGET_AVX512F"
-  "vpmov\t{%1, %0|%0, %1}"
+  "vpmov\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
 (define_insn "avx512f_2_mask"
-  [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
-(vec_merge:PMOV_DST_MODE
-  (any_truncate:PMOV_DST_MODE
+  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+(vec_merge:PMOV_DST_MODE_1
+  (any_truncate:PMOV_DST_MODE_1
 (match_operand: 1 "register_operand" "v,v"))
-  (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
+  (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
   (match_operand: 3 "register_opera

Re: [PATCH, CPP/23827] standard C++ should not have hex float preprocessing tokens

2014-08-28 Thread Ed Smith-Rowland

On 08/27/2014 03:40 PM, Jason Merrill wrote:

OK.

Jason


Is this OK for 4.9 also?
It builds and tests clean on x86_64-linux.

Attached slightly modified patch.

Ed


libcpp/

2014-08-28  Edward Smith-Rowland  <3dw...@verizon.net>

PR cpp/23827 - standard C++ should not have hex float preprocessor
tokens
* libcpp/init.c (lang_flags): Change CXX98 flag for extended numbers
from 1 to 0.
* libcpp/expr.c (cpp_classify_number): Write error message for improper
use of hex floating literal.


gcc/testsuite/

2014-08-28  Edward Smith-Rowland  <3dw...@verizon.net>

PR cpp/23827 - standard C++ should not have hex float preprocessor
tokens
* g++.dg/cpp/pr23827_cxx11.C: New.
* g++.dg/cpp/pr23827_cxx98.C: New.
* g++.dg/cpp/pr23827_cxx98_neg.C: New.
* gcc.dg/cpp/pr23827_c90.c: New.
* gcc.dg/cpp/pr23827_c90_neg.c: New.
* gcc.dg/cpp/pr23827_c99.c: New.

Index: libcpp/init.c
===
--- libcpp/init.c   (revision 214616)
+++ libcpp/init.c   (working copy)
@@ -98,7 +98,7 @@
   /* STDC99   */  { 1,  0,  1,   0,  0,  1,   1,   1,   0,   0,   0,0, 
 0 },
   /* STDC11   */  { 1,  0,  1,   0,  1,  1,   1,   1,   1,   0,   0,0, 
 0 },
   /* GNUCXX   */  { 0,  1,  1,   0,  0,  0,   1,   1,   0,   0,   0,0, 
 0 },
-  /* CXX98*/  { 0,  1,  1,   0,  0,  1,   1,   1,   0,   0,   0,0, 
 0 },
+  /* CXX98*/  { 0,  1,  0,   0,  0,  1,   1,   1,   0,   0,   0,0, 
 0 },
   /* GNUCXX11 */  { 1,  1,  1,   0,  1,  0,   1,   1,   1,   1,   1,0, 
 0 },
   /* CXX11*/  { 1,  1,  1,   0,  1,  1,   1,   1,   1,   1,   1,0, 
 0 },
   /* GNUCXX1Y */  { 1,  1,  1,   0,  1,  0,   1,   1,   1,   1,   1,1, 
 1 },
Index: libcpp/expr.c
===
--- libcpp/expr.c   (revision 214616)
+++ libcpp/expr.c   (working copy)
@@ -540,9 +540,16 @@
SYNTAX_ERROR_AT (virtual_location,
 "no digits in hexadecimal floating constant");
 
-  if (radix == 16 && CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, c99))
-   cpp_error_with_line (pfile, CPP_DL_PEDWARN, virtual_location, 0,
-"use of C99 hexadecimal floating constant");
+  if (radix == 16 && CPP_PEDANTIC (pfile)
+ && !CPP_OPTION (pfile, extended_numbers))
+   {
+ if (CPP_OPTION (pfile, cplusplus))
+   cpp_error_with_line (pfile, CPP_DL_PEDWARN, virtual_location, 0,
+"use of C++11 hexadecimal floating constant");
+ else
+   cpp_error_with_line (pfile, CPP_DL_PEDWARN, virtual_location, 0,
+"use of C99 hexadecimal floating constant");
+   }
 
   if (float_flag == AFTER_EXPON)
{
Index: gcc/testsuite/g++.dg/cpp/pr23827_cxx11.C
===
--- gcc/testsuite/g++.dg/cpp/pr23827_cxx11.C(revision 0)
+++ gcc/testsuite/g++.dg/cpp/pr23827_cxx11.C(working copy)
@@ -0,0 +1,23 @@
+// { dg-do run { target c++11 } }
+// { dg-options "-pedantic-errors" }
+
+#define f (
+#define l )
+#define str(x) #x
+#define xstr(x) str(x)
+
+// C90 and C++98: "0x1p+( 0x1p+)"
+// C99 and C++11: "0x1p+f 0x1p+l"
+const char *s = xstr(0x1p+f 0x1p+l);
+
+extern "C" void abort (void);
+extern "C" int strcmp (const char *, const char *);
+
+int
+main()
+{
+  if (strcmp (s, "0x1p+( 0x1p+)"))
+return 0; // Correct C99 and C++11 behavior.
+  else
+abort (); // Correct C90 and C++ behavior.
+}
Index: gcc/testsuite/g++.dg/cpp/pr23827_cxx98.C
===
--- gcc/testsuite/g++.dg/cpp/pr23827_cxx98.C(revision 0)
+++ gcc/testsuite/g++.dg/cpp/pr23827_cxx98.C(working copy)
@@ -0,0 +1,23 @@
+// { dg-do run { target c++98_only } }
+// { dg-options "-ansi -pedantic-errors" }
+
+#define f (
+#define l )
+#define str(x) #x
+#define xstr(x) str(x)
+
+// C90 and C++98: "0x1p+( 0x1p+)"
+// C99 and C++11: "0x1p+f 0x1p+l"
+const char *s = xstr(0x1p+f 0x1p+l);
+
+extern "C" void abort (void);
+extern "C" int strcmp (const char *, const char *);
+
+int
+main()
+{
+  if (strcmp (s, "0x1p+( 0x1p+)"))
+abort (); // Correct C99 and C++11 behavior.
+  else
+return 0; // Correct C90 and C++ behavior.
+}
Index: gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C
===
--- gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C(revision 0)
+++ gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C(working copy)
@@ -0,0 +1,4 @@
+// { dg-do compile { target c++98_only } }
+/* { dg-options "-ansi -pedantic-errors" }  */
+
+double x = 0x3.1415babep0; // { dg-error "use of C..11 hexadecimal floating 
constant" }
Index: gcc/testsuite/gcc.dg/cpp/pr23827_c90.c
==

Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 17:08 GMT+04:00 Uros Bizjak :
> On Thu, Aug 28, 2014 at 2:54 PM, Ilya Enkovich  wrote:
>
>> diff --git a/gcc/calls.c b/gcc/calls.c
>> index 4285ec1..85dae6b 100644
>> --- a/gcc/calls.c
>> +++ b/gcc/calls.c
>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>> ATTRIBUTE_UNUSED,
>>   call_expr_arg_iterator iter;
>>   tree arg;
>>
>> +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>> +  {
>> +   gcc_assert (pic_offset_table_rtx);
>> +   args[j].tree_value = make_tree (ptr_type_node,
>> +   pic_offset_table_rtx);
>> +   j--;
>> +  }
>> +
>>   if (struct_value_addr_value)
>> {
>> args[j].tree_value = struct_value_addr_value;
>
> So why do you need this?  Can't this be handled in the call/call_value
> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
> inside ix86_expand_call?  Basically I'm not seeing the need for another
> target hook here.  I think that would significantly simplify the patch as
> well.

 GOT base address becomes an additional implicit arg with EBX relaxed
 and I handled it as all other args. I can move EBX initialization into
 ix86_expand_call. Would still need some hint from target to init
 pic_offset_table_rtx with proper value in the beginning of function
 expand.
>>>
>>> Maybe you can you use get_hard_reg_initial_val for this?
>>
>> Actually there is no input hard reg holding GOT address.  Currently I
>> use initialization with ebx with following ebx initialization in
>> prolog_epilog pass.  But this is a temporary workaround.  It is
>> inefficient because always uses callee save reg to get GOT address.  I
>> suppose we should generate pseudo reg for pic_offset_table_rtx and
>> also set_got with this register as a destination in expand pass.
>> After register allocation set_got may be transformed into get_pc_thunk
>> call with proper hard reg.  But some target hook has to be used for
>> this.
>
> Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
> friends will automatically emit initialization of a pseudo from
> pic_offset_table_rtx hard reg. After reload, real initialization of
> pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
> don't know if this works with current implementation of dynamic
> pic_offset_table_rtx selection, though.

That means you should choose some hard reg early, before register
allocation, to be used for PIC reg initialization.  I do not like that we
have to do this; I want to just generate set_got with a pseudo reg and
not involve any additional hard reg.  That would look like

(insn/f 168 167 169 2 (parallel [
(set (reg:SI 127)
(unspec:SI [
(const_int 0 [0])
] UNSPEC_SET_GOT))
(clobber (reg:CC 17 flags))
]) test.cc:42 -1
 (expr_list:REG_CFA_FLUSH_QUEUE (nil)
(nil)))

after expand pass.  r127 is pic_offset_table_rtx here. And after
reload it would become:

(insn/f 168 167 169 2 (parallel [
(set (reg:SI 3 bx)
(unspec:SI [
(const_int 0 [0])
] UNSPEC_SET_GOT))
(clobber (reg:CC 17 flags))
]) test.cc:42 -1
 (expr_list:REG_CFA_FLUSH_QUEUE (nil)
(nil)))

No additional actions are then required in pro_and_epilogue.  It also
simplifies the analysis of whether we should generate set_got at all.
Currently we check whether the hard reg is ever live, which is wrong when
ebx is not fixed, because a use of the hard reg that is used to initialize
the GOT does not necessarily mean the GOT is used.  With my proposed
scheme, an unused GOT would simply mean that DCE removes the useless set_got.

Ilya

>
> Uros.


[PATCH][match-and-simplify] Fix NON_LVALUE_EXPR leaking

2014-08-28 Thread Richard Biener

This tries to prevent NON_LVALUE_EXPR from leaking into GIMPLE
(happens on trunk also, but mostly salvaged by re-gimplifying
GENERIC folding results which strips them).  We need a better
solution in the end (fix the C++ frontend), but for now the
following should fix things enough.  (fingers crossed)

Committed.

I plan to push the tree-cfg.c hunk to trunk as well (once it
passed bootstrap/testing there) - expand_debug_expr will
ICE on NON_LVALUE_EXPRs (which is where I noticed the above).

Richard.

2014-08-28  Richard Biener  

* tree-cfg.c (verify_gimple_assign_unary): Do not allow
NON_LVALUE_EXPR in gimple.
* genmatch.c (main): Always define GENERIC and GIMPLE.
* match-constant-folding.pd (x OP 0): Constrain when we
generate NON_LVALUE_EXPR further.

Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (revision 214677)
+++ gcc/tree-cfg.c  (working copy)
@@ -3572,7 +3572,6 @@ verify_gimple_assign_unary (gimple stmt)
 case ABS_EXPR:
 case BIT_NOT_EXPR:
 case PAREN_EXPR:
-case NON_LVALUE_EXPR:
 case CONJ_EXPR:
   break;
 
Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 214677)
+++ gcc/genmatch.c  (working copy)
@@ -2544,6 +2544,7 @@ main(int argc, char **argv)
   if (!cpp_read_main_file (r, input))
 return 1;
   cpp_define (r, gimple ? "GIMPLE=1": "GENERIC=1");
+  cpp_define (r, gimple ? "GENERIC=0": "GIMPLE=0");
 
   /* Pre-seed operators.  */
   operators = new hash_table (1024);
Index: gcc/match-constant-folding.pd
===
--- gcc/match-constant-folding.pd   (revision 214677)
+++ gcc/match-constant-folding.pd   (working copy)
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.
 (for op in plus pointer_plus minus bit_ior bit_xor
   (simplify
 (op @0 integer_zerop)
-(if (!in_gimple_form)
+(if (GENERIC && !in_gimple_form)
   /* ???  fold_binary adds non_lvalue here and "fixes" the C++
 run of Wsizeof-pointer-memaccess1.c, preserving enough of
 sizeof (&a) + 0 because sizeof (&a) is maybe_lvalue_p ()


Re: [PATCH, CPP/23827] standard C++ should not have hex float preprocessing tokens

2014-08-28 Thread Marc Glisse

On Thu, 28 Aug 2014, Ed Smith-Rowland wrote:


Is this OK for 4.9 also?
It builds and tests clean on x86_64-linux.

Attached slightly modified patch.


In my opinion it is not appropriate for a backport, no. If someone was 
using hex floats with -std=c++98 with 4.9.1, it should still work with 
4.9.2, I only expect to have to fix such things when moving to 5.0.


--
Marc Glisse


Re: [PATCH, CPP/23827] standard C++ should not have hex float preprocessing tokens

2014-08-28 Thread Jason Merrill

On 08/28/2014 09:41 AM, Marc Glisse wrote:

In my opinion it is not appropriate for a backport, no. If someone was
using hex floats with -std=c++98 with 4.9.1, it should still work with
4.9.2, I only expect to have to fix such things when moving to 5.0.


Agreed.

Jason




[PATCH i386 AVX512] [30/n] Add FMA patterns.

2014-08-28 Thread Kirill Yukhin
Hello,
This patch adds patterns to support FMA new insns.

Bootstrapped.
AVX-512* tests on top of patch-set all pass
under simulator.

Is it ok for trunk?

gcc/
* config/i386/sse.md
(define_mode_iterator VF_AVX512VL): New.
(define_mode_iterator FMAMODEM): Allow 128/256bit evex version.
(define_mode_iterator FMAMODE):  Ditto.
(define_expand "avx512f_fmadd__maskz"): Delete.
(define_expand "_fmadd__maskz"): New.
(define_insn
"fma_fmadd_"): Delete.
(define_insn
"fma_fmadd_noavx512_":
New.
(define_insn "avx512f_fmadd__mask"): Delete.
(define_insn "_fmadd__mask"): New.
(define_insn "avx512f_fmadd__mask3"): Delete.
(define_insn "_fmadd__mask3"): New.
(define_insn
"fma_fmsub_"): Delete.
(define_insn
"fma_fmsub_noavx512"):
New.
(define_insn
"fma_fmadd_"): Use 
VF_AVX512VL.
(define_insn "avx512f_fmadd__mask"): Delete.
(define_insn "_fmadd__mask"): New.
(define_insn "avx512f_fmadd__mask3"): Delete.
(define_insn "_fmadd__mask3"): New.
(define_insn
"fma_fmsub_"): Delete.
(define_insn
"fma_fmsub_noavx512"):
New.
(define_insn "avx512f_fmsub__mask"): Delete.
(define_insn "_fmsub__mask"): New.
(define_insn "avx512f_fmsub__mask3"): Delete.
(define_insn "_fmsub__mask3"): New.
(define_insn
"fma_fnmadd_"): 
Delete.
(define_insn

"fma_fnmadd_noavx512_"):
New.
(define_insn 
"fma_fnmadd_"):
Use VF_AVX512VL.
(define_insn "avx512f_fnmadd__mask"): Delete.
(define_insn "_fnmadd__mask"): New.
(define_insn "avx512f_fnmadd__mask3"): Delete.
(define_insn "_fnmadd__mask3"): New.
(define_insn 
"fma_fnmsub_"):
Delete.
(define_insn

"fma_fnmsub_noavx512_"): New.
(define_insn 
"fma_fnmsub_"):
Use VF_AVX512VL.
(define_insn "avx512f_fnmsub__mask"): Delete.
(define_insn "_fnmsub__mask"): New.
(define_insn "avx512f_fnmsub__mask3"): Delete.
(define_insn "_fnmsub__mask3"): New.
(define_expand "avx512f_fmaddsub__maskz"): 
Delete.
(define_expand "_fmaddsub__maskz"): 
New.
(define_insn 
"fma_fmaddsub_"):
Rename to
(define_insn

"fma_fmaddsub_noavx512_"): 
this.
(define_insn 
"fma_fmaddsub_"):
Use VF_AVX512VL.
(define_insn "avx512f_fmaddsub__mask"): Delete.
(define_insn "_fmaddsub__mask"): New.
(define_insn "avx512f_fmaddsub__mask3"): Delete.
(define_insn "_fmaddsub__mask3"): New.
(define_insn 
"fma_fmsubadd_"):
Rename to
(define_insn

"fma_fmsubadd_noavx512_"): 
this.
(define_insn 
"fma_fmsubadd_"):
Use VF_AVX512VL.
(define_insn "avx512f_fmsubadd__mask"): Delete.
(define_insn "_fmsubadd__mask"): New.
(define_insn "avx512f_fmsubadd__mask3"): Delete.
(define_insn "_fmsubadd__mask3"): New.

--
Thanks, K

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index afdca58..310c29f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -239,6 +239,10 @@
   [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
 
+(define_mode_iterator VF_AVX512VL
+  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -2960,10 +2964,10 @@
 (define_mode_iterator FMAMODEM
   [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
(DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
-   (V4SF "TARGET_FMA || TARGET_FMA4")
-   (V2DF "TARGET_FMA || TARGET_FMA4")
-   (V8SF "TARGET_FMA || TARGET_FMA4")
-   (V4DF "TARGET_FMA || TARGET_FMA4")
+   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
+   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
+   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
+   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
 
@@ -2997,14 +3001,14 @@
 
 ;; The builtins for intrinsics are not constrained by SSE math enabled.
 (define_mode_iterator FMAMODE
-  [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-   (V4SF "TARGET_FMA || TARGET_FMA4")
-   (V2DF "TARGET_FMA || TARGET_FMA4")
-   (V8SF "TARGET_FMA || TARGET_FMA4")
-   (V4DF "TARGET_FMA || TARGET_FMA4")
-   (V16SF "TARGET_AVX512F")
-   (V8DF "TARGET_AVX512F")])
+ [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
+  (V2DF "TARGET_FMA || TARGET_FMA

[PATCH][match-and-simplify] Move simplify first in fold_stmt

2014-08-28 Thread Richard Biener

Now that the fold_stmt re-org is merged from trunk this moves the
simplify dispatch first before doing the legacy stuff.  It also
adds dumping to this place and removes dumping from
tree-ssa-forwprop.c (as there it catches all fold_stmt transforms
done).  I now also dump the associated sequence which makes
two match testcases fail (I've yet to investigate how to best
write multi-line matching regexps in tcl...).

The patch also adds more foldings from builtins.c and splits
the match-1.c testcase.

Committed.

Richard.

2014-08-28  Richard Biener  

* gimple-fold.c (fold_stmt_1): Move gimple_simplify dispatch
first, add dumping here.
* tree-ssa-forwprop.c (pass_forwprop::execute): Remove dumping.
* match-builtin.pd: Implement more patterns and fix existing ones.

testsuite/
* gcc.dg/tree-ssa/match-1.c: Split into ...
* gcc.dg/tree-ssa/match-builtins.c: ... this ...
* tree-ssa/match-builtins-fast-math.c: ... and this.

Index: gcc/gimple-fold.c
===
--- gcc/gimple-fold.c   (revision 214676)
+++ gcc/gimple-fold.c   (working copy)
@@ -2876,6 +2876,71 @@ fold_stmt_1 (gimple_stmt_iterator *gsi,
 default:;
 }
 
+  /* Dispatch to pattern-based folding.  */
+  /* ???  Change "inplace" semantics to allow replacing a stmt if
+ no further stmts need to be inserted (basically disallow
+ creating of new SSA names).  */
+  if (!inplace
+  || is_gimple_assign (stmt))
+{
+  gimple_seq seq = NULL;
+  code_helper rcode;
+  tree ops[3] = {};
+  if (gimple_simplify (stmt, &rcode, ops, inplace ? NULL : &seq, valueize))
+   {
+ if (is_gimple_assign (stmt)
+ && rcode.is_tree_code ())
+   {
+ if ((!inplace
+  || gimple_num_ops (stmt) <= get_gimple_rhs_num_ops (rcode))
+ /* Play safe and do not allow abnormals to be mentioned in
+newly created statements.  */
+ && !((TREE_CODE (ops[0]) == SSA_NAME
+   && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[0]))
+  || (ops[1]
+  && TREE_CODE (ops[1]) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[1]))
+  || (ops[2]
+  && TREE_CODE (ops[2]) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2]
+   {
+ gimple_assign_set_rhs_with_ops_1 (gsi, rcode,
+   ops[0], ops[1], ops[2]);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "gimple_simplified to ");
+ if (!gimple_seq_empty_p (seq))
+   print_gimple_seq (dump_file, seq, 0, TDF_SLIM);
+ print_gimple_stmt (dump_file, gsi_stmt (*gsi),
+0, TDF_SLIM);
+   }
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ changed = true;
+   }
+   }
+ else if (!inplace)
+   {
+ if (gimple_has_lhs (stmt))
+   {
+ tree lhs = gimple_get_lhs (stmt);
+ maybe_push_res_to_seq (rcode, TREE_TYPE (lhs),
+ops, &seq, lhs);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "gimple_simplified to ");
+ print_gimple_seq (dump_file, seq, 0, TDF_SLIM);
+   }
+ gsi_replace_with_seq_vops (gsi, seq);
+ changed = true;
+   }
+ else
+   gcc_unreachable ();
+   }
+   }
+}
+
+  stmt = gsi_stmt (*gsi);
+
   /* Fold the main computation performed by the statement.  */
   switch (gimple_code (stmt))
 {
@@ -3012,58 +3077,7 @@ fold_stmt_1 (gimple_stmt_iterator *gsi,
}
 }
 
-  /* Dispatch to pattern-based folding.
- ???  Do this after the previous stuff as fold_stmt is used to make
- stmts valid gimple again via maybe_fold_reference of ops.  */
-  /* ???  Change "inplace" semantics to allow replacing a stmt if
- no further stmts need to be inserted (basically disallow
- creating of new SSA names).  */
-  if (inplace
-  && !is_gimple_assign (stmt))
-return changed;
-
-  gimple_seq seq = NULL;
-  code_helper rcode;
-  tree ops[3] = {};
-  if (!gimple_simplify (stmt, &rcode, ops, inplace ? NULL : &seq, valueize))
-return changed;
-
-  if (is_gimple_assign (stmt)
-  && rcode.is_tree_code ())
-{
-  if (inplace
- && gimple_num_ops (stmt) <= get_gimple_rhs_num_ops (rcode))
-   return changed;
-  /* Play safe and do not allow abnormals to be mentioned in
- newly created

Re: [debug-early] reuse variable DIEs and fix their context

2014-08-28 Thread Richard Biener
On Wed, Aug 27, 2014 at 4:42 AM, Aldy Hernandez  wrote:
> This patch fixes a bunch of guality failures.  With it I get 144 guality.exp
> failures vs. 163 for "make check-gcc RUNTESTFLAGS=guality.exp".  A lot
> better than 100% fail rate ;-).
>
> Variable DIEs were not being reused.  Instead, variable DIEs even had the
> wrong context (unilaterally the compilation unit).  The attached patch
> reuses variable DIEs that have been outputted earlier.  It also fixes the
> context by correcting the context on the second round.
>
> I have also added a bit field to the DIE structure to record if a DIE has
> been generated early.
>
> Again, this is all a rough draft, but feel free to comment.

I wonder if we can't just force a proper context DIE (ISTR dwarf2out.c
lazily handles some contexts in some circumstances).  All parent
"trees" should be readily available and we should be able to create
DIEs for them.

Richard.

> Committed to branch.
> Aldy


Re: [PATCH 1/4] aarch64: Improve epilogue unwind info

2014-08-28 Thread Richard Earnshaw
On 28/08/14 13:36, Jiong Wang wrote:

> I am curious why "dwarf2out_frame_debug_expr" only handles
> PRE/POST_MODIFY on the dest,
> with no handling on the src?
> 
> for example the following rule:
> (set reg (mem_post_modify sp offset))
> 



Probably because dwarf2out was originally written to handle just the
prologue code; and in that case you would only get writes to the stack,
not reads from it.

R.



Re: [PATCH, CPP/23827] standard C++ should not have hex float preprocessing tokens

2014-08-28 Thread Ed Smith-Rowland

On 08/28/2014 09:47 AM, Jason Merrill wrote:

On 08/28/2014 09:41 AM, Marc Glisse wrote:

In my opinion it is not appropriate for a backport, no. If someone was
using hex floats with -std=c++98 with 4.9.1, it should still work with
4.9.2, I only expect to have to fix such things when moving to 5.0.


Agreed.

Jason





OK, I understand.  Makes sense.
It is a capability change, not really just a bugfix per se.
On that note, is it worth a sentence in Changes?

Ed



Re: Migrating gcc.c-torture

2014-08-28 Thread Bernd Schmidt

On 08/22/2014 10:39 PM, Mike Stump wrote:

On Aug 22, 2014, at 5:18 AM, Bernd Schmidt  wrote:


Here's another attempt.



Ok?


Ok.  Thanks a ton for doing the work.


The next question would be what to do with gcc.c-torture/unsorted.  As 
far as I can tell, these are all just plain compile tests, except for 
dump-noaddr.c.  Ok to move all except for that test to 
c-torture/compile, adjusting SFset.c and DFcmp.c which require int32plus?



Bernd



[patch] Adjust comments in testsuite/ext/random/

2014-08-28 Thread Jonathan Wakely

The tests for the non-standard distributions have bogus standard
references. [rand.concept.dist] comes from an old C++0x draft, and the
[rand.dist.ext.*] labels are entirely fictional because obviously
non-standard extensions are not in the standard.

Tested x86_64-linux, committed to trunk.
commit f6da27eb1cbb9a6bbcfe2a6e39f99cb1cb372704
Author: Jonathan Wakely 
Date:   Tue Apr 15 18:48:13 2014 +0100

	* testsuite/ext/random/*: Fix incorrect standard references in
	comments.

diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/default.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/default.cc
index c1b3f56..a6dbd33 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/default.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/default.cc
@@ -20,8 +20,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.4.8.3.* Class template hypergeometric_distribution [rand.dist.ext.hypergeometric]
-// 26.4.2.4 Concept RandomNumberDistribution [rand.concept.dist]
+// Class template hypergeometric_distribution
+// 26.5.1.6 Random number distribution requirements [rand.req.dist]
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/parms.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/parms.cc
index 0f541d0..8d98ab8 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/parms.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/cons/parms.cc
@@ -20,8 +20,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.4.8.3.* Class template hypergeometric_distribution [rand.dist.ext.hypergeometric]
-// 26.4.2.4 Concept RandomNumberDistribution [rand.concept.dist]
+// Class template hypergeometric_distribution
+// 26.5.1.6 Random number distribution requirements [rand.req.dist]
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/equal.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/equal.cc
index daf4a75..7f20ef6 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/equal.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/equal.cc
@@ -20,7 +20,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.5.8.4.5 Class template rice_distribution [rand.dist.ext.hypergeometric]
+// Class template hypergeometric_distribution
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/inequal.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/inequal.cc
index c4d3c8c..4f20232 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/inequal.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/inequal.cc
@@ -20,7 +20,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.5.8.4.5 Class template rice_distribution [rand.dist.ext.hypergeometric]
+// Class template hypergeometric_distribution
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/serialize.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/serialize.cc
index bd2d579..b9958c9 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/serialize.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/serialize.cc
@@ -20,8 +20,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.4.8.3.* Class template hypergeometric_distribution [rand.dist.ext.hypergeometric]
-// 26.4.2.4 Concept RandomNumberDistribution [rand.concept.dist]
+// Class template hypergeometric_distribution
+// 26.5.1.6 Random number distribution requirements [rand.req.dist]
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
index ceaaf38..f730ec1 100644
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
@@ -21,9 +21,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// 26.4.8.3.* Class template hypergeometric_distribution
-// [rand.dist.ext.hypergeometric]
-// 26.4.2.4 Concept RandomNumberDistribution [rand.concept.dist]
+// Class template hypergeometric_distribution
+// 26.5.1.6 Random number distribution requirements [rand.req.dist]
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/ext/random/hypergeomet

Re: [PATCH] Move -fbuiltin from c.opt to common.opt and change it to common group

2014-08-28 Thread Kito Cheng
Hi Richard:

>> -fno-builtin is seem not only for the c family front-end, but also
>> used in LTO now, so move it to common.opt and change it to `Common`.
>
> Please leave it in c-family and just add LTO to the set of supported
> languages.  -fno-builtin isn't meaningful for other frontends
> and we just happen to use the flag.
> If then it makes more sense to move -fhosted and -ffreestanding
> though I don't know how meaningful those are for other frontends.
>
> Or create a "proper" flag to communicate that the middle-end
> should avoid creating new calls to builtins at all cost
> (well, that's really what -ffreestanding is about).

-fno-builtin is meaningless for other front ends, but not for the middle end.
However, `-fno-builtin` is more explicit than -fhosted and -ffreestanding:
when people see the option `-fno-builtin`, they will know that this
option means "do not use builtins implicitly". Most importantly,
-fno-builtin is better known than -fhosted or -ffreestanding.

Also, flag_no_builtin is already used in gcc/lto/lto-lang.c:def_builtin_1,
so my patch is not the first user of this option in the LTO front end.

> Richard.


Re: [PATCH 4.8] libstdc++ pretty-printers: Backport Python 3 support from mainline

2014-08-28 Thread Jonathan Wakely

Applied


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 3:29 PM, Ilya Enkovich  wrote:

>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>> index 4285ec1..85dae6b 100644
>>> --- a/gcc/calls.c
>>> +++ b/gcc/calls.c
>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>> ATTRIBUTE_UNUSED,
>>>   call_expr_arg_iterator iter;
>>>   tree arg;
>>>
>>> +if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>> +  {
>>> +   gcc_assert (pic_offset_table_rtx);
>>> +   args[j].tree_value = make_tree (ptr_type_node,
>>> +   pic_offset_table_rtx);
>>> +   j--;
>>> +  }
>>> +
>>>   if (struct_value_addr_value)
>>> {
>>> args[j].tree_value = struct_value_addr_value;
>>
>> So why do you need this?  Can't this be handled in the call/call_value
>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE 
>> from
>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>> target hook here.  I think that would significantly simplify the patch as
>> well.
>
> GOT base address become an additional implicit arg with EBX relaxed
> and I handled it as all other args. I can move EBX initialization into
> ix86_expand_call. Would still need some hint from target to init
> pic_offset_table_rtx with proper value in the beginning of function
> expand.

 Maybe you can use get_hard_reg_initial_val for this?
>>>
>>> Actually there is no input hard reg holding GOT address.  Currently I
>>> use initialization with ebx with following ebx initialization in
>>> prolog_epilog pass.  But this is a temporary workaround.  It is
>>> inefficient because always uses callee save reg to get GOT address.  I
>>> suppose we should generate pseudo reg for pic_offset_table_rtx and
>>> also set_got with this register as a destination in expand pass.
>>> After register allocation set_got may be transformed into get_pc_thunk
>>> call with proper hard reg.  But some target hook has to be used for
>>> this.
>>
>> Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
>> friends will automatically emit initialization of a pseudo from
>> pic_offset_table_rtx hard reg. After reload, real initialization of
>> pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
>> don't know if this works with current implementation of dynamic
>> pic_offset_table_rtx selection, though.
>
> That means you should choose some hard reg early before register
> allocation to be used for PIC reg initialization.  I do not like we
> have to do this and want to just generate set_got with pseudo reg and
> do not involve any additional hard reg. That would look like
>
> (insn/f 168 167 169 2 (parallel [
> (set (reg:SI 127)
> (unspec:SI [
> (const_int 0 [0])
> ] UNSPEC_SET_GOT))
> (clobber (reg:CC 17 flags))
> ]) test.cc:42 -1
>  (expr_list:REG_CFA_FLUSH_QUEUE (nil)
> (nil)))
>
> after expand pass.  r127 is pic_offset_table_rtx here. And after
> reload it would become:
>
> (insn/f 168 167 169 2 (parallel [
> (set (reg:SI 3 bx)
> (unspec:SI [
> (const_int 0 [0])
> ] UNSPEC_SET_GOT))
> (clobber (reg:CC 17 flags))
> ]) test.cc:42 -1
>  (expr_list:REG_CFA_FLUSH_QUEUE (nil)
> (nil)))
>
> And no additional actions are required on pro_and_epilogue.  Also it
> simplifies analysis whether we should generate set_got at all.
> Currently we check whether the hard reg is ever live, which is wrong with a
> non-fixed ebx because a use of the hard reg used to init the GOT doesn't
> imply GOT usage.  And with my proposed scheme an unused GOT would mean DCE
> just removes the useless set_got.

Yes, this is better. I was under the impression you wanted to retain the
current initialization insertion in expand_prologue.

Uros.


Re: [PATCH libstdc++ v5] - Add xmethods for std::vector and std::unique_ptr

2014-08-28 Thread Siva Chandra
On Wed, Aug 27, 2014 at 7:11 PM, Tom Tromey  wrote:
> Siva> My patch is still using a single function to register libstdc++
> Siva> xmethods. Do you mean there should be a single function for pretty
> Siva> printers and xmethods together?
>
> Yeah, that's my view.

I am probably not understanding it right again. Are you suggesting
that in hook.in, we just have a single function call like this:

register_python_hooks (gdb.current_objfile ())

and, this function register_python_hooks lives somewhere else and
calls register_libstdcxx_printers and register_libstdcxx_xmethods?

Thanks,
Siva Chandra


Re: [PATCH 225/236] Work towards NEXT_INSN/PREV_INSN requiring insns as their params

2014-08-28 Thread Richard Henderson
On 08/26/2014 10:15 AM, David Malcolm wrote:
> Attached is a revised version of #225, with the following changes:
> 
> * fix for the above: avoid introducing a new shadow name "note" within
> force_nonfallthru_and_redirect by introducing a new local rtx_insn *
> "new_head" and renaming "note" to it in the appropriate places.
> 
> * changed an as_a<> to a safe_as_a<> within
> function.c:thread_prologue_and_epilogue_insns to fix a segfault seen
> during an earlier bootstrap
> 
> Successfully bootstrapped on x86_64 (Fedora 20), on top of the rest of
> the patches leading up to it (including the revised ones for #220-#221
> that rth recently approved).

Ok.


r~


Re: [PATCH GCC]Fix broken Canadian when checking isl library support

2014-08-28 Thread Sebastian Pop
Richard Biener wrote:
> On Mon, Aug 25, 2014 at 11:07 PM, Sebastian Pop  wrote:
> > Sebastian Pop wrote:
> >> Richard Biener wrote:
> >> > I think it would be better to identify a set of features we rely on that
> >> > are not present in earlier versions and make the test a link
> >> > test unconditionally.
> >> >
> >> > Tobias, are there include files / types / functions we require
> >> > that are not available in earlier versions?
> >>
> >> The version 0.12 of ISL has an include file isl/val.h that is not present 
> >> in
> >> previous versions of ISL.
> >>
> >> There also was a patch from Mircea a few weeks ago that was missing the
> >> configure bits to check that the isl version contained isl/val.h.
> >> I will update both patches and submit for review.
> >
> > I see that Mircea's patch has been committed:
> >
> > commit c5ec3cc336c7d42d9ad2995395d430b99a9a34cc
> > Author: mircea 
> > Date:   Mon Aug 11 15:05:48 2014 +
> >
> > Replacement of isl_int by isl_val
> >
> > git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@213816 
> > 138bc75d-0d04-0410-961f-82ee72b054a4
> >
> > Attached a patch to check for isl/val.h instead of ISL's release number.
> >
> > Bootstrapping on x86-64. Ok to commit?
> 
> Ok.  I suppose the "checking for version 0.12 of ISL" is now slightly
> misleading - maybe change it to "checking for compatible ISL"?

Committed r214683 with the suggested change.



Re: [PATCH 3/4] aarch64: Tidy prologue local variables

2014-08-28 Thread Richard Henderson
On 08/26/2014 05:58 AM, Jiong Wang wrote:
> On 22/08/14 23:05, Richard Henderson wrote:
>> Don't continually re-read data from cfun->machine.
>>
>> * config/aarch64/aarch64.c (aarch64_expand_prologue): Load
>> cfun->machine->frame.hard_fp_offset into a local variable.
>> ---
>>   gcc/config/aarch64/aarch64.c | 14 +++---
>>   1 file changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index dcca446..c890773 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -2194,18 +2194,18 @@ aarch64_expand_prologue (void)
>> */
>> HOST_WIDE_INT frame_size, offset;
>> HOST_WIDE_INT fp_offset;/* Offset from hard FP to SP.  */
>> +  HOST_WIDE_INT hard_fp_offset;
>> rtx insn;
>>   aarch64_layout_frame ();
>>   -  if (flag_stack_usage_info)
>> -current_function_static_stack_size = cfun->machine->frame.frame_size;
>> -
>> frame_size = cfun->machine->frame.frame_size;
>> -  offset = cfun->machine->frame.frame_size;
>> +  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
>> +  offset = frame_size;
>> +  fp_offset = frame_size - hard_fp_offset;
> 
> there is a field "hardfp_offset" in aarch64_frame, and I think that field is
> not used and not initialized correctly.
> 
> how about hoisting the calculation to aarch64_layout_frame to avoid duplicated
> calculation here and there,  something like:
> 
>  cfun->machine->frame.hardfp_offset = (cfun->machine->frame.frame_size-
>cfun->machine->frame.hard_fp_offset);
> 
> then use it directly in expand_epilogue:
> 
> fp_offset = cfun->machine->frame.hardfp_offset;

I'd go the other way, and simply delete hardfp_offset as unused.  We need the
other two inputs to the subtraction for other reasons, so we don't save
anything by pre-computing the subtract.


r~


Re: [PATCH 3/4] aarch64: Tidy prologue local variables

2014-08-28 Thread Jiong Wang

On 28/08/14 17:48, Richard Henderson wrote:

On 08/26/2014 05:58 AM, Jiong Wang wrote:


there is a field "hardfp_offset" in aarch64_frame, and I think that field is
not used and not initialized correctly.

how about hoisting the calculation to aarch64_layout_frame to avoid duplicated
calculation here and there,  something like:

  cfun->machine->frame.hardfp_offset = (cfun->machine->frame.frame_size-
cfun->machine->frame.hard_fp_offset);

then use it directly in expand_epilogue:

fp_offset = cfun->machine->frame.hardfp_offset;

I'd go the other way, and simply delete hardfp_offset as unused.  We need the
other two inputs to the subtraction for other reasons, so we don't save
anything by pre-computing the subtract.


make sense.

thanks.

-- Jiong



r~







Re: [PATCH GCC]Fix broken Canadian when checking isl library support

2014-08-28 Thread Sebastian Pop
Bin.Cheng wrote:
> Is the interface between isl/gcc stable enough?  If it's not, the word
> "compatible" is definitely confusing for users who don't know isl like
> me.  If it is stable, it won't be a problem.

The required version of ISL is documented in GCC's doc/install.texi:


@item ISL Library version 0.12.2

Necessary to build GCC with the Graphite loop optimizations.
It can be downloaded from @uref{ftp://gcc.gnu.org/pub/gcc/infrastructure/}
as @file{isl-0.12.2.tar.bz2}.


The required version of ISL will most likely evolve with time, and will be
updated whenever new ISL functionality will be needed in graphite.

Sebastian


Re: [PATCH libstdc++ v5] - Add xmethods for std::vector and std::unique_ptr

2014-08-28 Thread Tom Tromey
> "Siva" == Siva Chandra  writes:

Tom> Yeah, that's my view.

Siva> I am probably not understanding it right again.

It's ok.  I am having some trouble myself.

Siva> Are you suggesting that in hook.in, we just have single function
Siva> call like this:
Siva> register_python_hooks (gdb.current_objfile ())
Siva> and, this function register_python_hooks lives somewhere else and
Siva> calls register_libstdcxx_printers and register_libstdcxx_xmethods?

Yeah.  I think it's better to put as little as possible in the hook file.
I realize this may sound ridiculous given all the code that is already
there; but that's an artifact of gcc's install-relocatability requirement.

Tom


Re: [PATCH,rs6000] Add some more vector built-ins

2014-08-28 Thread David Edelsohn
On Tue, Aug 26, 2014 at 7:52 PM, Bill Schmidt
 wrote:
> Hi,
>
> This patch adds a few more cases of overloaded vector built-ins to
> support V2DI and V2DF modes:  vec_xl, vec_xst, vec_splat, vec_div,
> vec_mul, vec_round.  These are all straightforward.  For vec_div and
> vec_mul, the most efficient thing appears to be to just scalarize these;
> at least I couldn't come up with a magic sequence for multiply at this
> width.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> Thanks,
> Bill
>
>
> [gcc]
>
> 2014-08-26  Bill Schmidt  
>
> * config/rs6000/altivec.h (vec_xl): New #define.
> (vec_xst): Likewise.
> * config/rs6000/rs6000-builtin.def (XXSPLTD_V2DF): New built-in.
> (XXSPLTD_V2DI): Likewise.
> (DIV_V2DI): Likewise.
> (UDIV_V2DI): Likewise.
> (MUL_V2DI): Likewise.
> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
> entries for VSX_BUILTIN_XVRDPI, VSX_BUILTIN_DIV_V2DI,
> VSX_BUILTIN_UDIV_V2DI, VSX_BUILTIN_MUL_V2DI,
> VSX_BUILTIN_XXSPLTD_V2DF, and VSX_BUILTIN_XXSPLTD_V2DI).
> * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): New unspec.
> (UNSPEC_VSX_DIVSD): Likewise.
> (UNSPEC_VSX_DIVUD): Likewise.
> (UNSPEC_VSX_MULSD): Likewise.
> (vsx_mul_v2di): New insn-and-split.
> (vsx_div_v2di): Likewise.
> (vsx_udiv_v2di): Likewise.
> (vsx_xxspltd_): New insn.
>
> [gcc/testsuite]
>
> 2014-08-26  Bill Schmidt  
>
> * gcc.target/powerpc/builtins-1.c: Add tests for vec_xl, vec_xst,
> vec_round, vec_splat, vec_div, and vec_mul.
> * gcc.target/powerpc/builtins-2.c: New test.

Okay.

Thanks, David


Re: [debug-early] reuse variable DIEs and fix their context

2014-08-28 Thread Aldy Hernandez

On 08/28/14 06:58, Richard Biener wrote:

On Wed, Aug 27, 2014 at 4:42 AM, Aldy Hernandez  wrote:

This patch fixes a bunch of guality failures.  With it I get 144 guality.exp
failures vs. 163 for "make check-gcc RUNTESTFLAGS=guality.exp".  A lot
better than 100% fail rate ;-).

Variable DIEs were not being reused.  Instead, variable DIEs even had the
wrong context (unilaterally the compilation unit).  The attached patch
reuses variable DIEs that have been outputted earlier.  It also fixes the
context by correcting the context on the second round.

I have also added a bit field to the DIE structure to record if a DIE has
been generated early.

Again, this is all a rough draft, but feel free to comment.


I wonder if we can't not force a proper context die (ISTR dwarf2out.c
lazily handles some contexts in some circumstances).  All parent


Hmmm, I don't see any of this lazy context setting you speak of, but...


"trees" should be readily available and we should be able to create
DIEs for them.


I wonder if instead of early dumping of all the DECLs, we could only 
dump the toplevel scoped DECLs, and let inheritance set the proper contexts.


We could start with calling dwarf2out_early_decl() for each function 
decl, and then for every global.  This is analogous to what we currently 
do for late dwarf2out.


see final.c for the functions:
  if (!DECL_IGNORED_P (current_function_decl))
debug_hooks->function_decl (current_function_decl);

see c/c-decl.c for the globals:
  FOR_EACH_VEC_ELT (*all_translation_units, i, t)
c_write_global_declarations_2 (BLOCK_VARS (DECL_INITIAL (t)));
  c_write_global_declarations_2 (BLOCK_VARS (ext_block));

The problem being that to calculate `ext_block' above, we need intimate 
knowledge of scopes and such, only available in the FE.  Is there a 
generic way of determining if a DECL is in global scope?  If so, we
could do:


foreach decl in fld.decls
if (is_global_scope(decl))
dwarf2out_decl (decl)

...and contexts will magically be set.

Is there such a way, or am I going about this the wrong way?

Aldy


Re: [debug-early] New branch for streaming dwarf early (and status)

2014-08-28 Thread Jason Merrill

On 08/25/2014 11:53 PM, Aldy Hernandez wrote:

My current predicament is that dwarf2out_early_decl() is called early
enough such that dwarf2out_decl() sets the context die to the entire
compilation unit.  Which means, that local variables end up with global
scope.  I was thinking of hacking gen_variable_die() such that the
second time around through gen_variable_die(), we fix the parent_die to
point to the correct place, but I haven't thought too much about it.
Perhaps dwarf2out_early_decl() should set things up correctly??  I'm
open to suggestions.


We already have a similar situation with nested functions and such; see 
limbo_die_list.


Jason



Re: [debug-early] reuse variable DIEs and fix their context

2014-08-28 Thread Jason Merrill

On 08/28/2014 01:34 PM, Aldy Hernandez wrote:

I wonder if instead of early dumping of all the DECLs, we could only
dump the toplevel scoped DECLs, and let inheritance set the proper
contexts.


Yes, I think this makes a lot more sense; do it at a well-defined point 
in compilation rather than as part of free_lang_data.



We could start with calling dwarf2out_early_decl() for each function
decl, and then for every global.  This is analogous to what we currently
do for late dwarf2out.

see final.c for the functions:
   if (!DECL_IGNORED_P (current_function_decl))
 debug_hooks->function_decl (current_function_decl);

see c/c-decl.c for the globals:
   FOR_EACH_VEC_ELT (*all_translation_units, i, t)
 c_write_global_declarations_2 (BLOCK_VARS (DECL_INITIAL (t)));
   c_write_global_declarations_2 (BLOCK_VARS (ext_block));



The problem being that to calculate `ext_block' above, we need intimate
knowledge of scopes and such, only available in the FE.  Is there a
generic way of determining if a DECL is in global scope?


Why not do it in the FE, i.e. *_write_global_declarations?

Jason



Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Florian Weimer

On 08/28/2014 03:01 PM, Uros Bizjak wrote:

I'd like to avoid X86_TUNE_RELAX_PIC_REG and always treat EBX as an
allocatable register. This way, we can avoid all mess with implicit
xchgs in atomic_compare_and_swap_doubleword. Also, having
allocatable EBX would allow us to introduce __builtin_cpuid builtin
and clean up cpuid.h.


It also makes writing solid inline assembly which has to use %ebx for 
some reason much easier.  We just fixed a glibc bug related to that.


--
Florian Weimer / Red Hat Product Security


Re: [PATCH i386 AVX512] [29/n] Add narrowing vpmov.

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 3:15 PM, Kirill Yukhin  wrote:
> Hello,
> This patch introduces AVX-512 narrowing moves.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
> * config/i386/i386-modes.def: Add V12QI, V14QI, V6HI modes.
> * config/i386/sse.md
> (define_mode_iterator VI4_128_8_256): New.
> (define_mode_iterator VI2_128_4_256): New.
> (define_mode_iterator PMOV_DST_MODE): Rename to
> (define_mode_iterator PMOV_DST_MODE_1): this.
> (define_insn "*avx512bw_v32hiv32qi2"): New.
> (define_insn "avx512bw_v32hiv32qi2_mask"): Ditto.
> (define_expand "avx512bw_v32hiv32qi2_store_mask"): Ditto.
> (define_mode_iterator PMOV_DST_MODE_2): Ditto.
> (define_insn "*avx512vl_2"): Ditto.
> (define_insn "_2_mask"): 
> Ditto.
> (define_expand 
> "_2_store_mask"):
> Ditto.
> (define_mode_iterator PMOV_SRC_MODE_3): New.
> (define_mode_attr pmov_dst_3): New.
> (define_mode_attr pmov_dst_zeroed_3): New.
> (define_mode_attr pmov_suff_3): New.
> (define_insn "*avx512vl_vqi2"): New.
> (define_insn "*avx512vl_v2div2qi2_store"): New.
> (define_insn "avx512vl_v2div2qi2_mask"): New.
> (define_insn "avx512vl_v2div2qi2_store_mask"): New.
> (define_insn "*avx512vl_v4qi2_store"): New.
> (define_insn "avx512vl_v4qi2_mask"): New.
> (define_insn "avx512vl_v4qi2_store_mask"): New.
> (define_insn "*avx512vl_v8qi2_store"): New.
> (define_insn "avx512vl_v8qi2_mask"): New.
> (define_insn "avx512vl_v8qi2_store_mask"): New.
> (define_mode_iterator PMOV_SRC_MODE_4): New.
> (define_mode_attr pmov_dst_4): New.
> (define_mode_attr pmov_dst_zeroed_4): New.
> (define_mode_attr pmov_suff_4): New.
> (define_insn "*avx512vl_vhi2"): New.
> (define_insn "*avx512vl_v4hi2_store"): New.
> (define_insn "avx512vl_v4hi2_mask"): New.
> (define_insn "avx512vl_v4hi2_store_mask"): New.
> (define_insn "*avx512vl_v2div2hi2_store"): New.
> (define_insn "avx512vl_v2div2hi2_mask"): New.
> (define_insn "avx512vl_v2div2hi2_store_mask"): New.
> (define_insn "*avx512vl_v2div2si2"): New.
> (define_insn "*avx512vl_v2div2si2_store"): New.
> (define_insn "avx512vl_v2div2si2_mask"): New.
> (define_insn "avx512vl_v2div2si2_store_mask"): New.

Please also mention patterns that were updated PMOV_DST_MODE
> --
> Thanks, K
>
> diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
> index 07e5720..c24abe6 100644
> --- a/gcc/config/i386/i386-modes.def
> +++ b/gcc/config/i386/i386-modes.def
> @@ -86,6 +86,9 @@ VECTOR_MODE (INT, TI, 1); /*   V1TI */
>  VECTOR_MODE (INT, DI, 1); /*   V1DI */
>  VECTOR_MODE (INT, SI, 1); /*   V1SI */
>  VECTOR_MODE (INT, QI, 2); /*   V2QI */
> +VECTOR_MODE (INT, QI, 12);/*  V12QI */
> +VECTOR_MODE (INT, QI, 14);/*  V14QI */
> +VECTOR_MODE (INT, HI, 6); /*   V6HI */
>
>  INT_MODE (OI, 32);
>  INT_MODE (XI, 64);
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index c9931b4..afdca58 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -321,6 +321,9 @@
>  (define_mode_iterator VI8_AVX2_AVX512F
>[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
>
> +(define_mode_iterator VI4_128_8_256
> +  [V4SI V4DI])
> +
>  ;; All V8D* modes
>  (define_mode_iterator V8FI
>[V8DF V8DI])
> @@ -7948,48 +7951,549 @@
>  ;;
>  ;
>
> -(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
> +(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
>  (define_mode_attr pmov_src_mode
>[(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
>  (define_mode_attr pmov_src_lower
>[(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
> -(define_mode_attr pmov_suff
> +(define_mode_attr pmov_suff_1
>[(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
>
>  (define_insn "*avx512f_2"
> -  [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
> -   (any_truncate:PMOV_DST_MODE
> +  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
> +   (any_truncate:PMOV_DST_MODE_1
>   (match_operand: 1 "register_operand" "v,v")))]
>"TARGET_AVX512F"
> -  "vpmov\t{%1, %0|%0, %1}"
> +  "vpmov\t{%1, %0|%0, %1}"
>[(set_attr "type" "ssemov")
> (set_attr "memory" "none,store")
> (set_attr "prefix" "evex")
> (set_attr "mode" "")])
>
>  (define_insn "avx512f_2_mask"
> -  [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
> -(vec_merge:PMOV_DST_MODE
> -  (any_truncate:PMOV_DST_MODE
> +  [(set (match_operand:PMOV_DST

Re: [PATCH i386 AVX512] [29/n] Add narrowing vpmov.

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 3:15 PM, Kirill Yukhin  wrote:
> Hello,
> This patch introduces AVX-512 narrowing moves.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
> * config/i386/i386-modes.def: Add V12QI, V14QI, V6HI modes.
> * config/i386/sse.md
> (define_mode_iterator VI4_128_8_256): New.
> (define_mode_iterator VI2_128_4_256): New.
> (define_mode_iterator PMOV_DST_MODE): Rename to
> (define_mode_iterator PMOV_DST_MODE_1): this.
> (define_insn "*avx512bw_v32hiv32qi2"): New.
> (define_insn "avx512bw_v32hiv32qi2_mask"): Ditto.
> (define_expand "avx512bw_v32hiv32qi2_store_mask"): Ditto.
> (define_mode_iterator PMOV_DST_MODE_2): Ditto.
> (define_insn "*avx512vl_2"): Ditto.
> (define_insn "_2_mask"): 
> Ditto.
> (define_expand 
> "_2_store_mask"):
> Ditto.
> (define_mode_iterator PMOV_SRC_MODE_3): New.
> (define_mode_attr pmov_dst_3): New.
> (define_mode_attr pmov_dst_zeroed_3): New.
> (define_mode_attr pmov_suff_3): New.
> (define_insn "*avx512vl_vqi2"): New.
> (define_insn "*avx512vl_v2div2qi2_store"): New.
> (define_insn "avx512vl_v2div2qi2_mask"): New.
> (define_insn "avx512vl_v2div2qi2_store_mask"): New.
> (define_insn "*avx512vl_v4qi2_store"): New.
> (define_insn "avx512vl_v4qi2_mask"): New.
> (define_insn "avx512vl_v4qi2_store_mask"): New.
> (define_insn "*avx512vl_v8qi2_store"): New.
> (define_insn "avx512vl_v8qi2_mask"): New.
> (define_insn "avx512vl_v8qi2_store_mask"): New.
> (define_mode_iterator PMOV_SRC_MODE_4): New.
> (define_mode_attr pmov_dst_4): New.
> (define_mode_attr pmov_dst_zeroed_4): New.
> (define_mode_attr pmov_suff_4): New.
> (define_insn "*avx512vl_vhi2"): New.
> (define_insn "*avx512vl_v4hi2_store"): New.
> (define_insn "avx512vl_v4hi2_mask"): New.
> (define_insn "avx512vl_v4hi2_store_mask"): New.
> (define_insn "*avx512vl_v2div2hi2_store"): New.
> (define_insn "avx512vl_v2div2hi2_mask"): New.
> (define_insn "avx512vl_v2div2hi2_store_mask"): New.
> (define_insn "*avx512vl_v2div2si2"): New.
> (define_insn "*avx512vl_v2div2si2_store"): New.
> (define_insn "avx512vl_v2div2si2_mask"): New.
> (define_insn "avx512vl_v2div2si2_store_mask"): New.

Please also mention patterns that were updated from PMOV_DST_MODE to
PMOV_DST_MODE_1.

There is one inconsistency - existing pattern is named ..._mask_store,
new ones are named ..._store_mask. Is there a reason for this
difference?

Otherwise the patch looks OK, as far as humans can read this code ;)

Uros.


Re: [PATCH i386 AVX512] [30/n] Add FMA patterns.

2014-08-28 Thread Uros Bizjak
On Thu, Aug 28, 2014 at 3:48 PM, Kirill Yukhin  wrote:
> Hello,
> This patch adds patterns to support FMA new insns.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
> * config/i386/sse.md
> (define_mode_iterator VF_AVX512VL): New.
> (define_mode_iterator FMAMODEM): Allow 128/256bit evex version.
> (define_mode_iterator FMAMODE):  Ditto.
> (define_expand "avx512f_fmadd__maskz"): 
> Delete.
> (define_expand "_fmadd__maskz"): New.
> (define_insn
> "fma_fmadd_"): 
> Delete.
> (define_insn
> 
> "fma_fmadd_noavx512_":
> New.
> (define_insn "avx512f_fmadd__mask"): Delete.
> (define_insn "_fmadd__mask"): New.
> (define_insn "avx512f_fmadd__mask3"): Delete.
> (define_insn "_fmadd__mask3"): New.
> (define_insn
> "fma_fmsub_"): 
> Delete.
> (define_insn
> 
> "fma_fmsub_noavx512"):
> New.
> (define_insn
> "fma_fmadd_"): Use
> VF_AVX512VL.
> (define_insn "avx512f_fmadd__mask"): Delete.
> (define_insn "_fmadd__mask"): New.
> (define_insn "avx512f_fmadd__mask3"): Delete.
> (define_insn "_fmadd__mask3"): New.
> (define_insn
> "fma_fmsub_"): 
> Delete.
> (define_insn
> 
> "fma_fmsub_noavx512"):
> New.
> (define_insn "avx512f_fmsub__mask"): Delete.
> (define_insn "_fmsub__mask"): New.
> (define_insn "avx512f_fmsub__mask3"): Delete.
> (define_insn "_fmsub__mask3"): New.
> (define_insn
> "fma_fnmadd_"): 
> Delete.
> (define_insn
> 
> "fma_fnmadd_noavx512_"):
> New.
> (define_insn 
> "fma_fnmadd_"):
> Use VF_AVX512VL.
> (define_insn "avx512f_fnmadd__mask"): Delete.
> (define_insn "_fnmadd__mask"): New.
> (define_insn "avx512f_fnmadd__mask3"): Delete.
> (define_insn "_fnmadd__mask3"): New.
> (define_insn 
> "fma_fnmsub_"):
> Delete.
> (define_insn
> 
> "fma_fnmsub_noavx512_"): 
> New.
> (define_insn 
> "fma_fnmsub_"):
> Use VF_AVX512VL.
> (define_insn "avx512f_fnmsub__mask"): Delete.
> (define_insn "_fnmsub__mask"): New.
> (define_insn "avx512f_fnmsub__mask3"): Delete.
> (define_insn "_fnmsub__mask3"): New.
> (define_expand "avx512f_fmaddsub__maskz"): 
> Delete.
> (define_expand "_fmaddsub__maskz"): 
> New.
> (define_insn 
> "fma_fmaddsub_"):
> Rename to
> (define_insn
> 
> "fma_fmaddsub_noavx512_"): 
> this.
> (define_insn 
> "fma_fmaddsub_"):
> Use VF_AVX512VL.
> (define_insn "avx512f_fmaddsub__mask"): Delete.
> (define_insn "_fmaddsub__mask"): New.
> (define_insn "avx512f_fmaddsub__mask3"): Delete.
> (define_insn "_fmaddsub__mask3"): New.
> (define_insn 
> "fma_fmsubadd_"):
> Rename to
> (define_insn
> 
> "fma_fmsubadd_noavx512_"): 
> this.
> (define_insn 
> "fma_fmsubadd_"):
> Use VF_AVX512VL.
> (define_insn "avx512f_fmsubadd__mask"): Delete.
> (define_insn "_fmsubadd__mask"): New.
> (define_insn "avx512f_fmsubadd__mask3"): Delete.
> (define_insn "_fmsubadd__mask3"): New.

Please document these changes as:

[new_pattern_name]: Rename from [old_pattern_name] and use VF_AVXyy
mode iterator.

> -(define_insn "fma_fmsub_"
> +(define_insn 
> "fma_fmsub_noavx512"

I'd suggest to put noavx512 at the beginning, so:

"noavx512_fma_fmsub_"

This way, we will have avx512 and noavx512 prefixes. It looks more
consistent to me.

Otherwise the patch is OK (it is fairly mechanical).

Thanks,
Uros.


Re: [PATCH libstdc++ v5] - Add xmethods for std::vector and std::unique_ptr

2014-08-28 Thread Siva Chandra
On Thu, Aug 28, 2014 at 10:15 AM, Tom Tromey  wrote:
> Siva> Are you suggesting that in hook.in, we just have single function
> Siva> call like this:
> Siva> register_python_hooks (gdb.current_objfile ())
> Siva> and, this function register_python_hooks lives somewhere else and
> Siva> calls register_libstdcxx_printers and register_libstdcxx_xmethods?
>
> Yeah.  I think it's better to put as little as possible in the hook file.
> I realize this may sound ridiculous given all the code that is already
> there; but that's an artifact of gcc's install-relocatability requirement.

OK. Do you think I could do this as a follow-up? It may not be a
considerable change, but it seems to me that it distracts from the intent
of this patch.

Thanks,
Siva Chandra


[Patch] fix C++11 thread support for win32

2014-08-28 Thread Thorsten Wilmer
Hi,

I stumbled over the problem that C++11 thread support is broken on
win32. I found a patch from Tomohiro Kashiwada, who did a great job,
but he has not pursued its integration for more than two years.

I have applied his patch and fixed some compiler warnings and obvious
cast/pointer problems, where the win32 api and the pthread api don't
match. Also I verified that libstdc++'s configure script detects that
thread support can be activated.

Please consider the patch below for inclusion. If you see any blocking
points, let me know, I'll try to address them as I would like to use
my C++11 programs  on win32 without hassle and a fresh compiler.

The patch is against current git mirror. It compiles without error and
the compiled source code survived some example programs for threads
and my own complex program, which is using threads, mutex and
condition variables.

I compiled the complete tool chain from source, this is my gcc configuration:
../gcc/configure --prefix=/usr/local/gcc/git-mingw
--with-gmp=/usr/local/gcc/git-mingw --target=x86_64-w64-mingw32
--enable-targets=all --disable-multilib

Here the random output of the example program:
Z:\>a.exe
Thread Thread 2 executing
1 executing
Thread Thread 2 executing
1 executing
Thread 2 executing
Thread 1 executing
Thread 2 executing
Thread 1 executing
Thread Thread 2 executing
1 executing
Final value of n is 5

Kind Regards,
 Thorsten Wilmer


>From 9591d02da5e6131e4a79259ed952d52dcecb86ac Mon Sep 17 00:00:00 2001
From: Thorsten Wilmer 
Date: Thu, 28 Aug 2014 23:37:41 +0530
Subject: [PATCH] fix C++11 thread support for win32

Based on
Implemented Condition Variable by Tomohiro Kashiwada
   .

Fixed compile warnings and added some error handling.
---
 libgcc/config/i386/gthr-win32.c | 290 
 libgcc/config/i386/gthr-win32.h | 220 --
 2 files changed, 502 insertions(+), 8 deletions(-)

diff --git a/libgcc/config/i386/gthr-win32.c b/libgcc/config/i386/gthr-win32.c
index eec16b3..10769a7 100644
--- a/libgcc/config/i386/gthr-win32.c
+++ b/libgcc/config/i386/gthr-win32.c
@@ -5,6 +5,8 @@
Contributed by Mumit Khan .
Modified and moved to separate file by Danny Smith
.
+   Implemented Condition Variable by Tomohiro Kashiwada
+   .

 This file is part of GCC.

@@ -28,6 +30,7 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
 .  */

 #include 
+#include 
 #ifndef __GTHREAD_HIDE_WIN32API
 # define __GTHREAD_HIDE_WIN32API 1
 #endif
@@ -259,9 +262,296 @@ __gthr_win32_recursive_mutex_unlock
(__gthread_recursive_mutex_t *mutex)
   return 0;
 }

+
 int
 __gthr_win32_recursive_mutex_destroy (__gthread_recursive_mutex_t *mutex)
 {
   CloseHandle ((HANDLE) mutex->sema);
   return 0;
 }
+
+
+static DWORD
+gthread_calc_time_offset (const __gthread_time_t *abs_time)
+{
+  FILETIME ft;
+  LONGLONG ll;
+  LONGLONG ret;
+  GetSystemTimeAsFileTime (&ft);
+  ll = ((LONGLONG)ft.dwHighDateTime << 32) + ft.dwLowDateTime;
+  ret = (((LONGLONG)abs_time->tv_sec * 1000 + abs_time->tv_nsec / 100)
+ - (ll - 1164447360)) / 1;
+  if (ret < 0)
+return 0;
+  if (ret > 0x7FFF)
+return 0x7FFF;
+  return ret;
+}
+
+int
+__gthr_win32_cond_init (__gthread_cond_t *cond,
+ void *attr __attribute__((unused)))
+{
+  cond->counter = -1;
+  cond->pcs = malloc (sizeof (CRITICAL_SECTION));
+  InitializeCriticalSection ((LPCRITICAL_SECTION)cond->pcs);
+  cond->sema = CreateSemaphore (NULL, 0, 65535, NULL);
+  return 0;
+}
+
+int
+__gthr_win32_cond_destroy (__gthread_cond_t *cond)
+{
+  __gthread_cond_broadcast (cond);
+  free (cond->pcs);
+  CloseHandle ((HANDLE)cond->sema);
+  return 0;
+}
+
+int
+__gthr_win32_cond_broadcast (__gthread_cond_t *cond)
+{
+  EnterCriticalSection (cond->pcs);
+  if (cond->counter >= 0)
+ReleaseSemaphore (cond->sema, cond->counter+1, NULL);
+  cond->counter = -1;
+  LeaveCriticalSection (cond->pcs);
+  return 0;
+}
+
+static int
+cond_wait_impl (__gthread_cond_t *cond,
+ __gthread_mutex_t *mutex, DWORD timeout)
+{
+  EnterCriticalSection (cond->pcs);
+  ++cond->counter;
+  LeaveCriticalSection (cond->pcs);
+  if (SignalObjectAndWait (mutex->sema, cond->sema, timeout, FALSE)
+  != WAIT_OBJECT_0)
+{
+  EnterCriticalSection (cond->pcs);
+  --cond->counter;
+  LeaveCriticalSection (cond->pcs);
+}
+  WaitForSingleObject (mutex->sema, INFINITE);
+  return 0;
+}
+
+static int
+cond_wait_recursive_impl (__gthread_cond_t *cond,
+  __gthread_recursive_mutex_t *mutex, DWORD timeout)
+{
+  EnterCriticalSection (cond->pcs);
+  ++cond->counter;
+  LeaveCriticalSection (cond->pcs);
+  if (SignalObjectAndWait (mutex->sema, cond->sema, timeout, FALSE)
+  != WAIT_OBJECT_0)
+{
+  EnterCriticalSection (cond->pcs);
+  --cond->counter;
+  LeaveCriticalSection (cond->pcs);
+}
+  WaitForSingleObject (mutex->sema, INFINITE);
+  return 0;
+}
+
+int
+__gthr_win32_cond_wait (__gthread_con

Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Uros Bizjak
On Fri, Aug 22, 2014 at 2:21 PM, Ilya Enkovich  wrote:

> On Cauldron 2014 we had a couple of talks about relaxation of ebx usage in 
> 32bit PIC mode.  It was decided that the best approach would be to not fix 
> ebx register, use pseudo register for GOT base address and let allocator do 
> the rest.  This should be similar to how clang and icc work with GOT base 
> address.  I've been working for some time on such patch and now want to share 
> my results.

>  (define_insn "*pushtf"
>[(set (match_operand:TF 0 "push_operand" "=<,<")
> -   (match_operand:TF 1 "general_no_elim_operand" "x,*roF"))]
> +   (match_operand:TF 1 "nonimmediate_no_elim_operand" "x,*roF"))]

Can you please explain the reason for this change (and a couple of
similar changes to push patterns)?

Uros.


Re: [debug-early] reuse variable DIEs and fix their context

2014-08-28 Thread Richard Biener
On August 28, 2014 8:01:05 PM CEST, Jason Merrill  wrote:
>On 08/28/2014 01:34 PM, Aldy Hernandez wrote:
>> I wonder if instead of early dumping of all the DECLs, we could only
>> dump the toplevel scoped DECLs, and let inheritance set the proper
>> contexts.
>
>Yes, I think this makes a lot more sense; do it at a well-defined point
>
>in compilation rather than as part of free_lang_data.
>
>> We could start with calling dwarf2out_early_decl() for each function
>> decl, and then for every global.  This is analogous to what we
>currently
>> do for late dwarf2out.
>>
>> see final.c for the functions:
>>if (!DECL_IGNORED_P (current_function_decl))
>>  debug_hooks->function_decl (current_function_decl);
>>
>> see c/c-decl.c for the globals:
>>FOR_EACH_VEC_ELT (*all_translation_units, i, t)
>>  c_write_global_declarations_2 (BLOCK_VARS (DECL_INITIAL (t)));
>>c_write_global_declarations_2 (BLOCK_VARS (ext_block));
>
>> The problem being that to calculate `ext_block' above, we need
>intimate
>> knowledge of scopes and such, only available in the FE.  Is there a
>> generic way of determining if a DECL is in global scope?

Via DECL_CONTEXT and the global scope macro predicate.  Eventually not enough 
to detect class scope statics.

>Why not do it in the FE, i.e. *_write_global_declarations?

Yeah, ultimately I'd like the front ends to do all required dwarf2out calls 
but free lang data seemed a convenient place to do things.

There is no reason we can't walk its array in a more sensible order.

Richard.
>
>Jason




Re: [PATCH 4.8] libstdc++ pretty-printers: Backport Python 3 support from mainline

2014-08-28 Thread Samuel Bronson
Thanks!

On 8/28/14, Jonathan Wakely  wrote:
> Applied
>


Re: [debug-early] reuse variable DIEs and fix their context

2014-08-28 Thread Jason Merrill

On 08/28/2014 03:13 PM, Richard Biener wrote:

knowledge of scopes and such, only available in the FE.  Is there a
generic way of determining if a DECL is in global scope?


Via DECL_CONTEXT and the global scope macro predicate.  Eventually not enough 
to detect class scope statics.


!decl_function_context should do the trick.

Jason



[GOOGLE] Fixup varpool references after LIPO linking

2014-08-28 Thread Teresa Johnson
This patch fixes up varpool nodes after LIPO linking as we were doing
for cgraph node references. While here, I made some fixes to the
cgraph fixup as well (mark address taken as appropriate) and removed
old references. The latter exposed an issue with resolved cgraph nodes
getting eliminated when they were only referenced from vtables and the
LIPO linking selected an external copy as the resolved node. Addressed
this by forcing the LIPO linking to prefer the non-external copy.

Passes regression testing, internal benchmark testing in progress. Ok
for google/4_9 if that succeeds?

Teresa

2014-08-28  Teresa Johnson  

Google ref b/17038802.
* l-ipo.c (resolve_cgraph_node): Pick non-external node.
(fixup_reference_list): Fixup varpool references, remove old
references, mark cgraph nodes as address taken as needed.

Index: l-ipo.c
===
--- l-ipo.c (revision 213975)
+++ l-ipo.c (working copy)
@@ -1564,6 +1564,15 @@ resolve_cgraph_node (struct cgraph_sym **slot, str
   (*slot)->rep_decl = decl2;
   return;
 }
+  /* Similarly, pick the non-external symbol, since external
+ symbols may be eliminated by symtab_remove_unreachable_nodes
+ after ipa inlining (see process_references).  */
+  if (DECL_EXTERNAL (decl1) && !DECL_EXTERNAL (decl2))
+{
+  (*slot)->rep_node = node;
+  (*slot)->rep_decl = decl2;
+  return;
+}

   has_prof1 = has_profile_info (decl1);
   bool is_aux1 = cgraph_is_auxiliary (decl1);
@@ -2304,31 +2313,44 @@ fixup_reference_list (struct varpool_node *node)
   int i;
   struct ipa_ref *ref;
   struct ipa_ref_list *list = &node->ref_list;
-  vec new_refered;
+  vec new_refered;
   vec new_refered_type;
-  struct cgraph_node *c;
+  struct symtab_node *sym_node;
   enum ipa_ref_use use_type = IPA_REF_LOAD;

   new_refered.create (10);
   new_refered_type.create (10);
   for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
 {
-  if (!is_a  (ref->referred))
-continue;
-
-  struct cgraph_node *cnode = ipa_ref_node (ref);
-  struct cgraph_node *r_cnode
-= cgraph_lipo_get_resolved_node (cnode->decl);
-  if (r_cnode != cnode)
+  if (is_a  (ref->referred))
 {
+  struct cgraph_node *cnode = ipa_ref_node (ref);
+  struct cgraph_node *r_cnode
+= cgraph_lipo_get_resolved_node (cnode->decl);
   new_refered.safe_push (r_cnode);
   use_type = ref->use;
   new_refered_type.safe_push ((int) use_type);
+  gcc_assert (use_type != IPA_REF_ADDR
+  || cnode->global.inlined_to
+  || cnode->address_taken);
+  if (use_type == IPA_REF_ADDR)
+cgraph_mark_address_taken_node (r_cnode);
 }
+  else if (is_a  (ref->referred))
+{
+  struct varpool_node *var = ipa_ref_varpool_node (ref);
+  struct varpool_node *r_var = real_varpool_node (var->decl);
+  new_refered.safe_push (r_var);
+  use_type = ref->use;
+  new_refered_type.safe_push ((int) use_type);
+}
+  else
+gcc_assert (false);
 }
-  for (i = 0; new_refered.iterate (i, &c); ++i)
+  ipa_remove_all_references (&node->ref_list);
+  for (i = 0; new_refered.iterate (i, &sym_node); ++i)
 {
-  ipa_record_reference (node, c,
+  ipa_record_reference (node, sym_node,
 (enum ipa_ref_use) new_refered_type[i], NULL);
 }
 }


-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: [GOOGLE] Fixup varpool references after LIPO linking

2014-08-28 Thread Teresa Johnson
On Thu, Aug 28, 2014 at 1:40 PM, Xinliang David Li  wrote:
>
> Do you know why the previous check is not enough ?
>
> cgraph_can_remove_if_no_direct_calls_and_refs_p (struct cgraph_node *node)

This will return true for the external node, but it also returns true
for the non-external node. The non-external node is a COMDAT, and as
the comments in that routine indicate, COMDATs can be removed even if
they are externally_visible.

Teresa

>
> David
>
>
> On Thu, Aug 28, 2014 at 1:29 PM, Teresa Johnson 
> wrote:
>>
>> This patch fixes up varpool nodes after LIPO linking as we were doing
>> for cgraph node references. While here, I made some fixes to the
>> cgraph fixup as well (mark address taken as appropriate) and removed
>> old references. The latter exposed an issue with resolved cgraph nodes
>> getting eliminated when they were only referenced from vtables and the
>> LIPO linking selected an external copy as the resolved node. Addressed
>> this by forcing the LIPO linking to prefer the non-external copy.
>>
>> Passes regression testing, internal benchmark testing in progress. Ok
>> for google/4_9 if that succeeds?
>>
>> Teresa
>>
>> 2014-08-28  Teresa Johnson  
>>
>> Google ref b/17038802.
>> * l-ipo.c (resolve_cgraph_node): Pick non-external node.
>> (fixup_reference_list): Fixup varpool references, remove old
>> references, mark cgraph nodes as address taken as needed.
>>
>> Index: l-ipo.c
>> ===
>> --- l-ipo.c (revision 213975)
>> +++ l-ipo.c (working copy)
>> @@ -1564,6 +1564,15 @@ resolve_cgraph_node (struct cgraph_sym **slot, str
>>(*slot)->rep_decl = decl2;
>>return;
>>  }
>> +  /* Similarly, pick the non-external symbol, since external
>> + symbols may be eliminated by symtab_remove_unreachable_nodes
>> + after ipa inlining (see process_references).  */
>> +  if (DECL_EXTERNAL (decl1) && !DECL_EXTERNAL (decl2))
>> +{
>> +  (*slot)->rep_node = node;
>> +  (*slot)->rep_decl = decl2;
>> +  return;
>> +}
>>
>>has_prof1 = has_profile_info (decl1);
>>bool is_aux1 = cgraph_is_auxiliary (decl1);
>> @@ -2304,31 +2313,44 @@ fixup_reference_list (struct varpool_node *node)
>>int i;
>>struct ipa_ref *ref;
>>struct ipa_ref_list *list = &node->ref_list;
>> -  vec new_refered;
>> +  vec new_refered;
>>vec new_refered_type;
>> -  struct cgraph_node *c;
>> +  struct symtab_node *sym_node;
>>enum ipa_ref_use use_type = IPA_REF_LOAD;
>>
>>new_refered.create (10);
>>new_refered_type.create (10);
>>for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
>>  {
>> -  if (!is_a  (ref->referred))
>> -continue;
>> -
>> -  struct cgraph_node *cnode = ipa_ref_node (ref);
>> -  struct cgraph_node *r_cnode
>> -= cgraph_lipo_get_resolved_node (cnode->decl);
>> -  if (r_cnode != cnode)
>> +  if (is_a  (ref->referred))
>>  {
>> +  struct cgraph_node *cnode = ipa_ref_node (ref);
>> +  struct cgraph_node *r_cnode
>> += cgraph_lipo_get_resolved_node (cnode->decl);
>>new_refered.safe_push (r_cnode);
>>use_type = ref->use;
>>new_refered_type.safe_push ((int) use_type);
>> +  gcc_assert (use_type != IPA_REF_ADDR
>> +  || cnode->global.inlined_to
>> +  || cnode->address_taken);
>> +  if (use_type == IPA_REF_ADDR)
>> +cgraph_mark_address_taken_node (r_cnode);
>>  }
>> +  else if (is_a  (ref->referred))
>> +{
>> +  struct varpool_node *var = ipa_ref_varpool_node (ref);
>> +  struct varpool_node *r_var = real_varpool_node (var->decl);
>> +  new_refered.safe_push (r_var);
>> +  use_type = ref->use;
>> +  new_refered_type.safe_push ((int) use_type);
>> +}
>> +  else
>> +gcc_assert (false);
>>  }
>> -  for (i = 0; new_refered.iterate (i, &c); ++i)
>> +  ipa_remove_all_references (&node->ref_list);
>> +  for (i = 0; new_refered.iterate (i, &sym_node); ++i)
>>  {
>> -  ipa_record_reference (node, c,
>> +  ipa_record_reference (node, sym_node,
>>  (enum ipa_ref_use) new_refered_type[i],
>> NULL);
>>  }
>>  }
>>
>>
>> --
>> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
>
>



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: [GOOGLE] Fixup varpool references after LIPO linking

2014-08-28 Thread Xinliang David Li
ok. The patch is fine.

On Thu, Aug 28, 2014 at 1:46 PM, Teresa Johnson  wrote:
> On Thu, Aug 28, 2014 at 1:40 PM, Xinliang David Li  wrote:
>>
>> Do you know why the previous check is not enough ?
>>
>> cgraph_can_remove_if_no_direct_calls_and_refs_p (struct cgraph_node *node)
>
> This will return true for the external node, but it also returns true
> for the non-external node. The non-external node is a COMDAT, and as
> the comments in that routine indicate, COMDATs can be removed even if
> they are externally_visible.
>
> Teresa
>
>>
>> David
>>
>>
>> On Thu, Aug 28, 2014 at 1:29 PM, Teresa Johnson 
>> wrote:
>>>
>>> This patch fixes up varpool nodes after LIPO linking as we were doing
>>> for cgraph node references. While here, I made some fixes to the
>>> cgraph fixup as well (mark address taken as appropriate) and removed
>>> old references. The latter exposed an issue with resolved cgraph nodes
>>> getting eliminated when they were only referenced from vtables and the
>>> LIPO linking selected an external copy as the resolved node. Addressed
>>> this by forcing the LIPO linking to prefer the non-external copy.
>>>
>>> Passes regression testing, internal benchmark testing in progress. Ok
>>> for google/4_9 if that succeeds?
>>>
>>> Teresa
>>>
>>> 2014-08-28  Teresa Johnson  
>>>
>>> Google ref b/17038802.
>>> * l-ipo.c (resolve_cgraph_node): Pick non-external node.
>>> (fixup_reference_list): Fixup varpool references, remove old
>>> references, mark cgraph nodes as address taken as needed.
>>>
>>> Index: l-ipo.c
>>> ===
>>> --- l-ipo.c (revision 213975)
>>> +++ l-ipo.c (working copy)
>>> @@ -1564,6 +1564,15 @@ resolve_cgraph_node (struct cgraph_sym **slot, str
>>>(*slot)->rep_decl = decl2;
>>>return;
>>>  }
>>> +  /* Similarly, pick the non-external symbol, since external
>>> + symbols may be eliminated by symtab_remove_unreachable_nodes
>>> + after ipa inlining (see process_references).  */
>>> +  if (DECL_EXTERNAL (decl1) && !DECL_EXTERNAL (decl2))
>>> +{
>>> +  (*slot)->rep_node = node;
>>> +  (*slot)->rep_decl = decl2;
>>> +  return;
>>> +}
>>>
>>>has_prof1 = has_profile_info (decl1);
>>>bool is_aux1 = cgraph_is_auxiliary (decl1);
>>> @@ -2304,31 +2313,44 @@ fixup_reference_list (struct varpool_node *node)
>>>int i;
>>>struct ipa_ref *ref;
>>>struct ipa_ref_list *list = &node->ref_list;
>>> -  vec new_refered;
>>> +  vec new_refered;
>>>vec new_refered_type;
>>> -  struct cgraph_node *c;
>>> +  struct symtab_node *sym_node;
>>>enum ipa_ref_use use_type = IPA_REF_LOAD;
>>>
>>>new_refered.create (10);
>>>new_refered_type.create (10);
>>>for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
>>>  {
>>> -  if (!is_a  (ref->referred))
>>> -continue;
>>> -
>>> -  struct cgraph_node *cnode = ipa_ref_node (ref);
>>> -  struct cgraph_node *r_cnode
>>> -= cgraph_lipo_get_resolved_node (cnode->decl);
>>> -  if (r_cnode != cnode)
>>> +  if (is_a  (ref->referred))
>>>  {
>>> +  struct cgraph_node *cnode = ipa_ref_node (ref);
>>> +  struct cgraph_node *r_cnode
>>> += cgraph_lipo_get_resolved_node (cnode->decl);
>>>new_refered.safe_push (r_cnode);
>>>use_type = ref->use;
>>>new_refered_type.safe_push ((int) use_type);
>>> +  gcc_assert (use_type != IPA_REF_ADDR
>>> +  || cnode->global.inlined_to
>>> +  || cnode->address_taken);
>>> +  if (use_type == IPA_REF_ADDR)
>>> +cgraph_mark_address_taken_node (r_cnode);
>>>  }
>>> +  else if (is_a  (ref->referred))
>>> +{
>>> +  struct varpool_node *var = ipa_ref_varpool_node (ref);
>>> +  struct varpool_node *r_var = real_varpool_node (var->decl);
>>> +  new_refered.safe_push (r_var);
>>> +  use_type = ref->use;
>>> +  new_refered_type.safe_push ((int) use_type);
>>> +}
>>> +  else
>>> +gcc_assert (false);
>>>  }
>>> -  for (i = 0; new_refered.iterate (i, &c); ++i)
>>> +  ipa_remove_all_references (&node->ref_list);
>>> +  for (i = 0; new_refered.iterate (i, &sym_node); ++i)
>>>  {
>>> -  ipa_record_reference (node, c,
>>> +  ipa_record_reference (node, sym_node,
>>>  (enum ipa_ref_use) new_refered_type[i],
>>> NULL);
>>>  }
>>>  }
>>>
>>>
>>> --
>>> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
>>
>>
>
>
>
> --
> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: Migrating gcc.c-torture

2014-08-28 Thread Mike Stump
On Aug 28, 2014, at 7:27 AM, Bernd Schmidt  wrote:
> The next question would be what to do with gcc.c-torture/unsorted.  As far as 
> I can tell, these are all just plain compile tests, except for dump-noaddr.c. 
>  Ok to move all except for that test to c-torture/compile, adjusting SFset.c 
> and DFcmp.c which require int32plus?

Ok.


Re: [Patch AArch64] Fix for PR62040

2014-08-28 Thread Carrot Wei
AArch64 maintainers, could you help to review following patches?

https://gcc.gnu.org/ml/gcc-patches/2014-08/msg01966.html
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02060.html

thanks
Guozhi Wei


On Wed, Aug 20, 2014 at 12:51 PM, Carrot Wei  wrote:
> Good suggestion. Add the testcase.
>
> thanks
> Guozhi Wei
>
> 2014-08-20  Guozhi Wei  
>
> PR target/62040
> * gcc.target/aarch64/pr62040.c: New test.
>
> Index: pr62040.c
> ===
> --- pr62040.c (revision 0)
> +++ pr62040.c (revision 0)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-g -Os" } */
> +
> +#include "arm_neon.h"
> +
> +extern bar(int32x4_t);
> +
> +void foo() {
> +  int32x4x4_t rows;
> +  uint64x2x2_t row01;
> +
> +  row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
> +  row01.val[1] = vreinterpretq_u64_s32(rows.val[1]);
> +  uint64x1_t row3l = vget_low_u64(row01.val[0]);
> +  row01.val[0] = vcombine_u64(vget_low_u64(row01.val[1]), row3l);
> +  int32x4_t xxx = vreinterpretq_s32_u64(row01.val[0]);
> +  int32x4_t out = vtrn1q_s32 (xxx, xxx);
> +  bar(out);
> +}
>
> On Wed, Aug 20, 2014 at 4:26 AM, Kyrill Tkachov  
> wrote:
>> Hi Carrot,
>>
>> cc'ing the aarch64 maintainers...
>>
>>
>> On 20/08/14 00:43, Carrot Wei wrote:
>>>
>>> Hi
>>>
>>> Current AArch64 backend can generate rtl expressions like
>>> (vec_duplicate:DI (const_int 0 [0])), which causes ICE in
>>> simplify_const_unary_operation because vec_duplicate should generate
>>> vector mode only.
>>>
>>> As suggested by Andrew in the bug entry, I split the original insn
>>> patterns to avoid scalar mode vec_duplicate expression.
>>
>>
>> The documentation does say that vec_concat can work on scalars, so it seems
>> ok to me at a glance (but I can't approve it myself).
>>
>> Would be nice to have an addition to the testsuite though...
>>
>> Kyrill
>>
>>
>>> Passed regression tests on qemu without failure.
>>> OK for trunk and 4.9 branch?
>>>
>>> thanks
>>> Guozhi Wei
>>>
>>> 2014-08-19  Guozhi Wei  
>>>
>>>  PR target/62040
>>>  * config/aarch64/iterators.md (VQ_NO2E, VQ_2E): New iterators.
>>>  * config/aarch64/aarch64-simd.md (move_lo_quad_internal_):
>>> Split
>>>  it into two patterns.
>>>  (move_lo_quad_internal_be_): Likewise.
>>
>>
>>


[BUILDROBOT][PATCH] Fix warnings in the mep-elf target

2014-08-28 Thread Jan-Benedict Glaw
Hi!

The following patch silences two warnings in the mep-elf target,
fixing the config-list.mk build:


First one:
~~
g++ -c  -DIN_GCC_FRONTEND -DIN_GCC_FRONTEND -g -O2 -DIN_GCC  
-DCROSS_DIRECTORY_STRUCTURE  -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic 
-Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror -fno-common 
 -DHAVE_CONFIG_H -I. -I. -I../../../gcc/gcc -I../../../gcc/gcc/. 
-I../../../gcc/gcc/../include -I../../../gcc/gcc/../libcpp/include 
-I/opt/cfarm/mpc/include  -I../../../gcc/gcc/../libdecnumber 
-I../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
-I../../../gcc/gcc/../libbacktrace -DCLOOG_INT_GMP  -DCLOOG_INT_GMP   -I. -I. 
-I../../../gcc/gcc -I../../../gcc/gcc/. -I../../../gcc/gcc/../include 
-I../../../gcc/gcc/../libcpp/include -I/opt/cfarm/mpc/include  
-I../../../gcc/gcc/../libdecnumber -I../../../gcc/gcc/../libdecnumber/dpd 
-I../libdecnumber -I../../../gcc/gcc/../libbacktrace -DCLOOG_INT_GMP  
-DCLOOG_INT_GMP  ../../../gcc/gcc/config/mep/mep-pragma.c
../../../gcc/gcc/config/mep/mep-pragma.c: In function ‘void 
mep_pragma_coprocessor(cpp_reader*)’:
../../../gcc/gcc/config/mep/mep-pragma.c:271:18: error: ‘rclass’ may be used 
uninitialized in this function [-Werror=maybe-uninitialized]
   enum reg_class rclass;
  ^
cc1plus: all warnings being treated as errors
make[2]: *** [mep-pragma.o] Error 1


This is actually a misbehavior of current GCC, the code itself looks 100% okay
to me.  Shall I report that as a bug, too?



Second one:
~~~
g++ -c   -g -O2 -DIN_GCC  -DCROSS_DIRECTORY_STRUCTURE  -fno-exceptions 
-fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic 
-Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror -fno-common 
 -DHAVE_CONFIG_H -I. -I. -I../../../gcc/gcc -I../../../gcc/gcc/. 
-I../../../gcc/gcc/../include -I../../../gcc/gcc/../libcpp/include 
-I/opt/cfarm/mpc/include  -I../../../gcc/gcc/../libdecnumber 
-I../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
-I../../../gcc/gcc/../libbacktrace-o mep.o -MT mep.o -MMD -MP -MF 
./.deps/mep.TPo ../../../gcc/gcc/config/mep/mep.c
../../../gcc/gcc/config/mep/mep.c:3448:0: error: "VECTOR_TYPE_P" redefined 
[-Werror]
 #define VECTOR_TYPE_P(t) (TREE_CODE(t) == VECTOR_TYPE)
 ^
In file included from ../../../gcc/gcc/config/mep/mep.c:26:0:
../../../gcc/gcc/tree.h:474:0: note: this is the location of the previous 
definition
 #define VECTOR_TYPE_P(TYPE) (TREE_CODE (TYPE) == VECTOR_TYPE)
 ^
cc1plus: all warnings being treated as errors
make[2]: *** [mep.o] Error 1





This patch should fix it. Okay to apply?


2014-08-28  Jan-Benedict Glaw  

* config/mep/mep-pragma.c (mep_pragma_coprocessor_subclass): Rework
to silence warning.
* config/mep/mep.c (VECTOR_TYPE_P): Remove duplicate definition.


diff --git a/gcc/config/mep/mep-pragma.c b/gcc/config/mep/mep-pragma.c
index 632e92d..7bda297 100644
--- a/gcc/config/mep/mep-pragma.c
+++ b/gcc/config/mep/mep-pragma.c
@@ -274,24 +274,21 @@ mep_pragma_coprocessor_subclass (void)
   if (type != CPP_CHAR)
 goto syntax_error;
   class_letter = tree_to_uhwi (val);
-  if (class_letter >= 'A' && class_letter <= 'D')
-switch (class_letter)
-  {
-  case 'A':
-   rclass = USER0_REGS;
-   break;
-  case 'B':
-   rclass = USER1_REGS;
-   break;
-  case 'C':
-   rclass = USER2_REGS;
-   break;
-  case 'D':
-   rclass = USER3_REGS;
-   break;
-  }
-  else
+  switch (class_letter)
 {
+case 'A':
+  rclass = USER0_REGS;
+  break;
+case 'B':
+  rclass = USER1_REGS;
+  break;
+case 'C':
+  rclass = USER2_REGS;
+  break;
+case 'D':
+  rclass = USER3_REGS;
+  break;
+default:
   error ("#pragma GCC coprocessor subclass letter must be in [ABCD]");
   return;
 }
diff --git a/gcc/config/mep/mep.c b/gcc/config/mep/mep.c
index 3c71b95..eb0adf8 100644
--- a/gcc/config/mep/mep.c
+++ b/gcc/config/mep/mep.c
@@ -3445,8 +3445,6 @@ mep_expand_builtin_saveregs (void)
   return XEXP (regbuf, 0);
 }
 
-#define VECTOR_TYPE_P(t) (TREE_CODE(t) == VECTOR_TYPE)
-
 static tree
 mep_build_builtin_va_list (void)
 {


MfG, JBG

-- 
  Jan-Benedict Glaw  jbg...@lug-owl.de  +49-172-7608481
 Signature of:  http://perl.plover.com/Questions.html
 the second  :


signature.asc
Description: Digital signature


Re: [BUILDROBOT][PATCH] Fix warnings in the mep-elf target

2014-08-28 Thread Andrew Pinski
On Thu, Aug 28, 2014 at 3:00 PM, Jan-Benedict Glaw  wrote:
> Hi!
>
> The following patch silences two warnings in the mep-elf target,
> fixing the config-list.mk build:


I thought -Werror was only on when the versions of GCC match.

Thanks,
Andrew

>
>
> First one:
> ~~
> g++ -c  -DIN_GCC_FRONTEND -DIN_GCC_FRONTEND -g -O2 -DIN_GCC  
> -DCROSS_DIRECTORY_STRUCTURE  -fno-exceptions -fno-rtti 
> -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic 
> -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror 
> -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../../gcc/gcc -I../../../gcc/gcc/. 
> -I../../../gcc/gcc/../include -I../../../gcc/gcc/../libcpp/include 
> -I/opt/cfarm/mpc/include  -I../../../gcc/gcc/../libdecnumber 
> -I../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
> -I../../../gcc/gcc/../libbacktrace -DCLOOG_INT_GMP  -DCLOOG_INT_GMP   -I. -I. 
> -I../../../gcc/gcc -I../../../gcc/gcc/. -I../../../gcc/gcc/../include 
> -I../../../gcc/gcc/../libcpp/include -I/opt/cfarm/mpc/include  
> -I../../../gcc/gcc/../libdecnumber -I../../../gcc/gcc/../libdecnumber/dpd 
> -I../libdecnumber -I../../../gcc/gcc/../libbacktrace -DCLOOG_INT_GMP  
> -DCLOOG_INT_GMP  ../../../gcc/gcc/config/mep/mep-pragma.c
> ../../../gcc/gcc/config/mep/mep-pragma.c: In function ‘void 
> mep_pragma_coprocessor(cpp_reader*)’:
> ../../../gcc/gcc/config/mep/mep-pragma.c:271:18: error: ‘rclass’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
>enum reg_class rclass;
>   ^
> cc1plus: all warnings being treated as errors
> make[2]: *** [mep-pragma.o] Error 1
>
>
> This is actually a misbehavior of current GCC, the code itself looks 100% okay
> to me.  Shall I report that as a bug, too?
>
>
>
> Second one:
> ~~~
> g++ -c   -g -O2 -DIN_GCC  -DCROSS_DIRECTORY_STRUCTURE  -fno-exceptions 
> -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing 
> -Wwrite-strings -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual 
> -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror 
> -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../../gcc/gcc -I../../../gcc/gcc/. 
> -I../../../gcc/gcc/../include -I../../../gcc/gcc/../libcpp/include 
> -I/opt/cfarm/mpc/include  -I../../../gcc/gcc/../libdecnumber 
> -I../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
> -I../../../gcc/gcc/../libbacktrace-o mep.o -MT mep.o -MMD -MP -MF 
> ./.deps/mep.TPo ../../../gcc/gcc/config/mep/mep.c
> ../../../gcc/gcc/config/mep/mep.c:3448:0: error: "VECTOR_TYPE_P" redefined 
> [-Werror]
>  #define VECTOR_TYPE_P(t) (TREE_CODE(t) == VECTOR_TYPE)
>  ^
> In file included from ../../../gcc/gcc/config/mep/mep.c:26:0:
> ../../../gcc/gcc/tree.h:474:0: note: this is the location of the previous 
> definition
>  #define VECTOR_TYPE_P(TYPE) (TREE_CODE (TYPE) == VECTOR_TYPE)
>  ^
> cc1plus: all warnings being treated as errors
> make[2]: *** [mep.o] Error 1
>
>
>
>
>
> This patch should fix it. Okay to apply?
>
>
> 2014-08-28  Jan-Benedict Glaw  
>
> * config/mep/mep-pragma.c (mep_pragma_coprocessor_subclass): Rework
> to silence warning.
> * config/mep/mep.c (VECTOR_TYPE_P): Remove duplicate definition.
>
>
> diff --git a/gcc/config/mep/mep-pragma.c b/gcc/config/mep/mep-pragma.c
> index 632e92d..7bda297 100644
> --- a/gcc/config/mep/mep-pragma.c
> +++ b/gcc/config/mep/mep-pragma.c
> @@ -274,24 +274,21 @@ mep_pragma_coprocessor_subclass (void)
>if (type != CPP_CHAR)
>  goto syntax_error;
>class_letter = tree_to_uhwi (val);
> -  if (class_letter >= 'A' && class_letter <= 'D')
> -switch (class_letter)
> -  {
> -  case 'A':
> -   rclass = USER0_REGS;
> -   break;
> -  case 'B':
> -   rclass = USER1_REGS;
> -   break;
> -  case 'C':
> -   rclass = USER2_REGS;
> -   break;
> -  case 'D':
> -   rclass = USER3_REGS;
> -   break;
> -  }
> -  else
> +  switch (class_letter)
>  {
> +case 'A':
> +  rclass = USER0_REGS;
> +  break;
> +case 'B':
> +  rclass = USER1_REGS;
> +  break;
> +case 'C':
> +  rclass = USER2_REGS;
> +  break;
> +case 'D':
> +  rclass = USER3_REGS;
> +  break;
> +default:
>error ("#pragma GCC coprocessor subclass letter must be in [ABCD]");
>return;
>  }
> diff --git a/gcc/config/mep/mep.c b/gcc/config/mep/mep.c
> index 3c71b95..eb0adf8 100644
> --- a/gcc/config/mep/mep.c
> +++ b/gcc/config/mep/mep.c
> @@ -3445,8 +3445,6 @@ mep_expand_builtin_saveregs (void)
>return XEXP (regbuf, 0);
>  }
>
> -#define VECTOR_TYPE_P(t) (TREE_CODE(t) == VECTOR_TYPE)
> -
>  static tree
>  mep_build_builtin_va_list (void)
>  {
>
>
> MfG, JBG
>
> --
>   Jan-Benedict Glaw  jbg...@lug-owl.de  +49-172-7608481
>  Signature of:  http://perl.plover.com/Questions.html
>  the second  :


Re: [BUILDROBOT][PATCH] Fix warnings in the mep-elf target

2014-08-28 Thread DJ Delorie

> This patch should fix it. Okay to apply?

Ok.  Thanks!

> 2014-08-28  Jan-Benedict Glaw  
> 
>   * config/mep/mep-pragma.c (mep_pragma_coprocessor_subclass): Rework
>   to silence warning.
>   * config/mep/mep.c (VECTOR_TYPE_P): Remove duplicate definition.


Re: [PATCH 225/236] Work towards NEXT_INSN/PREV_INSN requiring insns as their params

2014-08-28 Thread H.J. Lu
On Tue, Aug 26, 2014 at 10:15 AM, David Malcolm  wrote:
> On Mon, 2014-08-25 at 08:25 -0600, Jeff Law wrote:
>> On 08/19/14 15:35, David Malcolm wrote:
>> > On Tue, 2014-08-19 at 13:57 -0700, Richard Henderson wrote:
>> >> On 08/06/2014 10:23 AM, David Malcolm wrote:
>> >>> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
>> >>> index 59d633d..5e42a97 100644
>> >>> --- a/gcc/cfgrtl.c
>> >>> +++ b/gcc/cfgrtl.c
>> >>> @@ -1604,6 +1604,7 @@ force_nonfallthru_and_redirect (edge e, 
>> >>> basic_block target, rtx jump_label)
>> >>>
>> >>> if (EDGE_COUNT (e->src->succs) >= 2 || abnormal_edge_flags || 
>> >>> asm_goto_edge)
>> >>>   {
>> >>> +  rtx_insn *note;
>> >>> gcov_type count = e->count;
>> >>> int probability = e->probability;
>> >>> /* Create the new structures.  */
>> >>
>> >> A new variable with no uses?
>> >
>> > This one is quite ugly: the pre-existing code has two locals named
>> > "note", both of type rtx, with one shadowing the other.  This patch
>> > introduces a third, within the scope where the name "note" is used for
>> > insns.  In the other scopes the two other "note" variables are used for
>> > find_reg_note.  In each case, the name "note" is written to before use.
>> >
>> > So in my defense, the existing code already had shadowing of locals...
>> > but I guess that's not much of a defense, and it would be better to
>> > introduce a different name, and rename the uses in the appropriate
>> > scope.
>> If it's reasonable to do this now, then please do so.  Else make it a
>> follow-up item.  I guess we should have had a list of follow-up items :-)
>>
>> jeff
>
> Attached is a revised version of #225, with the following changes:
>
> * fix for the above: avoid introducing a new shadow name "note" within
> force_nonfallthru_and_redirect by introducing a new local rtx_insn *
> "new_head" and renaming "note" to it in the appropriate places.
>
> * changed an as_a<> to a safe_as_a<> within
> function.c:thread_prologue_and_epilogue_insns to fix a segfault seen
> during an earlier bootstrap
>
> Successfully bootstrapped on x86_64 (Fedora 20), on top of the rest of
> the patches leading up to it (including the revised ones for #220-#221
> that rth recently approved).
>
> OK for trunk?
> Dave

One of your changes caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62300

-- 
H.J.


Re: [GOOGLE, AUTOFDO] Assign different discriminators to calls with the same lineno

2014-08-28 Thread Wei Mi
Hi Cary,

Is the new patch ok for google-4_9?

Thanks,
Wei.


On Sun, Aug 24, 2014 at 8:53 PM, Wei Mi  wrote:
> To avoid the unused new discriminator value, I added a map
> "found_call_this_line" to track whether a call is the first call in a
> source line seen when assigning discriminators. For the first call in
> a source line, its discriminator is 0. For the following calls in the
> same source line, a new discriminator will be used every time. The new
> patch is attached. Internal perf test and regression test are ok. Is
> it ok for google-4_9?
>
> Thanks,
> Wei.
>
>
>
> On Thu, Aug 7, 2014 at 2:10 PM, Wei Mi  wrote:
>> Yes, that is intentional. It is to avoid assigning a discriminator for
>> the first call in the group of calls with the same source lineno.
>> Starting from the second call in the group, it will get a different
>> discriminator from the previous call in the same group.
>>
>> Thanks,
>> Wei.
>>
>> On Thu, Aug 7, 2014 at 12:17 PM, Cary Coutant  wrote:
  static int
 -next_discriminator_for_locus (location_t locus)
 +increase_discriminator_for_locus (location_t locus, bool return_next)
  {
struct locus_discrim_map item;
struct locus_discrim_map **slot;
 @@ -934,8 +936,10 @@ next_discriminator_for_locus (location_t
(*slot)->locus = locus;
(*slot)->discriminator = 0;
  }
 +
(*slot)->discriminator++;
 -  return (*slot)->discriminator;
 +  return return_next ? (*slot)->discriminator
 +: (*slot)->discriminator - 1;
  }
>>>
>>> Won't this have the effect of sometimes incrementing the next
>>> available discriminator without actually using the new value? That is,
>>> if you call it once with return_next == false, and then with
>>> return_next == true.
>>>
>>> -cary


Re: [PATCH 225/236] Work towards NEXT_INSN/PREV_INSN requiring insns as their params

2014-08-28 Thread David Malcolm
On Thu, 2014-08-28 at 17:08 -0700, H.J. Lu wrote:
> On Tue, Aug 26, 2014 at 10:15 AM, David Malcolm  wrote:
> > On Mon, 2014-08-25 at 08:25 -0600, Jeff Law wrote:
> >> On 08/19/14 15:35, David Malcolm wrote:
> >> > On Tue, 2014-08-19 at 13:57 -0700, Richard Henderson wrote:
> >> >> On 08/06/2014 10:23 AM, David Malcolm wrote:
> >> >>> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> >> >>> index 59d633d..5e42a97 100644
> >> >>> --- a/gcc/cfgrtl.c
> >> >>> +++ b/gcc/cfgrtl.c
> >> >>> @@ -1604,6 +1604,7 @@ force_nonfallthru_and_redirect (edge e, 
> >> >>> basic_block target, rtx jump_label)
> >> >>>
> >> >>> if (EDGE_COUNT (e->src->succs) >= 2 || abnormal_edge_flags || 
> >> >>> asm_goto_edge)
> >> >>>   {
> >> >>> +  rtx_insn *note;
> >> >>> gcov_type count = e->count;
> >> >>> int probability = e->probability;
> >> >>> /* Create the new structures.  */
> >> >>
> >> >> A new variable with no uses?
> >> >
> >> > This one is quite ugly: the pre-existing code has two locals named
> >> > "note", both of type rtx, with one shadowing the other.  This patch
> >> > introduces a third, within the scope where the name "note" is used for
> >> > insns.  In the other scopes the two other "note" variables are used for
> >> > find_reg_note.  In each case, the name "note" is written to before use.
> >> >
> >> > So in my defense, the existing code already had shadowing of locals...
> >> > but I guess that's not much of a defense, and it would be better to
> >> > introduce a different name, and rename the uses in the appropriate
> >> > scope.
> >> If it's reasonable to do this now, then please do so.  Else make it a
> >> follow-up item.  I guess we should have had a list of follow-up items :-)
> >>
> >> jeff
> >
> > Attached is a revised version of #225, with the following changes:
> >
> > * fix for the above: avoid introducing a new shadow name "note" within
> > force_nonfallthru_and_redirect by introducing a new local rtx_insn *
> > "new_head" and renaming "note" to it in the appropriate places.
> >
> > * changed an as_a<> to a safe_as_a<> within
> > function.c:thread_prologue_and_epilogue_insns to fix a segfault seen
> > during an earlier bootstrap
> >
> > Successfully bootstrapped on x86_64 (Fedora 20), on top of the rest of
> > the patches leading up to it (including the revised ones for #220-#221
> > that rth recently approved).
> >
> > OK for trunk?
> > Dave
> 
> One of your changes caused:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62300

Sorry about this; candidate patch attached; attempted bootstrap in
progress.
Index: gcc/ChangeLog
===
--- gcc/ChangeLog	(revision 214708)
+++ gcc/ChangeLog	(working copy)
@@ -1,5 +1,12 @@
 2014-08-28  David Malcolm  
 
+	PR bootstrap/62300
+	* function.c (assign_parm_setup_reg): Remove erroneous checked
+	cast to rtx_insn * on result of gen_extend_insn in favor of
+	introducing a new local rtx "pat".
+
+2014-08-28  David Malcolm  
+
 	* rtl.h (previous_insn): Strengthen param from rtx to rtx_insn *.
 	(next_insn): Likewise.
 	* emit-rtl.c (next_insn): Likewise.
Index: gcc/function.c
===
--- gcc/function.c	(revision 214704)
+++ gcc/function.c	(working copy)
@@ -3039,10 +3039,9 @@
 	}
 	  else
 	t = op1;
-	  insn = as_a  (
-		   gen_extend_insn (op0, t, promoted_nominal_mode,
-data->passed_mode, unsignedp));
-	  emit_insn (insn);
+	  rtx pat = gen_extend_insn (op0, t, promoted_nominal_mode,
+ data->passed_mode, unsignedp);
+	  emit_insn (pat);
 	  insns = get_insns ();
 
 	  moved = true;


Re: [PATCH 225/236] Work towards NEXT_INSN/PREV_INSN requiring insns as their params

2014-08-28 Thread Richard Henderson
On 08/28/2014 05:47 PM, David Malcolm wrote:
> -   insn = as_a  (
> -gen_extend_insn (op0, t, promoted_nominal_mode,
> - data->passed_mode, unsignedp));
> -   emit_insn (insn);
> +   rtx pat = gen_extend_insn (op0, t, promoted_nominal_mode,
> +  data->passed_mode, unsignedp);
> +   emit_insn (pat);

Certainly ok.


r~


Re: [BUILDROBOT][PATCH] Fix warnings in the mep-elf target

2014-08-28 Thread Jan-Benedict Glaw
On Thu, 2014-08-28 15:47:07 -0700, Andrew Pinski  wrote:
> On Thu, Aug 28, 2014 at 3:00 PM, Jan-Benedict Glaw  wrote:
> > Hi!
> >
> > The following patch silences two warnings in the mep-elf target,
> > fixing the config-list.mk build:
> 
> 
> I thought -Werror was only on when the versions of GCC match.

Exactly that's the case. The config-list.mk backend ensures that a g++
of the very same revision is used. It's first in $PATH.

MfG, JBG

-- 
  Jan-Benedict Glaw  jbg...@lug-owl.de  +49-172-7608481
Signature of:   ...und wenn Du denkst, es geht nicht mehr,
the second  :  kommt irgendwo ein Lichtlein her.


signature.asc
Description: Digital signature


Re: [BUILDROBOT][PATCH] Fix warnings in the mep-elf target

2014-08-28 Thread Jan-Benedict Glaw
On Thu, 2014-08-28 15:47:07 -0700, Andrew Pinski  wrote:
> On Thu, Aug 28, 2014 at 3:00 PM, Jan-Benedict Glaw  wrote:
> > The following patch silences two warnings in the mep-elf target,
> > fixing the config-list.mk build:
> 
> 
> I thought -Werror was only on when the versions of GCC match.

See https://plus.google.com/112436047517408844110/posts/UE2dPJra4Qj ,
https://plus.google.com/112436047517408844110/posts/duprj3yMRT9 and
https://plus.google.com/112436047517408844110/posts/j53pZGTHerx .

MfG, JBG

-- 
  Jan-Benedict Glaw  jbg...@lug-owl.de  +49-172-7608481
Signature of:   The real problem with C++ for kernel modules is:
the second  : the language just sucks.
   -- Linus Torvalds


signature.asc
Description: Digital signature


Go patch committed: Fix bug comparing struct/array to interface

2014-08-28 Thread Ian Lance Taylor
The Go frontend had a bug comparing a struct or array value to an
interface value when the struct or array was not addressable.  The code
that was supposed to force the struct/array into a temporary variable
did not fire, because the compiler erroneously tried to handle it as a
straight struct/array comparison.  This patch fixes the problem.
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline and 4.9 branch.

The test case has been committed to the master testsuite; see
https://codereview.appspot.com/135170043 .  This fixes
http://golang.org/issue/8612 .

Ian

diff -r 3a08729cc5f8 go/expressions.cc
--- a/go/expressions.cc	Tue Aug 26 21:09:25 2014 -0700
+++ b/go/expressions.cc	Thu Aug 28 19:39:45 2014 -0700
@@ -5187,10 +5187,13 @@
   // Lower struct, array, and some interface comparisons.
   if (op == OPERATOR_EQEQ || op == OPERATOR_NOTEQ)
 {
-  if (left->type()->struct_type() != NULL)
+  if (left->type()->struct_type() != NULL
+	  && right->type()->struct_type() != NULL)
 	return this->lower_struct_comparison(gogo, inserter);
   else if (left->type()->array_type() != NULL
-	   && !left->type()->is_slice_type())
+	   && !left->type()->is_slice_type()
+	   && right->type()->array_type() != NULL
+	   && !right->type()->is_slice_type())
 	return this->lower_array_comparison(gogo, inserter);
   else if ((left->type()->interface_type() != NULL
 && right->type()->interface_type() == NULL)


(Still) ICE for cris-elf at r214710

2014-08-28 Thread Hans-Peter Nilsson
Sorry for the context-less mail but I didn't find a proper
obvious gcc-patches-message to reply to.  (Also, I can't log
into bugzilla to enter a PR, as there appear to have
been some SSL changes such that my old firefox and gcc.gnu.org
can no longer agree on a cipher or something.)  But, since
r214690 and up to and including r214714 (HEAD as of this
writing), a build for cris-elf fails on trunk as follows:

/tmp/hpautotest-gcc1/cris-elf/gccobj/./gcc/xgcc 
-B/tmp/hpautotest-gcc1/cris-elf/gccobj/./gcc/ -nostdinc 
-B/tmp/hpautotest-gcc1/cris-elf/gccobj/cris-elf/newlib/ -isystem 
/tmp/hpautotest-gcc1/cris-elf/gccobj/cris-elf/newlib/targ-include -isystem 
/tmp/hpautotest-gcc1/gcc/newlib/libc/include 
-B/tmp/hpautotest-gcc1/cris-elf/gccobj/cris-elf/libgloss/cris 
-L/tmp/hpautotest-gcc1/cris-elf/gccobj/cris-elf/libgloss/libnosys 
-L/tmp/hpautotest-gcc1/gcc/libgloss/cris 
-B/tmp/hpautotest-gcc1/cris-elf/pre/cris-elf/bin/ 
-B/tmp/hpautotest-gcc1/cris-elf/pre/cris-elf/lib/ -isystem 
/tmp/hpautotest-gcc1/cris-elf/pre/cris-elf/include -isystem 
/tmp/hpautotest-gcc1/cris-elf/pre/cris-elf/sys-include-g -O2 -march=v8 
-mbest-lib-options -O2  -g -O2 -DIN_GCC  -DCROSS_DIRECTORY_STRUCTURE  -W -Wall 
-Wwrite-strings -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes 
-Wold-style-definition  -isystem ./include   -g -DIN_LIBGCC2 -fbuilding-libgcc 
-fno-stack-protector -Dinhibit_libc  -I. -I. -I../../.././gcc
  -I/tmp/
 hpautotest-gcc1/gcc/libgcc -I/tmp/hpautotest-gcc1/gcc/libgcc/. 
-I/tmp/hpautotest-gcc1/gcc/libgcc/../gcc 
-I/tmp/hpautotest-gcc1/gcc/libgcc/../include  -DHAVE_CC_TLS -DUSE_EMUTLS -o 
_lshrdi3.o -MT _lshrdi3.o -MD -MP -MF _lshrdi3.dep -DL_lshrdi3 -c 
/tmp/hpautotest-gcc1/gcc/libgcc/libgcc2.c -fvisibility=hidden -DHIDE_EXPORTS
/tmp/hpautotest-gcc1/gcc/libgcc/libgcc2.c: In function '__lshrdi3':
/tmp/hpautotest-gcc1/gcc/libgcc/libgcc2.c:426:1: internal compiler error: in 
safe_as_a, at is-a.h:205
 }
 ^
0x9119c2 safe_as_a
/tmp/hpautotest-gcc1/gcc/gcc/is-a.h:205
0x9119c2 JUMP_LABEL_AS_INSN
/tmp/hpautotest-gcc1/gcc/gcc/rtl.h:1663
0x9119c2 find_dead_or_set_registers
/tmp/hpautotest-gcc1/gcc/gcc/resource.c:500
0x912408 mark_target_live_regs(rtx_insn*, rtx_insn*, resources*)
/tmp/hpautotest-gcc1/gcc/gcc/resource.c:1115
0x90cb4b fill_slots_from_thread
/tmp/hpautotest-gcc1/gcc/gcc/reorg.c:2404
0x90ff45 fill_eager_delay_slots
/tmp/hpautotest-gcc1/gcc/gcc/reorg.c:2933
0x90ff45 dbr_schedule
/tmp/hpautotest-gcc1/gcc/gcc/reorg.c:3742
0x9108ef rest_of_handle_delay_slots
/tmp/hpautotest-gcc1/gcc/gcc/reorg.c:3885
0x9108ef execute
/tmp/hpautotest-gcc1/gcc/gcc/reorg.c:3916
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See  for instructions.
make[4]: *** [_lshrdi3.o] Error 1

Use "./cc1 -fpreprocessed this.i -O2" to repeat.

/* Reduced test case: a 64-bit logical right shift carried out on two
   int-sized halves.  Keep the code tokens exactly as they are; this
   snippet is what is fed to cc1 to repeat the failure.  */

/* The two int-sized halves of a double-word value.  */
struct DWstruct {int low, high;};

/* Overlays the two halves (s) on the full long long value (ll).
   NOTE(review): which half is the most significant one depends on the
   target's layout; presumably "low" first matches the target under
   test -- confirm.  */
typedef union
{
  struct DWstruct s;
  long long ll;
} DWunion;

/* Return U shifted right by B bits with zero fill, computed on the two
   halves separately.  */
long long
__lshrdi3 (long long u, int b)
{
  /* A shift by zero leaves the value unchanged.  */
  if (b == 0)
return u;

  const DWunion uu = {.ll = u};
  /* 32 - B: number of bits of the high half that carry into the low
     half.  Zero or negative when B is 32 or more.  */
  const int bm = (4 * (8)) - b;
  DWunion w;

  if (bm <= 0)
{
  /* B >= 32: the high half of the result is zero and the low half is
     the old high half shifted right by B - 32.  */
  w.s.high = 0;
  w.s.low = (unsigned int) uu.s.high >> -bm;
}
  else
{
  /* 0 < B < 32: the bits shifted out of the high half become the top
     bits of the low half.  */
  const unsigned int carries = (unsigned int) uu.s.high << bm;

  w.s.high = (unsigned int) uu.s.high >> b;
  w.s.low = ((unsigned int) uu.s.low >> b) | carries;
}

  return w.ll;
}

That aside, I must say, I'm somewhat impressed by the work
you've done here, and the fears I expressed at the Cauldron
about churn to port-specific code (i.e. supposedly making
back-porting bug-fixes to older releases difficult) seem to
thankfully have been unfounded in the patches I've seen.
(IIRC, I saw *one* type-change; one line, for two ports!)

Also, I wouldn't worry too much trying to go to great lengths to
try and *avoid* this kind of fallout (it's just going to happen,
QED), as long as you're prepared to handle it quickly.  And
thanks in advance for that! ;)

brgds, H-P


[PATCH] 2014-08-29 Honggyu Kim

2014-08-28 Thread Honggyu Kim
* doc/generic.texi: Fix typo.
---
 gcc/ChangeLog|4 
 gcc/doc/generic.texi |2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac5dc7c..01698e6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2014-08-29  Honggyu Kim  
+
+   * doc/generic.texi: Fix typo.
+
 2014-08-29  David Malcolm  
 
PR bootstrap/62300
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 4476f28..a598500 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -53,7 +53,7 @@ seems inelegant.
 @node Deficiencies
 @section Deficiencies
 
-There are many places in which this document is incomplet and incorrekt.
+There are many places in which this document is incomplete or incorrect.
 It is, as of yet, only @emph{preliminary} documentation.
 
 @c -
-- 
1.7.9.5



Re: [PATCH] 2014-08-29 Honggyu Kim

2014-08-28 Thread Marek Polacek
On Fri, Aug 29, 2014 at 02:55:36PM +0900, Honggyu Kim wrote:
> --- a/gcc/doc/generic.texi
> +++ b/gcc/doc/generic.texi
> @@ -53,7 +53,7 @@ seems inelegant.
>  @node Deficiencies
>  @section Deficiencies
>  
> -There are many places in which this document is incomplet and incorrekt.
> +There are many places in which this document is incomplete or incorrect.

I believe these typos are intentional.

Marek


Re: [PATCH i386 AVX512] [29/n] Add narrowing vpmov.

2014-08-28 Thread Kirill Yukhin
Hello Uroš,
On 28 Aug 20:34, Uros Bizjak wrote:
> On Thu, Aug 28, 2014 at 3:15 PM, Kirill Yukhin  
> wrote:
> > Is it ok for trunk?
> There is one inconsistency - existing pattern is named ..._mask_store,
> new ones are named ..._store_mask. Is there a reason for this
> difference?
This was taken into account in built-ins patch, however it's better
to be consistent. Fixed. Bootstrap/regtest in progress. Thanks!

--
Thanks, K


[PATCH] doc/generic.texi: Fix typo

2014-08-28 Thread Honggyu Kim
This fixes some typos in the GCC internals documentation.

2014-08-29  Honggyu Kim  

* doc/generic.texi: Fix typo.
---
 gcc/ChangeLog|4 
 gcc/doc/generic.texi |2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac5dc7c..01698e6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2014-08-29  Honggyu Kim  
+
+   * doc/generic.texi: Fix typo.
+
 2014-08-29  David Malcolm  
 
PR bootstrap/62300
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 4476f28..a598500 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -53,7 +53,7 @@ seems inelegant.
 @node Deficiencies
 @section Deficiencies
 
-There are many places in which this document is incomplet and incorrekt.
+There are many places in which this document is incomplete or incorrect.
 It is, as of yet, only @emph{preliminary} documentation.
 
 @c -
-- 
1.7.9.5



Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 12:28 GMT+04:00 Ilya Enkovich :
> 2014-08-28 0:19 GMT+04:00 Vladimir Makarov :
>> On 2014-08-26 5:42 PM, Ilya Enkovich wrote:
>>>
>>> Hi,
>>>
>>> Here is a patch I tried.  I apply it over revision 214215.  Unfortunately
>>> I do not have a small reproducer but the problem can be easily reproduced on
>>> SPEC2000 benchmark 175.vpr.  The problem is in read_arch.c:701 where float
>>> value is compared with float constant 1.0.  It is inlined into read_arch
>>> function and can be easily found in RTL dump of function read_arch as a
>>> float comparison with 1.0 after the first call to strtod function.
>>>
>>> Here is a compilation string I use:
>>>
>>> gcc -m32 -mno-movbe -g3 -fdump-rtl-all-details -O2 -ffast-math
>>> -mfpmath=sse -m32  -march=slm -fPIE -pie -c -o read_arch.o
>>> -DSPEC_CPU2000read_arch.c
>>>
>>> In my final assembler comparison with 1.0 looks like:
>>>
>>> comiss  .LC11@GOTOFF(%ebp), %xmm0   # 1101  *cmpisf_sse [length =
>>> 7]
>>>
>>> and %ebp here doesn't have a proper value.
>>>
>>> I'll try to make a smaller reproducer if these instructions don't help.
>>
>>
>> I've managed to reproduce it.  Although it would be better to send the patch
>> as an attachment.
>>
>> The problem is actually in IRA, not LRA.  IRA splits the pseudo used for PIC.
>> Then, in a region where a *new* pseudo is used as PIC, we rematerialize a
>> constant which is transformed into memory addressed through the *original*
>> PIC pseudo.
>>
>> To solve the problem we should prevent such splitting and guarantee that PIC
>> pseudo allocnos in different regions get the same hard reg.
>>
>> The following patch should solve the problem.
>>
>
> Thanks for the patch! I'll try it and be back with results.

Seems your patch doesn't cover all cases.  Attached is a modified
patch (with your changes included) and a test where double constant is
wrongly rematerialized.  I also see in ira dump that there is still a
copy of PIC reg created:

Initialization of original PIC reg:
(insn 23 22 24 2 (set (reg:SI 127)
(reg:SI 3 bx)) test.cc:42 90 {*movsi_internal}
 (expr_list:REG_DEAD (reg:SI 3 bx)
(nil)))
...
Copy is created:
(insn 135 37 25 3 (set (reg:SI 138 [127])
(reg:SI 127)) 90 {*movsi_internal}
 (expr_list:REG_DEAD (reg:SI 127)
(nil)))
...
Copy is used:
(insn 119 25 122 3 (set (reg:DF 134)
(mem/u/c:DF (plus:SI (reg:SI 138 [127])
(const:SI (unspec:SI [
(symbol_ref/u:SI ("*.LC0") [flags 0x2])
] UNSPEC_GOTOFF))) [5  S8 A64])) 128 {*movdf_internal}
 (expr_list:REG_EQUIV (const_double:DF
2.9997371893933895137251965934410691261292e-4
[0x0.9d495182a99308p-11])
(nil)))

After reload we have new usage of r127 which is allocated to ecx which
actually does not have any definition in this function at all.

(insn 151 42 44 4 (set (reg:SI 0 ax [147])
(plus:SI (reg:SI 2 cx [127])
(const:SI (unspec:SI [
(symbol_ref/u:SI ("*.LC0") [flags 0x2])
] UNSPEC_GOTOFF test.cc:44 213 {*leasi}
 (expr_list:REG_EQUAL (symbol_ref/u:SI ("*.LC0") [flags 0x2])
(nil)))
(insn 44 151 45 4 (set (reg:DF 21 xmm0 [orig:129 D.2450 ] [129])
(mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
(mem/u/c:DF (reg:SI 0 ax [147]) [5  S8 A64]))) test.cc:44
790 {*fop_df_comm_sse}
 (expr_list:REG_EQUAL (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
(const_double:DF
2.9997371893933895137251965934410691261292e-4
[0x0.9d495182a99308p-11]))
(nil)))

Compilation string: g++ -m32 -O2 -mfpmath=sse -fPIE -S test.cc

Thanks,
Ilya

>
> Ilya
>>


pie-2014-08-28.patch
Description: Binary data


test.cc
Description: Binary data


Re: Enable EBX for x86 in 32bits PIC code

2014-08-28 Thread Ilya Enkovich
2014-08-28 22:58 GMT+04:00 Uros Bizjak :
> On Fri, Aug 22, 2014 at 2:21 PM, Ilya Enkovich  wrote:
>
>> On Cauldron 2014 we had a couple of talks about relaxation of ebx usage in 
>> 32bit PIC mode.  It was decided that the best approach would be to not fix 
>> the ebx register, use a pseudo register for the GOT base address and let the 
>> allocator do the rest.  This should be similar to how clang and icc work with 
>> the GOT base address.  I've been working for some time on such a patch and 
>> now want to share my results.
>
>>  (define_insn "*pushtf"
>>[(set (match_operand:TF 0 "push_operand" "=<,<")
>> -   (match_operand:TF 1 "general_no_elim_operand" "x,*roF"))]
>> +   (match_operand:TF 1 "nonimmediate_no_elim_operand" "x,*roF"))]
>
> Can you please explain the reason for this change (and a couple of
> similar changes to push patterns) ?

This is a workaround for a stability problem with reload.  Immediate
operands cause new usages of the pseudo PIC register in reload, which leads
to wrong register allocation.  These changes won't be required
once the reload issue is resolved.

Ilya

>
> Uros.


[PATCH] Fix byte size confusion in bswap pass

2014-08-28 Thread Thomas Preud'homme
[CCing you Jakub as you are the one who raised this issue to me]

The bswap pass deals with 3 possibly different byte sizes: host, target and the 
size of a byte marker in the symbolic_number structure [1]. However, right now 
the code mixes the three sizes. This works in practice as the pass is only 
enabled for targets with BITS_PER_UNIT == 8 and nobody runs GCC on a host with 
CHAR_BIT != 8. As prompted by Jakub Jelinek, this patch fixes this mess. Byte 
markers are 8-bit quantities (they could be made 4-bit quantities but I 
preferred to keep the code working the same as before) for which a new macro is 
introduced (BITS_PER_MARKER). Anything related to storing the value or a byte 
marker in a variable should check for the host byte size or wide integer size, 
and anything aimed at manipulating the target value should check for 
BITS_PER_UNIT.


[1] Although the comment for this structure implies that a byte marker has the 
same size as the host byte, the way it is used in the code (even before any of 
my patches) shows that it uses a fixed size of 8 [2].
[2] Note that since the pass is only active for targets with BITS_PER_UNIT == 
8, it might be using the target byte size.


gcc/ChangeLog:

2014-08-29  Thomas Preud'homme  

* tree-ssa-math-opts.c (struct symbolic_number): Clarify comment about
the size of byte markers.
(do_shift_rotate): Fix confusion between host, target and marker byte
size.
(verify_symbolic_number_p): Likewise.
(find_bswap_or_nop_1): Likewise.
(find_bswap_or_nop): Likewise.


diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index ca2b30d..55c5df7 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1600,11 +1600,10 @@ make_pass_cse_sincos (gcc::context *ctxt)
 
 /* A symbolic number is used to detect byte permutation and selection
patterns.  Therefore the field N contains an artificial number
-   consisting of byte size markers:
+   consisting of octet sized markers:
 
-   0- byte has the value 0
-   1..size - byte contains the content of the byte
-   number indexed with that value minus one.
+   0- target byte has the value 0
+   1..size - marker value is the target byte index minus one.
 
To detect permutations on memory sources (arrays and structures), a symbolic
number is also associated a base address (the array or structure the load is
@@ -1629,6 +1628,8 @@ struct symbolic_number {
   unsigned HOST_WIDE_INT range;
 };
 
+#define BITS_PER_MARKER 8
+
 /* The number which the find_bswap_or_nop_1 result should match in
order to have a nop.  The number is masked according to the size of
the symbolic number before using it.  */
@@ -1650,15 +1651,16 @@ do_shift_rotate (enum tree_code code,
 struct symbolic_number *n,
 int count)
 {
-  int bitsize = TYPE_PRECISION (n->type);
+  int size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 
-  if (count % 8 != 0)
+  if (count % BITS_PER_UNIT != 0)
 return false;
+  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
 
   /* Zero out the extra bits of N in order to avoid them being shifted
  into the significant bits.  */
-  if (bitsize < 8 * (int)sizeof (int64_t))
-n->n &= ((uint64_t)1 << bitsize) - 1;
+  if (size < 64 / BITS_PER_MARKER)
+n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
 
   switch (code)
 {
@@ -1668,22 +1670,22 @@ do_shift_rotate (enum tree_code code,
 case RSHIFT_EXPR:
   /* Arithmetic shift of signed type: result is dependent on the value.  */
   if (!TYPE_UNSIGNED (n->type)
- && (n->n & ((uint64_t) 0xff << (bitsize - 8
+ && (n->n & ((uint64_t) 0xff << ((size - 1) * BITS_PER_MARKER
return false;
   n->n >>= count;
   break;
 case LROTATE_EXPR:
-  n->n = (n->n << count) | (n->n >> (bitsize - count));
+  n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
   break;
 case RROTATE_EXPR:
-  n->n = (n->n >> count) | (n->n << (bitsize - count));
+  n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
   break;
 default:
   return false;
 }
   /* Zero unused bits for size.  */
-  if (bitsize < 8 * (int)sizeof (int64_t))
-n->n &= ((uint64_t)1 << bitsize) - 1;
+  if (size < 64 / BITS_PER_MARKER)
+n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
   return true;
 }
 
@@ -1724,13 +1726,13 @@ init_symbolic_number (struct symbolic_number *n, tree 
src)
   if (size % BITS_PER_UNIT != 0)
 return false;
   size /= BITS_PER_UNIT;
-  if (size > (int)sizeof (uint64_t))
+  if (size > 64 / BITS_PER_MARKER)
 return false;
   n->range = size;
   n->n = CMPNOP;
 
-  if (size < (int)sizeof (int64_t))
-n->n &= ((uint64_t)1 << (size * BITS_PER_UNIT)) - 1;
+  if (size < 64 / BITS_PER_MARKER)
+n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
 
   return true;
 }
@@ -1868,15 +1870,17 @@ find_bswap_or_nop_1 (gimple stmt,