date:20140916

Re: [gomp4] OpenACC routine directive

2014-09-16 Thread Tobias Burnus

Cesar Philippidis wrote:
> This patch adds initial support for the OpenACC routine directive. It's
> not complete just yet because it doesn't implement any of the optional
> clauses, except for the optional function/subroutine name. As such, it
> doesn't go beyond marking functions with the "omp declare target" attribute.

For the Fortran side: As you currently use the OpenMP implementation, it
should work, but if you later add support for clauses, recall that you may
need to store those also in the .mod files (cf. module.c). (That's only needed
if information from the clauses has to be propagated to the call site.)

> My understanding is 'acc routine' should also be visible at the call site,
> but if it's not the compiler can treat it as a regular function call.

... which means that it has to reject it, unless the compiler can (e.g. via
LTO) inline the function or find out that the function also exists on the
accelerator.

> Furthermore, I've been told that it's not sufficient to place the
> routine directive in an interface block by itself. 

Well, that's also not different to C/C++: If you just use "#pragma acc routine"
in the header file, where you declare the function, it also won't work when
you don't have the pragma for the definition.

> The justification for this is that fortran
> lacks a file scope, so 'acc routine' wouldn't be visible to fact.

True. However, in modern Fortran use, you would not use an interface
block (unless the function is written in, e.g., C). But you would use
a module - or an internal procedure (nested procedure with "contains").

If you want, you can also check that an omp declare target/acc routine
in an interface block matches the one in a subroutine/function by
adding a check in resolve.c's resolve_global_procedure.

> Is this patch OK for gomp-4_0-branch?

The Fortran part looks good to me.

Tobias

> 2014-09-15  Cesar Philippidis  
>   gcc/fortran/
>   * gfortran.h (ST_OACC_ROUTINE): New statement enum.
>   * match.h (gfc_match_oacc_routine): New prototype.
>   * openmp.c (gfc_match_oacc_routine): New function.
>   * parse.c (decode_oacc_directive): Handle the routine directive.
>   (next_statement): Handle ST_OACC_ROUTINE.
>   (gfc_ascii_statement): Likewise.

Re: [PATCH i386 AVX512] [41/n] Extend extract insn patterns.

2014-09-16 Thread Uros Bizjak

On Tue, Sep 16, 2014 at 7:26 AM, Kirill Yukhin  wrote:
> Hello,
> This patch extends extract insn patterns.
> It also fixes ICE on testsuite when F16C switched off.
> Also it fixes condition in old extract pattern.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
> * config/i386/i386.c
> (ix86_expand_vector_extract): Handle V32HI and V64QI modes.
> * config/i386/sse.md
> (define_mode_iterator VI48F_256): New.
> (define_mode_attr extract_type): Ditto.
> (define_mode_attr extract_suf): Ditto.
> (define_mode_iterator AVX512_VEC): Ditto.
> (define_expand
> "_vextract_mask"): Use
> AVX512_VEC.
> (define_insn "avx512dq_vextract64x2_1_maskm"): New.
> (define_insn
> "avx512dq_vextract64x2_1"):
> Ditto.
> (define_mode_attr extract_type_2): Ditto.
> (define_mode_attr extract_suf_2): Ditto.
> (define_mode_iterator AVX512_VEC_2): Ditto.
> (define_expand
> "_vextract_mask"): Use
> AVX512_VEC_2 mode iterator.
> (define_insn "vec_extract_hi__maskm"): Ditto.
> (define_expand "avx512vl_vextractf128"): Ditto.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for V16FI mode): Ditto.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for VI8F_256 mode): Ditto.
> (define_insn "vec_extract_hi_"): Add masking.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for VI4F_256 mode): Ditto.
> (define_insn "vec_extract_lo__maskm"): Ditto.
> (define_insn "vec_extract_hi__maskm"): Ditto.
> (define_insn "vec_extract_hi_"): Add masking.
> (define_mode_iterator VEC_EXTRACT_MODE): Add V64QI and V32HI modes.
> (define_insn "vcvtph2ps"): Fix pattern condition.
> (define_insn "avx512f_vextract32x4_1_maskm"): Ditto.
>
> --
> Thanks, K
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 30120a5..ccfd47d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -40979,6 +40979,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
> rtx vec, int elt)
> }
>break;
>
> +case V32HImode:
> +  if (TARGET_AVX512BW)
> +   {
> + tmp = gen_reg_rtx (V16HImode);
> + if (elt < 16)
> +   emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
> + else
> +   emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
> + ix86_expand_vector_extract (false, target, tmp, elt & 15);
> + return;
> +   }
> +  break;
> +
> +case V64QImode:
> +  if (TARGET_AVX512BW)
> +   {
> + tmp = gen_reg_rtx (V32QImode);
> + if (elt < 32)
> +   emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
> + else
> +   emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
> + ix86_expand_vector_extract (false, target, tmp, elt & 31);
> + return;
> +   }
> +  break;
> +
>  case V16SFmode:
>tmp = gen_reg_rtx (V8SFmode);
>if (elt < 8)
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index bd321fc..0e21031 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -534,6 +534,7 @@
> (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
> (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
> (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
> +(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
>
>  ;; Mapping from float mode to required SSE level
>  (define_mode_attr sse
> @@ -6319,44 +6320,64 @@
>operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 
> 4);
>  })
>
> -(define_expand "avx512f_vextract32x4_mask"
> +(define_mode_attr extract_type
> +  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
> +
> +(define_mode_attr extract_suf
> +  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
> +
> +(define_mode_iterator AVX512_VEC
> +  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
> +
> +(define_expand "_vextract_mask"
>[(match_operand: 0 "nonimmediate_operand")
> -   (match_operand:V16FI 1 "register_operand")
> +   (match_operand:AVX512_VEC 1 "register_operand")
> (match_operand:SI 2 "const_0_to_3_operand")
> (match_operand: 3 "nonimmediate_operand")
> (match_operand:QI 4 "register_operand")]
>"TARGET_AVX512F"
>  {
> +  int mask;
> +  mask = INTVAL (operands[2]);
> +
>if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
>  operands[0] = force_reg (mode, operands[0]);
> -  switch (INTVAL (operands[2]))
> -{
> -case 0:
> -  emit_insn (gen_avx512f_vextract32x4_1_mask (operands[0],
> -

Re: [PR libfortran/62768] Handle filenames with embedded nulls

2014-09-16 Thread FX

>> 2014-09-05  Janne Blomqvist  
>> 
>>PR libfortran/62768
>>* io/io.h (gfc_unit): Store C string for the filename.
>>* io/close.c (st_close): Use gfc_unit.filename.
>>* io/inquire.c (inquire_via_unit): Likewise.
>>* io/open.c (new_unit): Likewise.
>>(already_open): Likewise, unlink file before freeing filename.
>>* io/unit.c (init_units): Likewise.
>>(close_unit_1): Likewise.
>>(filename_from_unit): Likewise.
>>* io/unix.c (compare_file_filename): Likewise.
>>(find_file0): Likewise.
>>(delete_file): Likewise.

OK, if you add a runtime testcase.

I tried to think of other characters we might want to sanitize/special case, 
but at least on Unix/POSIX only NUL and / are fundamentally different. It might 
make sense to think about it for Windows targets.

FX

RE: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread VandeVondele Joost

Hi Jakub,

thanks!

> +dg-test $gfortran_test_path/[lindex $args 1] "" 
> $gfortran_aux_module_flags
> +# cleanup-modules isn't intentionally invoked here.

should this be 'is intentionally not invoked here' ?

I'm currently seeing a lot of errors in the log of make -j32 -k check.  Similar 
to the ones below.

Joost

ERROR: tcl error sourcing 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp.
ERROR: can't rename "dg-save-unknown": command doesn't exist
while executing
"rename dg-save-unknown unknown"
(procedure "saved-dg-test" line 96)
invoked from within
"saved-dg-test 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/test_common_binding_labels_2_main.f03
 {  -O } { -pedantic-errors}"
("eval" body line 1)
invoked from within
"eval saved-dg-test $args "
(procedure "dg-test" line 11)
invoked from within
"dg-test $test "$flags $flags_t" ${default-extra-flags}"
(procedure "gfortran-dg-runtest" line 28)
invoked from within
"gfortran-dg-runtest [lsort \
   [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" 
$DEFAULT_FFLAGS"
(file "/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp" 
line 47)
invoked from within
"source /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp"
("uplevel" body line 1)
invoked from within
"uplevel #0 source 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp"
invoked from within
"catch "uplevel #0 source $test_file_name""
Running /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp 
...
ERROR: tcl error sourcing 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp.
ERROR: torture-init: torture_without_loops is not empty as expected
while executing
"error "torture-init: torture_without_loops is not empty as expected""
invoked from within
"if [info exists torture_without_loops] {
error "torture-init: torture_without_loops is not empty as expected"
}"
(procedure "torture-init" line 4)
invoked from within
"torture-init"
(procedure "gfortran-dg-runtest" line 5)
invoked from within
"gfortran-dg-runtest [lsort \
   [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp""
(file 
"/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp" line 
32)
invoked from within
"source /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp"
("uplevel" body line 1)
invoked from within
"uplevel #0 source 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp"
invoked from within
"catch "uplevel #0 source $test_file_name""
Running 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/graphite/graphite.exp 
...
ERROR: tcl error sourcing 
/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/graphite/graphite.exp.

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 08:28:48AM +, VandeVondele  Joost wrote:
> > +dg-test $gfortran_test_path/[lindex $args 1] "" 
> > $gfortran_aux_module_flags
> > +# cleanup-modules isn't intentionally invoked here.
> 
> should this be 'is intentionally not invoked here' ?

Isn't that the same?

> I'm currently seeing a lot of errors in the log of make -j32 -k check.  
> Similar to the ones below.

Weird.  Works just fine for me, on two different boxes,
one with dejagnu 1.5 and one with dejagnu 1.5.1.  There is
no dg-save-unknown in gfortran.dg/test_common_binding_labels_2_main.f03,
but dg-compile-aux-modules and that is defined in gfortran.dg/dg.exp.
> 
> ERROR: tcl error sourcing 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp.
> ERROR: can't rename "dg-save-unknown": command doesn't exist
> while executing
> "rename dg-save-unknown unknown"
> (procedure "saved-dg-test" line 96)
> invoked from within
> "saved-dg-test 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/test_common_binding_labels_2_main.f03
>  {  -O } { -pedantic-errors}"
> ("eval" body line 1)
> invoked from within
> "eval saved-dg-test $args "
> (procedure "dg-test" line 11)
> invoked from within
> "dg-test $test "$flags $flags_t" ${default-extra-flags}"
> (procedure "gfortran-dg-runtest" line 28)
> invoked from within
> "gfortran-dg-runtest [lsort \
>[glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" 
> $DEFAULT_FFLAGS"
> (file "/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp" 
> line 47)
> invoked from within
> "source /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp"
> ("uplevel" body line 1)
> invoked from within
> "uplevel #0 source 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/dg.exp"
> invoked from within
> "catch "uplevel #0 source $test_file_name""
> Running 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp ...
> ERROR: tcl error sourcing 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp.
> ERROR: torture-init: torture_without_loops is not empty as expected
> while executing
> "error "torture-init: torture_without_loops is not empty as expected""
> invoked from within
> "if [info exists torture_without_loops] {
>   error "torture-init: torture_without_loops is not empty as expected"
> }"
> (procedure "torture-init" line 4)
> invoked from within
> "torture-init"
> (procedure "gfortran-dg-runtest" line 5)
> invoked from within
> "gfortran-dg-runtest [lsort \
>[find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp""
> (file 
> "/data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp" line 
> 32)
> invoked from within
> "source 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp"
> ("uplevel" body line 1)
> invoked from within
> "uplevel #0 source 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/gomp/gomp.exp"
> invoked from within
> "catch "uplevel #0 source $test_file_name""
> Running 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/graphite/graphite.exp
>  ...
> ERROR: tcl error sourcing 
> /data/vjoost/gnu/gcc_trunk/gcc/gcc/testsuite/gfortran.dg/graphite/graphite.exp.

Jakub

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 10:42:46AM +0200, Jakub Jelinek wrote:
> > I'm currently seeing a lot of errors in the log of make -j32 -k check.  
> > Similar to the ones below.
> 
> Weird.  Works just fine for me, on two different boxes,
> one with dejagnu 1.5 and one with dejagnu 1.5.1.  There is
> no dg-save-unknown in gfortran.dg/test_common_binding_labels_2_main.f03,
> but dg-compile-aux-modules and that is defined in gfortran.dg/dg.exp.

Looking around in gcc-testresults, for some people (e.g. H.J.'s testresults,
Andreas Schwab ia64, etc.) it works just fine too,
https://gcc.gnu.org/ml/gcc-testresults/2014-09/msg01551.html
shows the same symptoms as you report.  What dejagnu version are you using?

Jakub

[ping] define CROSS = @CROSS@ in gcc/Makefile.in

2014-09-16 Thread Olivier Hainque

Hello,

ping on https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00056.html

Thanks in advance,

With Kind Regards,

Olivier

On Sep 1, 2014, at 17:26 , Olivier Hainque  wrote:

> Hello,
> 
> This patch is necessary for proper operation of a piece
> of the Ada Makefile fragment which tests the value of $(CROSS).
> 
> @ substitutions aren't performed for the language specific
> Makefile fragments so using @CROSS directly isn't an option
> there.
> 
> We have been using this for years and multiple targets in our
> local trees. Bootstrapped & reg-tested on x86_64-linux.
> 
> OK to commit ?
> 
> Thanks in advance for your feedback,
> 
> Olivier
> 
> 2014-09-01  Olivier Hainque  
> 
>   * Makefile.in (CROSS): Define, to @CROSS@.
> 
> 
> 
>

[ping*3] fix build failure of x86_64-mingw32, missing crtbegin/crtend.o

2014-09-16 Thread Olivier Hainque

Hello,

Ping #3 for https://gcc.gnu.org/ml/gcc-patches/2014-07/msg00237.html

Thanks in advance for your feedback,

With Kind Regards,

Olivier

On Jul 3, 2014, at 16:57 , Olivier Hainque  wrote:
> From gcc/i386/config/mingw32.h, STARTFILE_SPEC and ENDFILE_SPEC include
> crtbegin.o and crtend.o unconditionally.
> 
> libgcc/config.host includes crtbegin.o and crtend.o in extra_parts for
> i[34567]86-*-mingw* but not for x86_64-*-mingw*.
> 
> Building a toolchain for x86_64-pc-mingw32 then rapidly fails with complaints
> about crtbegin.o and crtend.o missing.
> 
> This patch is a proposal to fix this by adding the objects to extra_parts,
> as well as i386/t-cygming to tmake_file so rules are available to build the
> objects.
> 
> Tested by verifying that a build with --target=x86_64-pc-mingw32
> proceeds to completion after the change.
> 
> OK to commit ?
> 
> Thanks in advance for your feedback,
> 
> With Kind Regards,
> 
> Olivier
> 
> 2014-07-02  Olivier Hainque  
> 
> libgcc/
>  * config.host (x86_64-*-mingw*): Add i386/t-cygming to tmake_file
>  and crtbegin.o + crtend.o to extra_parts.
> 
>

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Paolo Carlini


Hi,

On 09/16/2014 10:42 AM, Jakub Jelinek wrote:

On Tue, Sep 16, 2014 at 08:28:48AM +, VandeVondele  Joost wrote:

+dg-test $gfortran_test_path/[lindex $args 1] "" $gfortran_aux_module_flags
+# cleanup-modules isn't intentionally invoked here.

should this be 'is intentionally not invoked here' ?

Isn't that the same?
Well, if the module "isn't intentionally invoked" it could still be 
invoked, only, not intentionally. If something "is intentionally not 
invoked", that can't happen, because the code intentionally, 'actively' 
so to speak, does *not* invoke it.


My guess too is that you want the latter exact meaning.

Paolo.

RE: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread VandeVondele Joost

>> What dejagnu version are you using?

> runtest --version
WARNING: Couldn't find the global config file.
Expect version is   5.44.1.15
Tcl version is  8.5
Framework version is 1.4.4

Re: [PATCH, i386, Pointer Bounds Checker 30/x] Size relocation

2014-09-16 Thread Uros Bizjak

Hello!

> This patch adds size relocation support for i386 target.  Relocation is used 
> to compute bounds for static objects with incomplete type.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2014-06-11  Ilya Enkovich  
>
> * config/i386/i386.md (UNSPEC_SIZEOF): New.
> (move_size_reloc_si): New.
> (move_size_reloc_di): New.
> * config/i386/predicates.md (symbol_operand): New.
>
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 32db2c6..5fd556b 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -79,6 +79,7 @@
>UNSPEC_PLTOFF
>UNSPEC_MACHOPIC_OFFSET
>UNSPEC_PCREL
> +  UNSPEC_SIZEOF
>
>;; Prologue support
>UNSPEC_STACK_ALLOC
> @@ -18340,6 +18341,32 @@
>"bndstx\t{%2, %3|%3, %2}"
>[(set_attr "type" "mpxst")])
>
> +(define_insn "move_size_reloc_si"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +   (unspec:SI
> +[(match_operand:SI 1 "symbol_operand")]
> +UNSPEC_SIZEOF))]
> +  "TARGET_MPX"
> +  "mov{l}\t{%1@SIZE, %0|%0, %1@SIZE}"
> +  [(set_attr "type" "imov")
> +   (set_attr "mode" "SI")])
> +
> +(define_insn "move_size_reloc_di"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +   (unspec:DI
> +[(match_operand:DI 1 "symbol_operand")]
> +UNSPEC_SIZEOF))]
> +  "TARGET_64BIT && TARGET_MPX"
> +{
> +  if (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE
> +  || ix86_cmodel == CM_MEDIUM_PIC || ix86_cmodel == CM_LARGE_PIC)
> +return "movabs{q}\t{%1@SIZE, %0|%0, %1@SIZE}";

Can x86_64_immediate_operand predicate be used here?

> +  else
> +return "mov{l}\t{%1@SIZE, %k0|%k0, %1@SIZE}";
> +}
> +  [(set_attr "type" "imov")
> +   (set_attr "mode" "DI")])
> +
>  (include "mmx.md")
>  (include "sse.md")
>  (include "sync.md")
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index a738033..e4c5d21 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -119,6 +119,10 @@
> (match_test "TARGET_64BIT")
> (match_test "REGNO (op) > BX_REG")))
>
> +;; Return true if VALUE is symbol reference
> +(define_predicate "symbol_operand"
> +  (match_code "symbol_ref"))
> +
>  ;; Return true if VALUE can be stored in a sign extended immediate field.
>  (define_predicate "x86_64_immediate_operand"
>(match_code "const_int,symbol_ref,label_ref,const")

Uros.

Re: [PATCH, Pointer Bounds Checker 23/x] Function split

2014-09-16 Thread Ilya Enkovich

2014-09-16 1:08 GMT+04:00 Jeff Law :
> On 09/15/14 10:20, Ilya Enkovich wrote:

 A problem I'm trying to avoid is that bounds in return statement are
 not taken into account when checking for data dependencies between
 parts.  It means we may have a case when return statement with bounds
 is put into split part but bounds producer is not.  If
 SSA_NAME_DEFSTMT for returned bounds is in the same partition as a
 return then I do not think I should care about the rest of definitions
 chain because regular split point checks should make sure we have
 everything required.
>>>
>>>
>>> Is the data dependency in the gimple IL?  If so there shouldn't be
>>> anything
>>> particularly special we need to do.  If not, then how ugly would it be to
>>> "use" the bounds at the return statement to expose the missing
>>> dependency?
>>>
>>> Not asking you to make that change, just want to make sure that I
>>> understand
>>> the core issue and that if something is missing from a dependency
>>> standpoint
>>> that we consider what it would take to expose the missing dependency.
>>
>>
>> Gimple IL has required data dependencies to handle returns properly.
>> But split pass handles return basic block in a special way.  Return
>> basic block has to have a simple form and is not scanned using stmt
>> walkers as it is done for all other BBs by visit_bb.  It is assumed
>> that all dependencies for return BB are PHI args and returned value.
>> Thus returned bounds are just not taken into account.  That's how I
>> see the problem.
>
> I must be misunderstanding something then.  I fundamentally don't see how
> the return bounds are any different here than the return value.  If we have
> exposed the bounds in the IL, then aren't they going to be handled just like
> any other object in the IL?

They are not handled like any other object in IL because return block
and all statements in it are not handled as all other statements we
put into split part.

Here is a comment from find_return_bb:

/* Return basic block containing RETURN statement.  We allow basic blocks
   of the form:
= tmp_var;
   return 
   but return_bb can not be more complex than this.
...
*/

Phi nodes may also be present in return_bb.

All blocks going to split part are analyzed by visit_bb function.
Return basic block is not analyzed in the same way but still may be
copied into split part in case return value is defined in it.  There
is a special code in visit_bb to add args of phi statements of
return_bb as uses of split part to have no undefined values in copied
block.  It was enough when those phi args plus return value were only
uses in return_bb.

But now we add returned bounds to GIMPLE_RETURN as a new use and this
new use is ignored.  If split part returns value then return_bb will
be copied into it.  It means I should check returned bounds are
defined there too.  If SSA_NAME_DEF_STMT of returned bounds is in
split part then it is OK.  If SSA_NAME_DEF_STMT of returned bounds is
in return_bb then it is also OK because it means it is a result of PHI
node whose args were added as additional uses for split part earlier
in visit_bb.

At least that is how I think this happens :)

>
> Maybe you should post the IL for a case where this all matters and walk me
> through the key issues.

I attach a dump I got from Chrome compilation with no additional
checks restrictions in split.  Original function returns value defined
by phi node in return_bb and bounds defined in BB2.  Split part
contains BB3, BB4 and BB5 and resulting function part has usage of
returned bounds but no producer for it.

Thanks,
Ilya

>
> jeff

split.dump
Description: Binary data

[PATCH][match-and-simplify] Fix comparison operator type handling

2014-09-16 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2014-09-16  Richard Biener  

* genmatch.c (operator_id): Add tcc member.
(get_operand_type): New function, split out from ...
(expr::gen_transform): ... here.  Treat comparisons
properly.
(dt_simplify::gen): Use get_operand_type.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215264)
+++ gcc/genmatch.c  (working copy)
@@ -171,12 +171,14 @@ id_base::id_base (id_kind kind_, const c
 
 struct operator_id : public id_base
 {
-  operator_id (enum tree_code code_, const char *id_, unsigned nargs_)
+  operator_id (enum tree_code code_, const char *id_, unsigned nargs_,
+  const char *tcc_)
   : id_base (id_base::CODE, id_),
-  code (code_), nargs (nargs_) {}
+  code (code_), nargs (nargs_), tcc (tcc_) {}
   unsigned get_required_nargs () const { return nargs; }
   enum tree_code code;
   unsigned nargs;
+  const char *tcc;
 };
 
 struct fn_id : public id_base
@@ -215,7 +217,7 @@ add_operator (enum tree_code code, const
   /* To have INTEGER_CST and friends as "predicate operators".  */
   && strcmp (tcc, "tcc_constant") != 0)
 return;
-  operator_id *op = new operator_id (code, id, nargs);
+  operator_id *op = new operator_id (code, id, nargs, tcc);
   id_base **slot = operators->find_slot_with_hash (op, op->hashval, INSERT);
   if (*slot)
 fatal ("duplicate id definition");
@@ -890,6 +892,38 @@ is_conversion (id_base *op)
  || *op == VIEW_CONVERT_EXPR);
 }
 
+/* Get the type to be used for generating operands of OP from the
+   various sources.  */
+
+static const char *
+get_operand_type (id_base *op, const char *in_type,
+ const char *expr_type,
+ const char *other_oprnd_type)
+{
+  /* Generally operands whose type does not match the type of the
+ expression generated need to know their types but match and
+ thus can fall back to 'other_oprnd_type'.  */
+  if (is_conversion (op))
+return other_oprnd_type;
+  else if (*op == REALPART_EXPR
+  || *op == IMAGPART_EXPR)
+return other_oprnd_type;
+  else if (is_a  (op)
+  && strcmp (as_a  (op)->tcc, "tcc_comparison") == 0)
+return other_oprnd_type;
+  else
+{
+  /* Otherwise all types should match - choose one in order of
+ preference.  */
+  if (expr_type)
+   return expr_type;
+  else if (in_type)
+   return in_type;
+  else
+   return other_oprnd_type;
+}
+}
+
 /* Code gen off the AST.  */
 
 void
@@ -912,8 +946,14 @@ expr::gen_transform (FILE *f, const char
   /* __real and __imag use the component type of its operand.  */
   sprintf (optype, "TREE_TYPE (TREE_TYPE (ops%d[0]))", depth);
   type = optype;
-  /* Avoid passing in_type / type to operand creation.  */
-  conversion_p = true;
+}
+  else if (is_a  (operation->op)
+  && strcmp (as_a  (operation->op)->tcc, 
"tcc_comparison") == 0)
+{
+  /* comparisons use boolean_type_node (or what gets in), but
+ their operands need to figure out the types themselves.  */
+  sprintf (optype, "boolean_type_node");
+  type = optype;
 }
   else
 {
@@ -926,26 +966,16 @@ expr::gen_transform (FILE *f, const char
 
   fprintf (f, "{\n");
   fprintf (f, "  tree ops%d[%u], res;\n", depth, ops.length ());
+  char op0type[64];
+  snprintf (op0type, 64, "TREE_TYPE (ops%d[0])", depth);
   for (unsigned i = 0; i < ops.length (); ++i)
 {
   char dest[32];
   snprintf (dest, 32, "  ops%d[%u]", depth, i);
-  ops[i]->gen_transform (f, dest, gimple, depth + 1,
-conversion_p
-/* If this op is a conversion its single
-   operand has to know its type itself.  */
-? NULL
-/* For other ops the type is the type
-   we got passed in, or if that is from
-   a conversion we can at most use the
-   first operand type for all further
-   operands.  So (convert (plus @1 (convert @2))
-   is possible while
-   (convert (plus (convert @1) @2))
-   is not unless we somehow discover what
-   operand we can generate first and do it
-   in the appropriate order.  */
-: (i == 0 ? in_type : type));
+  const char *optype
+   = get_operand_type (operation->op, in_type, expr_type,
+   i == 0 ? NULL : op0type);
+  ops[i]->gen_transform (f, dest, gimple, depth + 1, optype);
 }
 
   if (gimple)
@@ -1804,9 +1834,12 @@ dt_simplify::gen (FILE *f, bool gimple)
{
  c

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Andreas Schwab

"VandeVondele  Joost"  writes:

>>> What dejagnu version are you using?
>
>> runtest --version
> WARNING: Couldn't find the global config file.
> Expect version is 5.44.1.15
> Tcl version is 8.5
> Framework version is  1.4.4

You need at least dejagnu 1.5, which includes this fix:

http://git.savannah.gnu.org/cgit/dejagnu.git/commit/?id=fd70857

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 10:47:17AM +0200, Jakub Jelinek wrote:
> On Tue, Sep 16, 2014 at 10:42:46AM +0200, Jakub Jelinek wrote:
> > > I'm currently seeing a lot of errors in the log of make -j32 -k check.  
> > > Similar to the ones below.
> > 
> > Weird.  Works just fine for me, on two different boxes,
> > one with dejagnu 1.5 and one with dejagnu 1.5.1.  There is
> > no dg-save-unknown in gfortran.dg/test_common_binding_labels_2_main.f03,
> > but dg-compile-aux-modules and that is defined in gfortran.dg/dg.exp.
> 
> Looking around in gcc-testresults, for some people (e.g. H.J.'s testresults,
> Andreas Schwab ia64, etc.) it works just fine too,
> https://gcc.gnu.org/ml/gcc-testresults/2014-09/msg01551.html
> shows the same symptoms as you report.  What dejagnu version are you using?

Looking at the prehistoric dejagnu 1.4.4 (11 years old), it has
unconditional:
# But first rename the existing one so we can restore it afterwards.
catch {rename dg-save-unknown ""}
rename unknown dg-save-unknown
proc unknown { args } {
return -code error "unknown dg option: $args"
}
in there while 1.5 has:
# Define our own "special function" `unknown' so we catch spelling
# errors.
# But first rename the existing one so we can restore it afterwards.
if { [info procs dg-save-unknown] == [list] } {
rename unknown dg-save-unknown
proc unknown { args } {
return -code error "unknown dg option: $args"
}
}

Does the following patch fix this?  Works for me with dejagnu 1.5.1.

--- gcc/testsuite/gfortran.dg/dg.exp.jj 2014-09-15 21:45:45.0 +0200
+++ gcc/testsuite/gfortran.dg/dg.exp 2014-09-16 11:15:26.766004692 +0200
@@ -39,8 +39,18 @@ proc dg-compile-aux-modules { args } {
error "dg-set-target-env-var: needs one argument"
return
 }
+
+set level [info level]
+if { [info procs dg-save-unknown] != [list] } {
+   rename dg-save-unknown dg-save-unknown-level-$level
+}
+
 dg-test $gfortran_test_path/[lindex $args 1] "" $gfortran_aux_module_flags
-# cleanup-modules isn't intentionally invoked here.
+# cleanup-modules is intentionally not invoked here.
+
+if { [info procs dg-save-unknown-level-$level] != [list] } {
+   rename dg-save-unknown-level-$level dg-save-unknown
+}
 }

 # Main loop.

Jakub

RE: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread VandeVondele Joost

> > Framework version is  1.4.4
> You need at least dejagnu 1.5, which includes this fix:

I see, but that's contrary to :
https://gcc.gnu.org/install/prerequisites.html

Re: [PATCH] gcc parallel make check

2014-09-16 Thread Richard Biener

On Mon, Sep 15, 2014 at 7:44 PM, Mike Stump  wrote:
> On Sep 15, 2014, at 9:05 AM, Jakub Jelinek  wrote:
>
> All the updates sound good.
>
>> Regtested on x86_64-linux, without the patch toplevel make -k check
>> took 8hrs3minutes (don't have time data for that run),
>
> This confuses me, but, no matter.  Isn’t 8hrs time data?  :-)
>
>> patch toplevel make -j48 -k check took:
>> real 40m21.984s
>> user 341m51.675s
>> sys 112m46.993s
>> and with the patch make -j48 -k check took:
>> real 32m22.066s
>> user 355m1.788s
>> sys 117m5.809s
>
> These numbers are useful to try and ensure the overhead (scaling factor) is 
> reasonable, thanks.

A nice improvement indeed.  The patched result is 15 times faster
than the serial unpatched run.  So there is room for improvement
(I wouldn't say the scaling factor is reasonable - with accounting for
overhead I'd expect it should be possible to arrive at a factor of 32
here at least).

So - where's the "serial" parts of the testing run?

Thanks for the improvements btw!

Richard.

>> Is this version ok for trunk?
>
> Ok.
>
> Thanks for all your work.

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 11:14:33AM +0200, Andreas Schwab wrote:
> "VandeVondele  Joost"  writes:
> 
> >>> What dejagnu version are you using?
> >
> >> runtest --version
> > WARNING: Couldn't find the global config file.
> > Expect version is   5.44.1.15
> > Tcl version is  8.5
> > Framework version is 1.4.4
> 
> You need at least dejagnu 1.5, which includes this fix:
> 
> http://git.savannah.gnu.org/cgit/dejagnu.git/commit/?id=fd70857

BTW, even that change doesn't look right to me,
with nested calls, my reading of the change is that in case of
nested calls, the outer one will rename unknown to dg-save-unknown,
then inner call will not rename anything because dg-save-unknown
already exists, then when about to return, the nested call will rename
dg-save-unknown to unknown (but, I'd say that it should be the outer, not
inner call that does that), then the outer call will not rename anything.
So, either it can use a trick with [info level] I've used, or just remember
in some local variable if a particular call renamed it and only if it did,
rename it back.

Jakub

Re: [match-and-simplify] remove dt_node::level_max

2014-09-16 Thread Richard Biener

On Mon, Sep 15, 2014 at 8:51 PM, Prathamesh Kulkarni
 wrote:
> * genmatch.c (dt_node::level_max): Remove.

Applied.

Thanks,
Richard.

> Thanks,
> Prathamesh

Re: [PATCH] gcc parallel make check

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 11:20:37AM +0200, Richard Biener wrote:
> > This confuses me, but, no matter.  Isn’t 8hrs time data?  :-)

It is, but not time(1) data, just wall clock computed from subtracting
mtimes of my make check output log and make -j48 bootstrap log.

> >> patch toplevel make -j48 -k check took:
> >> real40m21.984s
> >> user341m51.675s
> >> sys 112m46.993s
> >> and with the patch make -j48 -k check took:
> >> real32m22.066s
> >> user355m1.788s
> >> sys 117m5.809s
> >
> > These numbers are useful to try and ensure the overhead (scaling factor) is 
> > reasonable, thanks.
> 
> A nice improvement indeed.  The patched result is 15 times faster
> than the serial unpatched run.  So there is room for improvement

Note, the box used was oldish AMD 16-core, no ht, box, haven't tried it on 
anything
more parallel, also it was normal hard disk, etc.  No idea whether anything
from this is relevant to that though.
Some CPU time goes into the expect processes, I can retry the build tonight
and grab also time(1) info from make -k check to see the user/sys times for
serial testing.

Jakub

RE: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread VandeVondele Joost


> Does the following patch fix this?  Works for me with dejagnu 1.5.1.

and works for me with 1.4.4

Joost

Re: [PATCH, i386, Pointer Bounds Checker 30/x] Size relocation

2014-09-16 Thread Ilya Enkovich

2014-09-16 13:08 GMT+04:00 Uros Bizjak :
> Hello!
>
>> This patch adds size relocation support for i386 target.  Relocation is used 
>> to compute bounds for static objects with incomplete type.
>>
>> Thanks,
>> Ilya
>> --
>> gcc/
>>
>> 2014-06-11  Ilya Enkovich  
>>
>> * config/i386/i386.md (UNSPEC_SIZEOF): New.
>> (move_size_reloc_si): New.
>> (move_size_reloc_di): New.
>> * config/i386/predicates.md (symbol_operand): New.
>>
>>
>> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
>> index 32db2c6..5fd556b 100644
>> --- a/gcc/config/i386/i386.md
>> +++ b/gcc/config/i386/i386.md
>> @@ -79,6 +79,7 @@
>>UNSPEC_PLTOFF
>>UNSPEC_MACHOPIC_OFFSET
>>UNSPEC_PCREL
>> +  UNSPEC_SIZEOF
>>
>>;; Prologue support
>>UNSPEC_STACK_ALLOC
>> @@ -18340,6 +18341,32 @@
>>"bndstx\t{%2, %3|%3, %2}"
>>[(set_attr "type" "mpxst")])
>>
>> +(define_insn "move_size_reloc_si"
>> +  [(set (match_operand:SI 0 "register_operand" "=r")
>> +   (unspec:SI
>> +[(match_operand:SI 1 "symbol_operand")]
>> +UNSPEC_SIZEOF))]
>> +  "TARGET_MPX"
>> +  "mov{l}\t{%1@SIZE, %0|%0, %1@SIZE}"
>> +  [(set_attr "type" "imov")
>> +   (set_attr "mode" "SI")])
>> +
>> +(define_insn "move_size_reloc_di"
>> +  [(set (match_operand:DI 0 "register_operand" "=r")
>> +   (unspec:DI
>> +[(match_operand:DI 1 "symbol_operand")]
>> +UNSPEC_SIZEOF))]
>> +  "TARGET_64BIT && TARGET_MPX"
>> +{
>> +  if (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE
>> +  || ix86_cmodel == CM_MEDIUM_PIC || ix86_cmodel == CM_LARGE_PIC)
>> +return "movabs{q}\t{%1@SIZE, %0|%0, %1@SIZE}";
>
> Can x86_64_immediate_operand predicate be used here?

I think it cannot be used because of TLS symbols not counting as immediate.

Thanks,
Ilya

>
>> +  else
>> +return "mov{l}\t{%1@SIZE, %k0|%k0, %1@SIZE}";
>> +}
>> +  [(set_attr "type" "imov")
>> +   (set_attr "mode" "DI")])
>> +
>>  (include "mmx.md")
>>  (include "sse.md")
>>  (include "sync.md")
>> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
>> index a738033..e4c5d21 100644
>> --- a/gcc/config/i386/predicates.md
>> +++ b/gcc/config/i386/predicates.md
>> @@ -119,6 +119,10 @@
>> (match_test "TARGET_64BIT")
>> (match_test "REGNO (op) > BX_REG")))
>>
>> +;; Return true if VALUE is symbol reference
>> +(define_predicate "symbol_operand"
>> +  (match_code "symbol_ref"))
>> +
>>  ;; Return true if VALUE can be stored in a sign extended immediate field.
>>  (define_predicate "x86_64_immediate_operand"
>>(match_code "const_int,symbol_ref,label_ref,const")
>
> Uros.

Re: [PATCH] Avoid inter-test dependencies in gfortran.dg (PR fortran/56408)

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 09:31:04AM +, VandeVondele  Joost wrote:
> 
> > Does the following patch fix this?  Works for me with dejagnu 1.5.1.
> 
> and works for me with 1.4.4

Ok, I've committed this to trunk then, so that testing works again for those
with old dejagnu.

2014-09-16  Jakub Jelinek  

PR fortran/56408
* gfortran.dg/dg.exp (dg-compile-aux-modules): Workaround
missing nested dg-test call support in dejaGNU 1.4.4.

--- gcc/testsuite/gfortran.dg/dg.exp.jj 2014-09-15 21:45:45.0 +0200
+++ gcc/testsuite/gfortran.dg/dg.exp 2014-09-16 11:15:26.766004692 +0200
@@ -39,8 +39,18 @@ proc dg-compile-aux-modules { args } {
error "dg-set-target-env-var: needs one argument"
return
 }
+
+set level [info level]
+if { [info procs dg-save-unknown] != [list] } {
+   rename dg-save-unknown dg-save-unknown-level-$level
+}
+
 dg-test $gfortran_test_path/[lindex $args 1] "" $gfortran_aux_module_flags
-# cleanup-modules isn't intentionally invoked here.
+# cleanup-modules is intentionally not invoked here.
+
+if { [info procs dg-save-unknown-level-$level] != [list] } {
+   rename dg-save-unknown-level-$level dg-save-unknown
+}
 }
 
 # Main loop.

Jakub

Re: [match-and-simplify] CSE with expression captures

2014-09-16 Thread Richard Biener

On Tue, Sep 16, 2014 at 8:25 AM, Prathamesh Kulkarni
 wrote:
> On Tue, Sep 16, 2014 at 2:15 AM, Marc Glisse  wrote:
>> On Tue, 16 Sep 2014, Prathamesh Kulkarni wrote:
>>
>>> --- gcc/match-builtin.pd(revision 215271)
>>> +++ gcc/match-builtin.pd(working copy)
>>> @@ -44,8 +44,8 @@
>>>   /* ???  There is no way to CSE here.  We'd need to support
>>>  expression captures here, like with
>>>   (mult (realpart@1 @0) @1) */
>>> - (mult (realpart @0) (realpart @0))
>>> - (mult (imagpart @0) (imagpart @0)))
>>> + (mult (realpart@1 @0) @1) + (mult (imagpart@2 @0) @2))
>>
>>
>> Maybe remove the comment above?
>>
>> You seem to have trailing spaces on most of your new lines.
> Thanks, fixed.
>
> * genmatch.c (operand::gen_transform): Add dt_operand ** default argument
> to operand hierarchy.
>   (expr::gen_transform): Adjust.
>   (capture::gen_transform): Likewise.
>   (dt_simplify::gen): Likewise.
>
> * match-builtins.pd: Adjust pattern to use expression captures in
> transform.

Heh, that was simpler than I thought ;)

Thanks, applied.

Richard.

> Regards,
> Prathamesh
>>
>> --
>> Marc Glisse

Re: [PATCH] gcc parallel make check

2014-09-16 Thread Richard Biener

On Tue, Sep 16, 2014 at 11:28 AM, Jakub Jelinek  wrote:
> On Tue, Sep 16, 2014 at 11:20:37AM +0200, Richard Biener wrote:
>> > This confuses me, but, no matter.  Isn’t 8hrs time data?  :-)
>
> It is, but not time(1) data, just wall clock computed from subtracting
> mtimes of my make check output log and make -j48 bootstrap log.
>
>> >> patch toplevel make -j48 -k check took:
>> >> real40m21.984s
>> >> user341m51.675s
>> >> sys 112m46.993s
>> >> and with the patch make -j48 -k check took:
>> >> real32m22.066s
>> >> user355m1.788s
>> >> sys 117m5.809s
>> >
>> > These numbers are useful to try and ensure the overhead (scaling factor) 
>> > is reasonable, thanks.
>>
>> A nice improvement indeed.  The patched result is 15 times faster
>> than the serial unpatched run.  So there is room for improvement
>
> Note, the box used was oldish AMD 16-core, no ht, box, haven't tried it on 
> anything

Ah, I assumed -j48 testing means you have 48 cores.  I usually test
with -j12 on my 6-core HT-enabled box.  A factor 15 scaling for 16
CPUs is of course close to the best we can achieve.

Richard.

> more parallel, also it was normal hard disk, etc.  No idea whether anything
> from this is relevant to that though.
> Some CPU time goes into the expect processes, I can retry the build tonight
> and grab also time(1) info from make -k check to see the user/sys times for
> serial testing.
>
> Jakub

Re: [PATCH, i386, Pointer Bounds Checker 31/x] Pointer Bounds Checker builtins for i386 target

2014-09-16 Thread Uros Bizjak

> 2014-06-11  Ilya Enkovich  
>
> * config/i386/i386-builtin-types.def (BND): New.
> (ULONG): New.
> (BND_FTYPE_PCVOID_ULONG): New.
> (VOID_FTYPE_BND_PCVOID): New.
> (VOID_FTYPE_PCVOID_PCVOID_BND): New.
> (BND_FTYPE_PCVOID_PCVOID): New.
> (BND_FTYPE_PCVOID): New.
> (BND_FTYPE_BND_BND): New.
> (PVOID_FTYPE_PVOID_PVOID_ULONG): New.
> (PVOID_FTYPE_PCVOID_BND_ULONG): New.
> (ULONG_FTYPE_VOID): New.
> (PVOID_FTYPE_BND): New.
> * config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
> (ix86_builtins): Add
> IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
> IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
> IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
> IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
> IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
> IX86_BUILTIN_BNDUPPER.
> (builtin_isa): Add leaf_p and nothrow_p fields.
> (def_builtin): Initialize leaf_p and nothrow_p.
> (ix86_add_new_builtins): Handle leaf_p and nothrow_p
> flags.
> (bdesc_mpx): New.
> (bdesc_mpx_const): New.
> (ix86_init_mpx_builtins): New.
> (ix86_init_builtins): Call ix86_init_mpx_builtins.
> (ix86_emit_move_max): New.
> (ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
> IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
> IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
> IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
> IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
> IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.

Hm, can this patch be compiled as part of the series? The expanders
refer to various gen_bnd patterns that I don't see. Also, I don't see
BND mode introduced.

Anyway, some general observations:

> +case IX86_BUILTIN_BNDLDX:
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +  op0 = expand_normal (arg0);
> +  op1 = expand_normal (arg1);
> +
> +  op0 = force_reg (Pmode, op0);
> +  op1 = force_reg (Pmode, op1);
> +
> +  /* Avoid registers which cannot be used as index.  */
> +  if (!index_register_operand (op1, Pmode))
> +   {
> + rtx temp = gen_reg_rtx (Pmode);
> + emit_move_insn (temp, op1);
> + op1 = temp;
> +   }
> +
> +  /* If op1 was a register originally then it may have
> +mode other than Pmode.  We need to extend in such
> +case because bndldx may work only with Pmode regs.  */
> +  if (GET_MODE (op1) != Pmode)
> +   op1 = ix86_zero_extend_to_Pmode (op1);
> +
> +  if (REG_P (target))
> +   emit_insn (TARGET_64BIT
> +  ? gen_bnd64_ldx (target, op0, op1)
> +  : gen_bnd32_ldx (target, op0, op1));
> +  else
> +   {
> + rtx temp = gen_reg_rtx (BNDmode);
> + emit_insn (TARGET_64BIT
> +? gen_bnd64_ldx (temp, op0, op1)
> +: gen_bnd32_ldx (temp, op0, op1));
> + emit_move_insn (target, temp);
> +   }
> +  return target;

I don't like the way arguments are prepared. For the case above,
bnd_ldx should have index_register_operand predicate in its pattern,
and this predicate (and its mode) should be checked in the expander
code. There are many examples of argument expansion in
ix86_expand_builtin function, including how Pmode is handled.

Also, please see how target is handled there. Target can be null, so
REG_P predicate will crash.

You should also select insn patterns depending on BNDmode, not TARGET_64BIT.

Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries, you can introduce
new slots if you need more live values at once.

Uros.

Re: [PATCH i386 AVX512] [41/n] Extend extract insn patterns.

2014-09-16 Thread Kirill Yukhin

Hello Uroš,
On 16 Sep 09:47, Uros Bizjak wrote:
> > +  "TARGET_AVX512DQ && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)"
> 
> Ouch, you have assignment instead of comparison here!
Thanks, fixed!

> > +   (set (attr "memory")
> > +  (if_then_else (match_test "MEM_P (operands[0])")
> > +   (const_string "store")
> > +   (const_string "none")))
> 
> Set the type attribute to sselog1 to automatically calculate memory
> attribute. Please see the definition of the attribute in i386.md.
Fixed.
> > +  "TARGET_AVX512DQ"
> > +  "vextract32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
> > +  [(set_attr "type" "sselog")
> > +   (set_attr "prefix_extra" "1")
> > +   (set_attr "length_immediate" "1")
> > +   (set (attr "memory")
> > +  (if_then_else (match_test "MEM_P (operands[0])")
> > +   (const_string "store")
> > +   (const_string "none")))
> 
> Set the type to sselog1 and remove memory attribute calculation (as above).
Fixed.
> > +}
> > +   [(set_attr "type" "sselog")
> > +(set_attr "prefix_extra" "1")
> > +(set_attr "length_immediate" "1")
> > +(set_attr "memory" "none,store")
> 
> Set the type to sselog1 and remove memory attribute calculation.
Fixed.
> > -  "TARGET_AVX"
> > -  "vextract\t{$0x1, %1, %0|%0, %1, 0x1}"
> > +  "TARGET_AVX && (! || (TARGET_AVX512VL && TARGET_AVX512F))"
> 
> Please split the pattern to avoid too complex insn constraints.
Condition simplified.

Updated ChangeLog entry:
gcc/
* config/i386/i386.c
(ix86_expand_vector_extract): Handle V32HI and V64QI modes.
* config/i386/sse.md
(define_mode_iterator VI48F_256): New.
(define_mode_attr extract_type): Ditto.
(define_mode_attr extract_suf): Ditto.
(define_mode_iterator AVX512_VEC): Ditto.
(define_expand
"_vextract_mask"): Use
AVX512_VEC.
(define_insn "avx512dq_vextract64x2_1_maskm"): New.
(define_insn
"avx512dq_vextract64x2_1"):
Ditto.
(define_mode_attr extract_type_2): Ditto.
(define_mode_attr extract_suf_2): Ditto.
(define_mode_iterator AVX512_VEC_2): Ditto.
(define_expand
"_vextract_mask"): Use
AVX512_VEC_2 mode iterator.
(define_insn "vec_extract_hi__maskm"): Ditto.
(define_expand "avx512vl_vextractf128"): Ditto.
(define_insn_and_split "vec_extract_lo_"): Delete.
(define_insn "vec_extract_lo_"): New.
(define_split for V16FI mode): Ditto.
(define_insn_and_split "vec_extract_lo_"): Delete.
(define_insn "vec_extract_lo_"): New.
(define_split for VI8F_256 mode): Ditto.
(define_insn "vec_extract_hi_"): Add masking.
(define_insn_and_split "vec_extract_lo_"): Delete.
(define_insn "vec_extract_lo_"): New.
(define_split for VI4F_256 mode): Ditto.
(define_insn "vec_extract_lo__maskm"): Ditto.
(define_insn "vec_extract_hi__maskm"): Ditto.
(define_insn "vec_extract_hi_"): Add masking.
(define_mode_iterator VEC_EXTRACT_MODE): Add V64QI and V32HI modes.
(define_insn "vcvtph2ps"): Fix pattern condition.
(define_insn "avx512f_vextract32x4_1_maskm"): Ditto.
(define_insn 
"avx512f_vextract32x4_1"):
Update `type' attribute, remove explicit `memory' attribute calculation.

Is it ok for trunk?

--
Thanks, K

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 30120a5..ccfd47d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40979,6 +40979,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
}
   break;

+case V32HImode:
+  if (TARGET_AVX512BW)
+   {
+ tmp = gen_reg_rtx (V16HImode);
+ if (elt < 16)
+   emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
+ else
+   emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+   }
+  break;
+
+case V64QImode:
+  if (TARGET_AVX512BW)
+   {
+ tmp = gen_reg_rtx (V32QImode);
+ if (elt < 32)
+   emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
+ else
+   emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 31);
+ return;
+   }
+  break;
+
 case V16SFmode:
   tmp = gen_reg_rtx (V8SFmode);
   if (elt < 8)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bd321fc..561fdbb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -534,6 +534,7 @@
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])

 ;; Mapping from float mode to required SSE level
 (define_mode_attr sse
@@ -6319,44 +6320,64 @@
   operands[1] = adjust_address (operands[1],

Re: [PATCH, i386, Pointer Bounds Checker 30/x] Size relocation

2014-09-16 Thread Uros Bizjak

On Tue, Sep 16, 2014 at 11:37 AM, Ilya Enkovich  wrote:
> 2014-09-16 13:08 GMT+04:00 Uros Bizjak :
>> Hello!
>>
>>> This patch adds size relocation support for i386 target.  Relocation is 
>>> used to compute bounds for static objects with incomplete type.
>>>
>>> Thanks,
>>> Ilya
>>> --
>>> gcc/
>>>
>>> 2014-06-11  Ilya Enkovich  
>>>
>>> * config/i386/i386.md (UNSPEC_SIZEOF): New.
>>> (move_size_reloc_si): New.
>>> (move_size_reloc_di): New.
>>> * config/i386/predicates.md (symbol_operand): New.
>>>
>>>
>>> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
>>> index 32db2c6..5fd556b 100644
>>> --- a/gcc/config/i386/i386.md
>>> +++ b/gcc/config/i386/i386.md
>>> @@ -79,6 +79,7 @@
>>>UNSPEC_PLTOFF
>>>UNSPEC_MACHOPIC_OFFSET
>>>UNSPEC_PCREL
>>> +  UNSPEC_SIZEOF
>>>
>>>;; Prologue support
>>>UNSPEC_STACK_ALLOC
>>> @@ -18340,6 +18341,32 @@
>>>"bndstx\t{%2, %3|%3, %2}"
>>>[(set_attr "type" "mpxst")])
>>>
>>> +(define_insn "move_size_reloc_si"
>>> +  [(set (match_operand:SI 0 "register_operand" "=r")
>>> +   (unspec:SI
>>> +[(match_operand:SI 1 "symbol_operand")]
>>> +UNSPEC_SIZEOF))]
>>> +  "TARGET_MPX"
>>> +  "mov{l}\t{%1@SIZE, %0|%0, %1@SIZE}"
>>> +  [(set_attr "type" "imov")
>>> +   (set_attr "mode" "SI")])
>>> +
>>> +(define_insn "move_size_reloc_di"
>>> +  [(set (match_operand:DI 0 "register_operand" "=r")
>>> +   (unspec:DI
>>> +[(match_operand:DI 1 "symbol_operand")]
>>> +UNSPEC_SIZEOF))]
>>> +  "TARGET_64BIT && TARGET_MPX"
>>> +{
>>> +  if (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE
>>> +  || ix86_cmodel == CM_MEDIUM_PIC || ix86_cmodel == CM_LARGE_PIC)
>>> +return "movabs{q}\t{%1@SIZE, %0|%0, %1@SIZE}";
>>
>> Can x86_64_immediate_operand predicate be used here?
>
> I think it cannot be used because of TLS symbols not counting as immediate.

OK, please introduce a new predicate, similar to
x86_64_immediate_operand, perhaps x86_64_immediate_size_operand, so we
can add some comments there. This will also help to macroize the insn,
x86_64_immediate_operand has !TARGET_64BIT shortcut for this case.

Uros.

[PATCH] Fix PR63266: Keep track of impact of sign extension in bswap

2014-09-16 Thread Thomas Preud'homme

Hi all,

The fix for PR61306 disabled bswap when a sign extension is detected. However 
this led to a test case regression (and potential performance regression) in 
case where a sign extension happens but its effect is canceled by other bit 
manipulation. This patch aims to fix that by having a special marker to track 
bytes whose value is unpredictable due to sign extension. If the final result 
of a bit manipulation doesn't contain any such marker then the bswap 
optimization can proceed.

*** gcc/ChangeLog ***

2014-09-15  Thomas Preud'homme  

PR tree-optimization/63266
* tree-ssa-math-opts.c (struct symbolic_number): Add comment about
marker for unknown byte value.
(MARKER_MASK): New macro.
(MARKER_BYTE_UNKNOWN): New macro.
(HEAD_MARKER): New macro.
(do_shift_rotate): Mark bytes with unknown values due to sign
extension when doing an arithmetic right shift. Replace hardcoded
mask for marker by new MARKER_MASK macro.
(find_bswap_or_nop_1): Likewise and adjust ORing of two symbolic
numbers accordingly.

*** gcc/testsuite/ChangeLog ***

2014-09-15  Thomas Preud'homme  

PR tree-optimization/63266
* gcc.dg/optimize-bswapsi-1.c (swap32_d): New bswap pass test.


Testing:

* Built an arm-none-eabi-gcc cross-compiler and used it to run the testsuite on 
QEMU emulating Cortex-M3 without any regression
* Bootstrapped on x86_64-linux-gnu target and testsuite was run without 
regression


Ok for trunk?

pr63266.1.1.diff
Description: Binary data

[PATCH][match-and-simplify] Make predicate IDs explicit

2014-09-16 Thread Richard Biener


The following patch makes the parser aware of the known predicates by
having us declare them in match.pd with a new (define_predicates ...)
construct.  This will allow us (in a followup patch) to define
our own predicates via something like

(match negate_expr_p
  (negate @0))
(match negate_expr_p
  INTEGER_CST@0
  (if (TYPE_OVERFLOW_WRAPS (type) || may_negate_without_overflow_p (@0
...

Committed.

Richard.

2014-09-16  Richard Biener  

* genmatch.c (id_base::id_kind): Add PREDICATE.
(struct predicate_id): New id_base variant.
(is_a_helper): Add predicate_id support.
(add_predicate): New function.
(struct predicate): Adjust.
(print_operand): Likewise.
(cmp_operand): Likewise.
(dt_operand::gen_predicate): Likewise.
(parse_op): Likewise.  Error out on unknown predicates.
(parse_predicates): New function.
(parse_pattern): Call it.
* match.pd: Define used predicates.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215294)
+++ gcc/genmatch.c  (working copy)
@@ -132,7 +132,7 @@ END_BUILTINS
 
 struct id_base : typed_free_remove<id_base>
 {
-  enum id_kind { CODE, FN, USER_DEFINED } kind;
+  enum id_kind { CODE, FN, PREDICATE, USER_DEFINED } kind;
 
   id_base (id_kind, const char *);
 
@@ -188,6 +188,11 @@ struct fn_id : public id_base
   enum built_in_function fn;
 };
 
+struct predicate_id : public id_base
+{
+  predicate_id (const char *id_) : id_base (id_base::PREDICATE, id_) {}
+};
+
 template<>
 template<>
 inline bool
@@ -204,6 +209,24 @@ is_a_helper ::test (id_ba
   return id->kind == id_base::CODE;
 }
 
+template<>
+template<>
+inline bool
+is_a_helper <predicate_id *>::test (id_base *id)
+{
+  return id->kind == id_base::PREDICATE;
+}
+
+static void
+add_predicate (const char *id)
+{
+  predicate_id *p = new predicate_id (id);
+  id_base **slot = operators->find_slot_with_hash (p, p->hashval, INSERT);
+  if (*slot)
+fatal ("duplicate id definition");
+  *slot = p;
+}
+
 static void
 add_operator (enum tree_code code, const char *id,
  const char *tcc, unsigned nargs)
@@ -257,8 +280,8 @@ struct operand {
 
 struct predicate : public operand
 {
-  predicate (const char *ident_) : operand (OP_PREDICATE), ident (ident_) {}
-  const char *ident;
+  predicate (predicate_id *p_) : operand (OP_PREDICATE), p (p_) {}
+  predicate_id *p;
   virtual void gen_transform (FILE *, const char *, bool, int, const char *, 
dt_operand ** = 0)
 { gcc_unreachable (); }
 };
@@ -525,7 +548,7 @@ print_operand (operand *o, FILE *f = std
 }
 
   else if (predicate *p = dyn_cast (o))
-fprintf (f, "%s", p->ident);
+fprintf (f, "%s", p->p->id);
 
   else if (is_a (o))
 fprintf (f, "c_expr");
@@ -1116,9 +1139,9 @@ cmp_operand (operand *o1, operand *o2)
 
   if (o1->type == operand::OP_PREDICATE)
 {
-  predicate *p1 = static_cast<predicate *>(o1);
-  predicate *p2 = static_cast<predicate *>(o2);
-  return strcmp (p1->ident, p2->ident) == 0;
+  predicate *p1 = as_a<predicate *>(o1);
+  predicate *p2 = as_a<predicate *>(o2);
+  return p1->p == p2->p;
 }
   else if (o1->type == operand::OP_EXPR)
 {
@@ -1381,9 +1404,9 @@ dt_operand::gen_opname (char *name, unsi
 unsigned
 dt_operand::gen_predicate (FILE *f, const char *opname)
 {
-  predicate *p = static_cast <predicate *> (op);
+  predicate *p = as_a <predicate *> (op);
 
-  fprintf (f, "if (%s (%s))\n", p->ident, opname);
+  fprintf (f, "if (%s (%s))\n", p->p->id, opname);
   fprintf (f, "{\n");
   return 1;
 }
@@ -2336,23 +2359,21 @@ parse_op (cpp_reader *r)
   if (token->type == CPP_NAME)
{
  const char *id = get_ident (r);
- /* We support zero-operand operator names as predicates.  */
  id_base *opr = get_operator (id);
- if (opr)
-   {
- if (operator_id *code = dyn_cast  (opr))
-   {
- if (code->nargs != 0)
-   fatal_at (token, "using an operator with operands as 
predicate");
- /* Parse the zero-operand operator "predicates" as
-expression.  */
- op = new expr (new e_operation (id));
-   }
- else
-   fatal_at (token, "using an unsupported operator as predicate");
+ if (!opr)
+   fatal_at (token, "expected predicate name");
+ if (operator_id *code = dyn_cast  (opr))
+   {
+ if (code->nargs != 0)
+   fatal_at (token, "using an operator with operands as 
predicate");
+ /* Parse the zero-operand operator "predicates" as
+expression.  */
+ op = new expr (new e_operation (id));
}
+ else if (predicate_id *p = dyn_cast  (opr))
+   op = new predicate (p);
  else
-   op = new predicate (id);
+   fatal_at (token, "using an unsupported operator as predicate");
  token = peek (r);
  if (token->flags & PREV_WHITE)

Re: [PATCH i386 AVX512] [41/n] Extend extract insn patterns.

2014-09-16 Thread Uros Bizjak

On Tue, Sep 16, 2014 at 12:10 PM, Kirill Yukhin  wrote:

> Updated ChangeLog entry:
> gcc/
> * config/i386/i386.c
> (ix86_expand_vector_extract): Handle V32HI and V64QI modes.
> * config/i386/sse.md
> (define_mode_iterator VI48F_256): New.
> (define_mode_attr extract_type): Ditto.
> (define_mode_attr extract_suf): Ditto.
> (define_mode_iterator AVX512_VEC): Ditto.
> (define_expand
> "_vextract_mask"): Use
> AVX512_VEC.
> (define_insn "avx512dq_vextract64x2_1_maskm"): New.
> (define_insn
> "avx512dq_vextract64x2_1"):
> Ditto.
> (define_mode_attr extract_type_2): Ditto.
> (define_mode_attr extract_suf_2): Ditto.
> (define_mode_iterator AVX512_VEC_2): Ditto.
> (define_expand
> "_vextract_mask"): Use
> AVX512_VEC_2 mode iterator.
> (define_insn "vec_extract_hi__maskm"): Ditto.
> (define_expand "avx512vl_vextractf128"): Ditto.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for V16FI mode): Ditto.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for VI8F_256 mode): Ditto.
> (define_insn "vec_extract_hi_"): Add masking.
> (define_insn_and_split "vec_extract_lo_"): Delete.
> (define_insn "vec_extract_lo_"): New.
> (define_split for VI4F_256 mode): Ditto.
> (define_insn "vec_extract_lo__maskm"): Ditto.
> (define_insn "vec_extract_hi__maskm"): Ditto.
> (define_insn "vec_extract_hi_"): Add masking.
> (define_mode_iterator VEC_EXTRACT_MODE): Add V64QI and V32HI modes.
> (define_insn "vcvtph2ps"): Fix pattern condition.
> (define_insn "avx512f_vextract32x4_1_maskm"): Ditto.
> (define_insn 
> "avx512f_vextract32x4_1"):
> Update `type' attribute, remove explicit `memory' attribute 
> calculation.
>
> Is it ok for trunk?

OK with a small change below.

> +(define_insn "vec_extract_lo__maskm"
> +  [(set (match_operand: 0 "memory_operand" "=m")
> +   (vec_merge:
> + (vec_select:
> +   (match_operand:VI4F_256 1 "register_operand" "v")
> +   (parallel [(const_int 0) (const_int 1)
> + (const_int 2) (const_int 3)]))
> + (match_operand: 2 "memory_operand" "0")
> + (match_operand:QI 3 "register_operand" "k")))]
> +  "TARGET_AVX512VL && TARGET_AVX512F"
> +  "vextract32x4\t{$0x0, %1, %0%{3%}|%0%{%3%}, %1, 0x0}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])

This pattern should probably match attributes (especially memory attr)
of vec_extract_hi__maskm below.

> +(define_insn "vec_extract_hi__maskm"
> +  [(set (match_operand: 0 "memory_operand" "=m")
> +   (vec_merge:
> + (vec_select:
> +   (match_operand:VI4F_256 1 "register_operand" "v")
> +   (parallel [(const_int 4) (const_int 5)
> + (const_int 6) (const_int 7)]))
> + (match_operand: 2 "memory_operand" "0")
> + (match_operand: 3 "register_operand" "k")))]
> +  "TARGET_AVX512F && TARGET_AVX512VL"
> +{
> +  return "vextract32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "memory" "store")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Bernd Schmidt


On 09/12/2014 01:48 PM, Richard Biener wrote:

On Fri, Sep 12, 2014 at 1:15 PM, Bernd Schmidt  wrote:

On 09/11/2014 01:29 PM, Richard Biener wrote:


+  if (TREE_CODE (type) == ARRAY_TYPE)
+TREE_TYPE (type) = apply_as_to_type (TREE_TYPE (type), as);

why is this necessary for ARRAY_TYPE but not for sth like
a RECORD_TYPE or a POINTER_TYPE?



Still testing whether I actually strictly need it for ARRAY_TYPE nowadays
(these patches are really old...). However, the TYPE_FIELDS of a RECORD_TYPE
seem to be mostly ignored once the frontends are done, but it's very easy
for other parts of the compiler to take the TREE_TYPE of an ARRAY_TYPE.
Fixing that up is simple and seems like a good thing to do for consistency
(I notice that maybe I should add VECTOR_TYPE).


Well, for an access a->b the COMPONENT_REF specifies the type
of the reference which uses the type of the FIELD_DECL...  IVOPTs
for example may produce

  ptr *p = &a->b;
  *p;

from that with ptr * built from TREE_TYPE of that expression.

Btw, a similar type as VECTOR_TYPE is COMPLEX_TYPE.


Ok, so testing seems to show that nothing breaks with the ARRAY_TYPE 
special case removed. However, I remembered another reason to do this, 
and it's for consistency with how address spaces are represented in 
other parts of the compiler - specifically, the C frontend.


C has the notion that arrays don't have type qualifiers, so to get the 
address space of an array you'd have to look at the address space of its 
element types. Joseph has in the past rejected patches to fix this 
inconsistency. For other types like structs or vectors (as we saw in the 
tree-vect patch) it's the outermost type that has the address space 
information.


I guess I'll declare myself agnostic, let me know whatever variant you 
want to have here (fixing up all types or not fixing arrays) and I'll 
make a new patch.



Bernd

Extract and insert merging patch

2014-09-16 Thread Petr Murzin

Hi,
This patch allows merging of extract and insert. Please have a look.

2014-09-16  Petr Murzin  

* simplify-rtx.c (simplify_ternary_operation): Allow extract and
insert merging.


extract_insert_patch
Description: Binary data

Re: Extract and insert merging patch

2014-09-16 Thread Marc Glisse


On Tue, 16 Sep 2014, Petr Murzin wrote:


Hi,
This patch allows merging of extract and insert. Please have a look.

2014-09-16  Petr Murzin  

* simplify-rtx.c (simplify_ternary_operation): Allow extract and
insert merging.


Hello,

no testcase?

You could probably generalize 0 and 1 to i and 1

Re: [PATCH] microblaze: microblaze.md: Use 'SI' instead of 'VOID' for operand 1 of 'call_value_intern'

2014-09-16 Thread Michael Eager


On 09/15/14 15:53, Chen Gang wrote:

On 09/15/2014 11:30 PM, Michael Eager wrote:



Configuring DejaGNU for cross-target testing requires creating a configuration
file describing the target board.



OK, thank you very much. And could you share your configuration file,
which I can reference to?


See attached.

--
Michael Eagerea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077
# Xilinx remote for MicroBlaze using XMD

#load_generic_config "monitor"
load_generic_config "xmd"

# Identify multilib flags using libgloss
process_multilib_options ""

# The default compiler for this target.
set_board_info compiler  "mb-gcc"

# We will be using the standard GDB remote protocol
set_board_info gdb_protocol "remote"

# Name of the computer whose socket will be used, if required.
#set_board_info sockethost "gdb"
set_board_info sockethost "localhost"

# Port ID to use for socket connection
set_board_info gdb,socketport "1234"

# Port for target
#set_board_info netport "gdb:1234"
set_board_info netport "localhost:1234"

# Use techniques appropriate to a stub (don't do "run" command)
set_board_info use_gdb_stub 1

# This gdbserver can only run a process once per session.
set_board_info gdb,do_reload_on_run 1

# There's no support for argument-passing (yet).
set_board_info noargs 1

# Can't do input (or output) in the current gdbserver.
set_board_info gdb,noinferiorio 1

# Can't do signals
set_board_info gdb,nosignals 1

# Can't do hardware watchpoints, in general
set_board_info gdb,no_hardware_watchpoints 1

#set_board_info cflags  "[newlib_include_flags] [libgloss_include_flags]"

set_board_info cflags  "-mcpu=v4.00.b -mno-xl-soft-mul -mxl-barrel-shift"
set_board_info addl_link_flags "-L /home/eager/Xilinx/dg/microblaze_0/lib 
-Wl,-defsym -Wl,_HEAP_SIZE=0x100 -Wl,-defsym -Wl,_STACK_SIZE=0x8"
set_board_info ldscript "-T/home/eager/Xilinx/dg/microblaze_0/LinkScr.ld"
set_board_info ldflags  "[libgloss_link_flags] [newlib_link_flags] [board_info 
$board addl_link_flags]"
#  Use remote protocol to XMD server

load_config "monitor.exp"

proc set_host_info { entry value } {
global target_info board_info
verbose "set_host_info $entry $value" 3

set machine host
if [info exists target_info($machine,name)] {
set machine $target_info($machine,name)
}
set board_info($machine,$entry) $value
}

proc gdb_target_exec { } {
 send_gdb "cont"
#gdb_test "target exec" "No executable file now." "" ".*Kill it.*y or n.*" "y"
}

proc remote_reboot { host } {
}

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Richard Biener

On Tue, Sep 16, 2014 at 1:24 PM, Bernd Schmidt  wrote:
> On 09/12/2014 01:48 PM, Richard Biener wrote:
>>
>> On Fri, Sep 12, 2014 at 1:15 PM, Bernd Schmidt 
>> wrote:
>>>
>>> On 09/11/2014 01:29 PM, Richard Biener wrote:


 +  if (TREE_CODE (type) == ARRAY_TYPE)
 +TREE_TYPE (type) = apply_as_to_type (TREE_TYPE (type), as);

 why is this necessary for ARRAY_TYPE but not for sth like
 a RECORD_TYPE or a POINTER_TYPE?
>>>
>>>
>>>
>>> Still testing whether I actually strictly need it for ARRAY_TYPE nowadays
>>> (these patches are really old...). However, the TYPE_FIELDS of a
>>> RECORD_TYPE
>>> seem to be mostly ignored once the frontends are done, but it's very easy
>>> for other parts of the compiler to take the TREE_TYPE of an ARRAY_TYPE.
>>> Fixing that up is simple and seems like a good thing to do for
>>> consistency
>>> (I notice that maybe I should add VECTOR_TYPE).
>>
>>
>> Well, for an access a->b the COMPONENT_REF specifies the type
>> of the reference which uses the type of the FIELD_DECL...  IVOPTs
>> for example may produce
>>
>>   ptr *p = &a->b;
>>   *p;
>>
>> from that with ptr * built from TREE_TYPE of that expression.
>>
>> Btw, a similar type as VECTOR_TYPE is COMPLEX_TYPE.
>
>
> Ok, so testing seems to show that nothing breaks with the ARRAY_TYPE special
> case removed. However, I remembered another reason to do this, and it's for
> consistency with how address spaces are represented in other parts of the
> compiler - specifically, the C frontend.
>
> C has the notion that arrays don't have type qualifiers, so to get the
> address space of an array you'd have to look at the address space of its
> element types. Joseph has in the past rejected patches to fix this
> inconsistency. For other types like structs or vectors (as we saw in the
> tree-vect patch) it's the outermost type that has the address space
> information.
>
> I guess I'll declare myself agnostic, let me know whatever variant you want
> to have here (fixing up all types or not fixing arrays) and I'll make a new
> patch.

Hmm.  How is it with other composite types like vectors and complex?
It's bad that the middle-end needs to follow a specific frontend's needs.
Why's the representation tied so closely together?

OTOH that address-spaces are "qualifiers" is an implementation detail
(and maybe not the very best).  So I don't see how the C frontend
needs to view them as qualifiers?

Joseph?

Thanks,
Richard.

>
> Bernd
>
>

Re: Fix pr61848, linux kernel miscompile

2014-09-16 Thread Joseph S. Myers

On Tue, 16 Sep 2014, Alan Modra wrote:

> gcc testsuite additions?  I decline.  It is too soon.  If you had read
> my patch submission you'll see that at some stage gcc was supposed to
> warn on conflicting section attributes, but hasn't done so for a very
> long time.  There needs to be some agreement on which direction we
> should go before I'm willing to spend even a small amount of time on
> the testsuite.  Also, a test for merging tls model attributes runs

The point of testsuite additions is to verify the visible changes in 
behavior intended to be caused by the patch (and, as applicable, that the 
behavior doesn't change in other related areas where it's not meant to 
change), rather than to test something that GCC doesn't do either before 
or after the patch.

If the lack of tests is because the patch is an RFC about what the desired 
behavior is, rather than an actual submission for inclusion, then it's 
helpful to say so in the patch submission.

> into the problem that this can only be done in a target independent
> way by looking at dumps, and the tls model dump is currently broken.

If there is a reason some aspect of the change can't readily be tested, 
that should be stated in the patch submission (along with examples of the 
affected code that can't readily be put into suitable form for the 
testsuite).

> Come to think of it, what if I decline to make any testsuite
> additions?  I'm asking because you're a steering committee member, and

Then the patch isn't ready for review.  Documentation and testcases are 
the first thing I look at when reviewing C front-end changes; the 
testcases are the primary evidence that the patch does what it's meant to 
do, and without them I won't generally try to review the code changes.

There's no requirement for test-driven development, but personally I 
prefer to write the documentation and tests before the rest of the patch 
(and make sure the tests do fail with the unmodified compiler, unless they 
are tests of related cases that already work but I want to make sure don't 
get broken) - though in the course of implementing the patch I expect to 
find other related cases that result in more tests being written, and to 
modify exactly what I expect from the tests I wrote earlier.

(I also find it a pain when backporting patches to packages that don't 
expect testcases as a norm for all patches if the author didn't include 
testsuite coverage with their patch, as it makes it much harder to tell if 
the backport is working properly.  Or if a problem was caused by a patch 
that was committed without testcases - again, it's hard to tell if a fix 
affects the fix to the original issue the patch was supposed to address.)

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH 1/2, x86] Add palignr support for AVX2.

2014-09-16 Thread Evgeny Stupachenko

PING 2

On Mon, Sep 8, 2014 at 2:03 PM, Evgeny Stupachenko  wrote:
> PING
>
> On Wed, Aug 27, 2014 at 7:50 PM, Evgeny Stupachenko  
> wrote:
>> The rotate insn appeared right after expand.
>> I've done it similar to define_insn_and_split "*avx_vperm_broadcast_".
>> I don't see any potential losses on splitting that after reload.
>>
>> On Tue, Aug 26, 2014 at 8:29 PM, Richard Henderson  wrote:
>>> On 08/26/2014 05:59 AM, Evgeny Stupachenko wrote:
 +(define_insn_and_split "avx2_rotate_perm"
 +  [(set (match_operand:V_256 0 "register_operand" "=&x")
 +  (vec_select:V_256
 +   (match_operand:V_256 1 "register_operand" "x")
 +   (match_parallel 2 "palignr_operand"
 + [(match_operand 3 "const_int_operand" "n")])))]
 +  "TARGET_AVX2"
 +  "#"
 +  "&& reload_completed"
 +  [(const_int 0)]
>>>
>>> Why are you waiting until after reload to expand this?  It's only the
>>> vec_select parallel that determines which direction the palignr should be 
>>> done.
>>>
>>> This seems like something you could do during permutation expansion.
>>>
>>>
>>> r~
>>>
>>>

[PATCH][match-and-simplify] User defined predicates

2014-09-16 Thread Richard Biener


The following adds the ability to write predicates using patterns
with an example following negate_expr_p which already has a
use in comparison folding (via its if c-expr).

The syntax is as follows:

(match negate_expr_p
 INTEGER_CST
 (if (TYPE_OVERFLOW_WRAPS (type)
      || may_negate_without_overflow_p (t))))
(match negate_expr_p
 (bit_not @0)
 (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))))
(match negate_expr_p
 FIXED_CST)
(match negate_expr_p
 (negate @0))
...

that is, you write '(match <id>' instead of '(simplify' and then
follow with a pattern and optional conditionals.  There should
be no transform pattern (unchecked yet).  Multiple matches for
the same <id> simply add to what is recognized as <id>.
The predicate is applied to a single 'tree' operand and looks
up SSA defs and utilizes the optional valueize hook.

Currently both GENERIC and GIMPLE variants result in name-mangling
and the prototypes (unprototyped anywhere)

bool tree_negate_expr_p (tree t);
bool gimple_negate_expr_p (tree t, tree (*valueize)(tree) = NULL);

The predicate implementations simply use a separate decision tree.

I plan to follow this up with the ability to specify custom
operators like

(match widen_mult
  (mult (convert @0) (convert @1))
  (if (TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type))
(widen_mult @0 @1)))

and the ability to use that in

(simplify
  (convert (widen_mult @0 @1))
  (if (TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
(mult @0 @1)))

the generated APIs would then be sth like

bool gimple_widen_mult (tree t, tree *captures, tree (*valueize)(tree) = NULL);

thus receive @0 and @1 in the captures array.  This may help factoring
out stuff in the patterns itself and it addresses the desire to use
the pattern-matching engine from say tree-vect-patterns.c.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

Comments?  (syntax bike-shedding?)

Thanks,
Richard.

2014-09-16  Richard Biener  

* genmatch.c (struct predicate_id): Add matchers member.
(add_predicate): Return added predicate.
(dt_node::gen_gimple_kids, dt_node::gen_generic_kids): Relocate
from ...
(dt_operand::gen_gimple_kids, dt_operand::gen_generic_kids): ... here.
(check_no_user_id): Guard against NULL result.
(dt_operand::get_name): Handle NULL parent.
(dt_operand::gen_opname): Likewise.
(dt_operand::gen_predicate): Mangle user-defined predicates.
(dt_operand::gen_generic): Adjust.
(dt_operand::gen_gimple): Likewise.
(dt_simplify::gen): Guard against NULL result.
(write_predicate): New function.
(write_header): Only write header, not split out c-exprs.
(parse_simplify): Move identifier handling out, handle
"empty" result.
(parse_pattern): Parse (match  ...).  Drop optional
identifier from (simplify...).
(lower): New function, split out from ...
(main): ... here.  Generate code for all user-defined
predicates.
* match.pd (negate_expr_p): Implement as predicate.
* match-comparison.pd: Use negate_expr_p predicate.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215297)
+++ gcc/genmatch.c  (working copy)
@@ -188,9 +188,13 @@ struct fn_id : public id_base
   enum built_in_function fn;
 };
 
+struct simplify;
+
 struct predicate_id : public id_base
 {
-  predicate_id (const char *id_) : id_base (id_base::PREDICATE, id_) {}
+  predicate_id (const char *id_)
+: id_base (id_base::PREDICATE, id_), matchers (vNULL) {}
+  vec matchers;
 };
 
 template<>
@@ -217,7 +221,7 @@ is_a_helper ::test (id_b
   return id->kind == id_base::PREDICATE;
 }
 
-static void
+static predicate_id * 
 add_predicate (const char *id)
 {
   predicate_id *p = new predicate_id (id);
@@ -225,6 +229,7 @@ add_predicate (const char *id)
   if (*slot)
 fatal ("duplicate id definition");
   *slot = p;
+  return p;
 }
 
 static void
@@ -461,6 +466,9 @@ struct dt_node
 
   virtual void gen_gimple (FILE *) {}
   virtual void gen_generic (FILE *) {}
+
+  void gen_gimple_kids (FILE *);
+  void gen_generic_kids (FILE *);
 };
 
 struct dt_operand: public dt_node
@@ -475,7 +483,7 @@ struct dt_operand: public dt_node
 
   virtual void gen_gimple (FILE *);
   virtual void gen_generic (FILE *);
-  unsigned gen_predicate (FILE *, const char *);
+  unsigned gen_predicate (FILE *, const char *, bool);
   unsigned gen_match_op (FILE *, const char *);
 
   unsigned gen_gimple_expr (FILE *);
@@ -483,9 +491,6 @@ struct dt_operand: public dt_node
 
   char *get_name (char *);
   void gen_opname (char *, unsigned);
-
-  void gen_gimple_kids (FILE *);
-  void gen_generic_kids (FILE *);
 };
 
 
@@ -901,7 +906,8 @@ void
 check_no_user_id (simplify *s)
 {
   check_no_user_id (s->match);
-  check_no_user_id (s->result);
+  if (s->result)
+check_no_user_id (s->result);
 }
 
 bool
@@ -1386,7 +1392,9 @@ dec

[PATCH] Fix PR63258

2014-09-16 Thread Richard Biener


We no longer even consider applying peeling for alignment on
gcc.dg/vect/costmodel/ppc/costmodel-slp-33.c, so the
cost model no longer triggers.

Committed.

Richard.

2014-09-16  Richard Biener  

PR testsuite/63258
* gcc.dg/vect/costmodel/ppc/costmodel-slp-33.c: Remove scan
for "vectorization not profitable".

Index: gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-33.c
===
--- gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-33.c  (revision 
215297)
+++ gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-33.c  (working copy)
@@ -40,6 +40,5 @@ int main (void)
   return main1 ();
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } 
} */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

[RFC PATCH] -fsanitize=vptr instrumentation

2014-09-16 Thread Jakub Jelinek

Hi!

This patch attempts to instrument C++ when a pointer might not point
to object of the right dynamic type.

clang++ apparently instruments:
1) member accesses
2) method calls
3) downcasts (both pointer and reference downcasts)

The vptr-1.C (and from there derived -2 and -3 testcases) attempts to
model compiler-rt ubsan/TestCases/TypeCheck/vptr.cpp test.
1) and 2) are instrumented during genericization, 3) is instrumented
much earlier (build_static_cast_1), because afterwards it is not possible
to differentiate between reinterpret_cast, static_cast etc., but so that
it works with constexpr it is expanded only partially (just a builtin
function call) and the rest of instrumentation around it is added during
genericization; the builtin is ignored during constexpr evaluation.

The instrumentation works by computing a hash from the mangled type name
of the type we expect, and vtable pointer value in the object, this hash
is first used for a quick check in __ubsan_vptr_type_cache 128 element
hash table (if (__ubsan_vptr_type_cache[hash & 127] == hash), we don't
call anything and assume the access is fine), otherwise we call a library
function with the hash value, vtable pointer and rtti info what we are
looking for.

I've tried to optimize away checks for the most simple cases,
like when a member function is called on an object, not through a function
pointer (because then we know the vptr and the compiler should check that at
compile time), or when doing member access through this or member calls
through this (but only if it is to the actual type of the current class);
the this optimizations aren't done in ctors because the vptr is changing
there and the caller doesn't check for obvious reasons the vptr pointer
before calling ctor.

Attached are 3 testcases I'd like to discuss:

vptr-5.C is one Jason mailed me yesterday, clang++ doesn't instrument this
and g++ right now doesn't either, build_static_cast_1 certainly isn't called
in that case, and I must say I have no idea what should be checked there,
where etc.

vptr-6.C shows where the this optimization is performed and where it isn't
(clang++ has 10 instrumentations in T::h and 1 in S::l, g++ has fewer than
that, but not 0 in T::h (1 in S::l is right and needed I think)).

For downcasts, I'm testing the downcasted pointer for non-NULL (clang++ does
too), but not so for member access.  vptr-8.C testcase is me trying to
understand what is and what is not considered valid C++ for member accesses.
clang++ through -fsanitize=null already complains in f[1-6], g++ does
not (it will only instrument for null actual accesses, not mere taking of
address), but with -fsanitize=undefined (what matters is null,vptr)
clang++ complains in f1, complains in f2 and then segfaults on the vptr
instrumentation trying to read vtable pointer.  g++ doesn't complain on f1,
but complains on f2 and segfaults there too.
I guess my question is what exactly is strictly valid code (offsetof
really shouldn't be written that way I believe), and as f6 testcase shows,
often the pointer even isn't NULL when already taking address of a field
in dereference of a NULL pointer.
I hope all of f[1-6] is invalid, I really don't see how we could instrument
member accesses otherwise (we'd need to limit to not taking address of it);
NULL pointer shouldn't point at a valid object.

Thoughts, comments, suggestions for how to improve this?

2014-09-16  Jakub Jelinek  

* flag-types.h (enum sanitize_code): Add SANITIZE_VPTR,
include SANITIZE_VPTR in SANITIZE_UNDEFINED.
* opts.c (common_handle_option): Add -fsanitize=vptr.
* sanitizer.def (BUILT_IN_UBSAN_HANDLE_DYNAMIC_TYPE_CACHE_MISS,
BUILT_IN_UBSAN_HANDLE_DYNAMIC_TYPE_CACHE_MISS_ABORT): New.
* ubsan.h (enum ubsan_null_ckind): Add UBSAN_DOWNCAST_POINTER
and UBSAN_DOWNCAST_REFERENCE.
cp/
* config-lang.in (gtfiles): Add cp/cp-ubsan.c.
* cp-gimplify.c (cp_genericize_r): Call
cp_ubsan_maybe_instrument_member_call for member calls,
cp_ubsan_maybe_instrument_member_access for member accesses
and cp_ubsan_fixup_downcast_instrumentation for downcast
instrumentation.
* cp-tree.h (cp_ubsan_maybe_instrument_member_call,
cp_ubsan_maybe_instrument_member_access,
cp_ubsan_maybe_instrument_downcast,
cp_ubsan_fixup_downcast_instrumentation): New prototypes.
* cp-ubsan.c: New file.
* Make-lang.in (CXX_AND_OBJCXX_OBJS): Add cp/cp-ubsan.o.
* semantics.c (cxx_eval_call_expression): Handle 4 argument
BUILT_IN_UBSAN_HANDLE_DYNAMIC_TYPE_CACHE_MISS{,_ABORT}.
* typeck.c (build_class_member_access_expr): Provide locus
for COMPONENT_REFs.
(build_static_cast_1): Instrument downcasts.
gcc/testsuite/
* g++.dg/ubsan/vptr-1.C: New test.
* g++.dg/ubsan/vptr-2.C: New test.
* g++.dg/ubsan/vptr-3.C: New test.
* g++.dg/ubsan/vptr-4.C: New test.
* g++.dg/ubsan/vptr

Re: [GOOGLE] Fix LIPO COMDAT fixup and gcov-tool interactions

2014-09-16 Thread Teresa Johnson

On Mon, Sep 15, 2014 at 9:29 PM, Xinliang David Li  wrote:
> Is it necessary to declare zero_counts array at all?  Can a flag field
> be added to dyn_cgraph_node structure to indicate if it is fixed up?

The zero_counts array is used to pass info back to the caller in
libgcov-driver.cc (dyn_cgraph_node), which is where it is allocated.
That routine does not have access to the dyn-ipa cgraph.

Teresa

>
> David
>
> On Fri, Sep 12, 2014 at 4:31 PM, Teresa Johnson  wrote:
>> This patch addresses issues when running gcov-tool after performing
>> COMDAT fixup during dyn-ipa. Functions that were previously all zero
>> counts are marked, and the counts are discarded when being read in
>> by gcov-tool before recalculating module groups and summary info.
>>
>> While here, cleaned up the gcov-tool output (remove an overly-verbose output,
>> make all output consistently go to stderr).
>>
>> Passes regression tests and manual tests. Ok for google branches?
>>
>> 2014-09-12  Teresa Johnson  
>>
>> * gcc/coverage.c (read_counts_file): Handle new section.
>> * gcc/gcov.c (read_count_file): Ditto.
>> * gcc/gcov-dump.c (dump_gcov_file): Ditto.
>> (tag_function): Ditto.
>> (tag_zero_fixup): New function.
>> * gcc/gcov-io.c (gcov_read_comdat_zero_fixup): Ditto.
>> * gcc/gcov-io.h (gcov_read_comdat_zero_fixup): Ditto.
>> * libgcc/dyn-ipa.c (struct checksum_alias): Change flag to pointer.
>> (new_checksum_alias): Ditto.
>> (cfg_checksum_insert): Ditto.
>> (checksum_set_insert): Ditto.
>> (gcov_build_callgraph): New parameter.
>> (gcov_collect_imported_modules): Add assert for duplicate gcda reads.
>> (gcov_fixup_counters_checksum): Change flag to pointer to flag, set 
>> it.
>> (__gcov_compute_module_groups): New parameter.
>> * libgcc/libgcov-driver.c (set_gcov_fn_fixed_up): New function.
>> (get_gcov_fn_fixed_up): Ditto.
>> (gcov_exit_merge_gcda): Handle new section.
>> (gcov_write_comdat_zero_fixup): Ditto.
>> (gcov_write_build_info): Ditto.
>> (gcov_write_comdat_zero_fixup): New function.
>> (gcov_write_func_counters): Fix indent.
>> (gcov_dump_module_info): Write new flag section.
>> * libgcc/libgcov.h (gcov_get_counter): Clear fixed-up counters.
>> (gcov_get_counter_target): Ditto.
>> * libgcc/libgcov-util.c (tag_function): Annotate fixed-up functions,
>> remove overly verbose output.
>> (tag_counters): Clear fixed-up counters.
>> (lipo_process_substitute_string_1): Send all verbose output to 
>> stderr.
>> (tag_zero_fixup): New function.
>> (read_gcda_file): Deallocate flag array.
>> (gcov_profile_scale): Send all verbose output to stderr.
>> (gcov_profile_normalize): Ditto.
>>
>> Index: gcc/coverage.c
>> ===
>> --- gcc/coverage.c  (revision 215230)
>> +++ gcc/coverage.c  (working copy)
>> @@ -820,6 +820,14 @@ read_counts_file (const char *da_file_name, unsign
>>  free (build_info_strings[i]);
>>free (build_info_strings);
>>  }
>> +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
>> +{
>> +  /* Zero-profile fixup flags are not used by the compiler, read and
>> + ignore.  */
>> +  gcov_unsigned_t num_fn;
>> +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
>> (length, &num_fn);
>> +  free (zero_fixup_flags);
>> +}
>>else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident)
>> {
>>   counts_entry_t **slot, *entry, elt;
>> Index: gcc/gcov.c
>> ===
>> --- gcc/gcov.c  (revision 215230)
>> +++ gcc/gcov.c  (working copy)
>> @@ -1441,6 +1441,12 @@ read_count_file (function_t *fns)
>>  free (build_info_strings[i]);
>>free (build_info_strings);
>>  }
>> +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
>> +{
>> +  gcov_unsigned_t num_fn;
>> +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
>> (length, &num_fn);
>> +  free (zero_fixup_flags);
>> +}
>>else if (tag == GCOV_TAG_FUNCTION && !length)
>> ; /* placeholder  */
>>else if (tag == GCOV_TAG_FUNCTION && length == 
>> GCOV_TAG_FUNCTION_LENGTH)
>> Index: gcc/gcov-dump.c
>> ===
>> --- gcc/gcov-dump.c (revision 215230)
>> +++ gcc/gcov-dump.c (working copy)
>> @@ -42,6 +42,7 @@ static void tag_summary (const char *, unsigned, u
>>  static void tag_module_info (const char *, unsigned, unsigned);
>>  static void dump_working_sets (const char *filename ATTRIBUTE_UNUSED,
>> const struct gcov_ctr_summary *summary);
>> +static void tag_zero_fixup (const char *, unsigned, unsigned);
>

[PINGv2][PATCH] Asan optimization for aligned accesses.

2014-09-16 Thread Marat Zakirov



On 09/10/2014 04:30 PM, Marat Zakirov wrote:

On 09/02/2014 07:09 PM, Marat Zakirov wrote:

Hi all!

Here's a simple optimization patch for Asan. It stores alignment 
information into ASAN_CHECK which is then extracted by sanopt to 
reduce number of "and 0x7" instructions for sufficiently aligned 
accesses. I checked it on linux kernel by comparing results of 
objdump -d -j .text vmlinux | grep "and.*0x7," for optimized and 
regular cases. It eliminates 12% of and 0x7's.


No regressions. Sanitized GCC was successfully Asan-bootstrapped. No 
false positives were found in kernel.


--Marat





gcc/ChangeLog:

2014-09-02  Marat Zakirov  

	* asan.c (build_check_stmt): Alignment arg was added.
	(asan_expand_check_ifn): Optimization for alignment >= 8.

gcc/testsuite/ChangeLog:

2014-09-02  Marat Zakirov  

	* c-c++-common/asan/red-align-1.c: New test.
	* c-c++-common/asan/red-align-2.c: New test.

diff --git a/gcc/asan.c b/gcc/asan.c
index 58e7719..aed5ede 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len,
   if (end_instrumented)
 flags |= ASAN_CHECK_END_INSTRUMENTED;
 
-  g = gimple_build_call_internal (IFN_ASAN_CHECK, 3,
+  g = gimple_build_call_internal (IFN_ASAN_CHECK, 4,
   build_int_cst (integer_type_node, flags),
-  base, len);
+  base, len,
+  build_int_cst (integer_type_node,
+		 align/BITS_PER_UNIT));
   gimple_set_location (g, loc);
   if (before_p)
 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
@@ -2434,6 +2436,7 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 
   tree base = gimple_call_arg (g, 1);
   tree len = gimple_call_arg (g, 2);
+  HOST_WIDE_INT align = tree_to_shwi (gimple_call_arg (g, 3));
 
   HOST_WIDE_INT size_in_bytes
 = is_scalar_access && tree_fits_shwi_p (len) ? tree_to_shwi (len) : -1;
@@ -2547,7 +2550,10 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 	  gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
 	  gimple_seq seq = NULL;
 	  gimple_seq_add_stmt (&seq, shadow_test);
-	  gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7));
+	  /* Aligned (>= 8 bytes) access do not need & 7.  */
+	  if (align < 8)
+	gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR,
+		 base_addr, 7));
 	  gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
 		  gimple_seq_last (seq)));
 	  if (real_size_in_bytes > 1)
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7ae60f3..54ade9f 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -55,4 +55,4 @@ DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL)
 DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, ".W..")
+DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, ".W...")
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-1.c b/gcc/testsuite/c-c++-common/asan/red-align-1.c
new file mode 100644
index 000..1edb3a2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant & 7.  */
+
+/* { dg-options "-fdump-tree-sanopt" } */
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p->a;
+}
+
+/* { dg-final { scan-tree-dump-times "& 7" 0 "sanopt" } } */
+/* { dg-final { cleanup-tree-dump "sanopt" } } */
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-2.c b/gcc/testsuite/c-c++-common/asan/red-align-2.c
new file mode 100644
index 000..161fe3c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant & 7.  */
+
+/* { dg-options "-fdump-tree-sanopt" } */
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p->b;
+}
+
+/* { dg-final { scan-tree-dump-times "& 7" 1 "sanopt" } } */
+/* { dg-final { cleanup-tree-dump "sanopt" } } */

Re: [PATCH 1/2, x86] Add palignr support for AVX2.

2014-09-16 Thread H.J. Lu

On Tue, Sep 16, 2014 at 6:15 AM, Evgeny Stupachenko  wrote:
> PING 2
>
> On Mon, Sep 8, 2014 at 2:03 PM, Evgeny Stupachenko  wrote:
>> PING
>>
>> On Wed, Aug 27, 2014 at 7:50 PM, Evgeny Stupachenko  
>> wrote:
>>> The rotate insn appeared right after expand.
>>> I've done it similar to define_insn_and_split "*avx_vperm_broadcast_".
>>> I don't see any potential losses on splitting that after reload.
>>>
>>> On Tue, Aug 26, 2014 at 8:29 PM, Richard Henderson  wrote:
 On 08/26/2014 05:59 AM, Evgeny Stupachenko wrote:
> +(define_insn_and_split "avx2_rotate_perm"
> +  [(set (match_operand:V_256 0 "register_operand" "=&x")
> +  (vec_select:V_256
> +   (match_operand:V_256 1 "register_operand" "x")
> +   (match_parallel 2 "palignr_operand"
> + [(match_operand 3 "const_int_operand" "n")])))]
> +  "TARGET_AVX2"
> +  "#"
> +  "&& reload_completed"
> +  [(const_int 0)]

 Why are you waiting until after reload to expand this?  It's only the
 vec_select parallel that determines which direction the palignr should be 
 done.

 This seems like something you could do during permutation expansion.

Assuming your change is triggered today without any additional changes,
you should include some testcases.  For now, it doesn't show whether it
does anything useful.

-- 
H.J.

Re: [GOOGLE] Fix LIPO COMDAT fixup and gcov-tool interactions

2014-09-16 Thread Xinliang David Li

 The zero_counts array is passed to gcov_build_callgraph but not used
until the dyn-cgraph is initialized. We should avoid increasing
runtime memory overhead by not creating it if possible.

David

On Tue, Sep 16, 2014 at 7:57 AM, Teresa Johnson  wrote:
> On Mon, Sep 15, 2014 at 9:29 PM, Xinliang David Li  wrote:
>> Is it necessary to declare zero_counts array at all?  Can a flag field
>> be added to dyn_cgraph_node structure to indicate if it is fixed up?
>
> The zero_counts array is used to pass info back to the caller in
> libgcov-driver.cc (dyn_cgraph_node), which is where it is allocated.
> That routine does not have access to the dyn-ipa cgraph.
>
> Teresa
>
>>
>> David
>>
>> On Fri, Sep 12, 2014 at 4:31 PM, Teresa Johnson  wrote:
>>> This patch addresses issues when running gcov-tool after performing
>>> COMDAT fixup during dyn-ipa. Functions that were previously all zero
>>> counts are marked, and the counts are discarded when being read in
>>> by gcov-tool before recalculating module groups and summary info.
>>>
>>> While here, cleaned up the gcov-tool output (remove an overly-verbose 
>>> output,
>>> make all output consistently go to stderr).
>>>
>>> Passes regression tests and manual tests. Ok for google branches?
>>>
>>> 2014-09-12  Teresa Johnson  
>>>
>>> * gcc/coverage.c (read_counts_file): Handle new section.
>>> * gcc/gcov.c (read_count_file): Ditto.
>>> * gcc/gcov-dump.c (dump_gcov_file): Ditto.
>>> (tag_function): Ditto.
>>> (tag_zero_fixup): New function.
>>> * gcc/gcov-io.c (gcov_read_comdat_zero_fixup): Ditto.
>>> * gcc/gcov-io.h (gcov_read_comdat_zero_fixup): Ditto.
>>> * libgcc/dyn-ipa.c (struct checksum_alias): Change flag to pointer.
>>> (new_checksum_alias): Ditto.
>>> (cfg_checksum_insert): Ditto.
>>> (checksum_set_insert): Ditto.
>>> (gcov_build_callgraph): New parameter.
>>> (gcov_collect_imported_modules): Add assert for duplicate gcda 
>>> reads.
>>> (gcov_fixup_counters_checksum): Change flag to pointer to flag, set 
>>> it.
>>> (__gcov_compute_module_groups): New parameter.
>>> * libgcc/libgcov-driver.c (set_gcov_fn_fixed_up): New function.
>>> (get_gcov_fn_fixed_up): Ditto.
>>> (gcov_exit_merge_gcda): Handle new section.
>>> (gcov_write_comdat_zero_fixup): Ditto.
>>> (gcov_write_build_info): Ditto.
>>> (gcov_write_comdat_zero_fixup): New function.
>>> (gcov_write_func_counters): Fix indent.
>>> (gcov_dump_module_info): Write new flag section.
>>> * libgcc/libgcov.h (gcov_get_counter): Clear fixed-up counters.
>>> (gcov_get_counter_target): Ditto.
>>> * libgcc/libgcov-util.c (tag_function): Annotate fixed-up functions,
>>> remove overly verbose output.
>>> (tag_counters): Clear fixed-up counters.
>>> (lipo_process_substitute_string_1): Send all verbose output to 
>>> stderr.
>>> (tag_zero_fixup): New function.
>>> (read_gcda_file): Deallocate flag array.
>>> (gcov_profile_scale): Send all verbose output to stderr.
>>> (gcov_profile_normalize): Ditto.
>>>
>>> Index: gcc/coverage.c
>>> ===
>>> --- gcc/coverage.c  (revision 215230)
>>> +++ gcc/coverage.c  (working copy)
>>> @@ -820,6 +820,14 @@ read_counts_file (const char *da_file_name, unsign
>>>  free (build_info_strings[i]);
>>>free (build_info_strings);
>>>  }
>>> +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
>>> +{
>>> +  /* Zero-profile fixup flags are not used by the compiler, read 
>>> and
>>> + ignore.  */
>>> +  gcov_unsigned_t num_fn;
>>> +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
>>> (length, &num_fn);
>>> +  free (zero_fixup_flags);
>>> +}
>>>else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident)
>>> {
>>>   counts_entry_t **slot, *entry, elt;
>>> Index: gcc/gcov.c
>>> ===
>>> --- gcc/gcov.c  (revision 215230)
>>> +++ gcc/gcov.c  (working copy)
>>> @@ -1441,6 +1441,12 @@ read_count_file (function_t *fns)
>>>  free (build_info_strings[i]);
>>>free (build_info_strings);
>>>  }
>>> +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
>>> +{
>>> +  gcov_unsigned_t num_fn;
>>> +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
>>> (length, &num_fn);
>>> +  free (zero_fixup_flags);
>>> +}
>>>else if (tag == GCOV_TAG_FUNCTION && !length)
>>> ; /* placeholder  */
>>>else if (tag == GCOV_TAG_FUNCTION && length == 
>>> GCOV_TAG_FUNCTION_LENGTH)
>>> Index: gcc/gcov-dump.c
>>> ===
>>> --- gcc/gcov-dump.c (revision 215230)
>>> +++ gcc/gcov-dump.

Re: [PINGv2][PATCH] Asan optimization for aligned accesses.

2014-09-16 Thread Jakub Jelinek

On Tue, Sep 16, 2014 at 06:59:57PM +0400, Marat Zakirov wrote:
> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len,
>if (end_instrumented)
>  flags |= ASAN_CHECK_END_INSTRUMENTED;
>  
> -  g = gimple_build_call_internal (IFN_ASAN_CHECK, 3,
> +  g = gimple_build_call_internal (IFN_ASAN_CHECK, 4,
> build_int_cst (integer_type_node, flags),
> -   base, len);
> +   base, len,
> +   build_int_cst (integer_type_node,
> +  align/BITS_PER_UNIT));

Formatting.  Spaces should be around / (both before and after).

> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c
> @@ -0,0 +1,20 @@
> +/* This tests aligment propagation to structure elem and
> +   abcense of redudant & 7.  */

absence of redundant

> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c
> @@ -0,0 +1,20 @@
> +/* This tests aligment propagation to structure elem and
> +   abcense of redudant & 7.  */

Likewise.

Otherwise, LGTM.

Jakub

Re: [PATCH 2/2] Add some more test cases for fentry and pg

2014-09-16 Thread H.J. Lu

On Mon, Sep 15, 2014 at 8:59 AM, Andi Kleen  wrote:
> On Mon, Sep 15, 2014 at 11:29:50AM +0200, Richard Biener wrote:
>> On Fri, Sep 12, 2014 at 4:32 PM, Andi Kleen  wrote:
>> > From: Andi Kleen 
>> >
>> > Test fentry and no_instrument_function overriding.
>> >
>> > No test cases for the LTO test for now, as the LTO
>> > harness doesn't seem to support different flags for the final
>> > link.
>>
>> Sure it does - via dg-extra-ld-options (you have to negate compile-time
>> opts to "remove" them).
>
> Ok. I'll add test cases for that too then.
>
>>
>> Are you sure the tests are target independent enough?  That is,
>> 'mcount' is really 'mcount' on all targets?
>
> I'm not. I'll make them x86 only.
>
> Thanks,
>
> -Andi

Tested on Linux/x86-64.  I checked in this to fix:

https://gcc.gnu.org/ml/gcc-regression/2014-09/msg00084.html

-- 
H.J.
---
Index: ChangeLog
===
--- ChangeLog (revision 215300)
+++ ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2014-09-16  H.J. Lu  
+
+ * gcc.dg/pg.c: Fix a typo.
+
 2014-09-16  Richard Biener  

  PR testsuite/63258
Index: gcc.dg/pg.c
===
--- gcc.dg/pg.c (revision 215300)
+++ gcc.dg/pg.c (working copy)
@@ -1,6 +1,6 @@
 /* Test -fprofile override */
 /* { dg-do compile } */
-/* { dg-options "-fprofile" { target { { i686-*-* x86_64-*-* } } } } */
+/* { dg-options "-fprofile" { target i?86-*-* x86_64-*-* } } */
 /* { dg-final { scan-assembler-not "mcount" } } */
 /* Origin: Andi Kleen */
 extern void foobar(const char *);

Re: [debug-early] reuse variable DIEs and fix their context

2014-09-16 Thread Aldy Hernandez


On 09/12/14 10:56, Jason Merrill wrote:

On 09/12/2014 01:48 PM, Aldy Hernandez wrote:

Unless I'm misunderstanding something, validate_phases() verifies that
the numbers add up by looking at the actual string name of the phase,
regardless of whether you timevar_push/pop'ed it:


Yes, but why wouldn't the numbers add up?  The comment for
timevar_push_1 says "No further elapsed time is attributed to the
previous topmost timing variable on the stack; subsequent elapsed time
is attributed to TIMEVAR, until it is popped or another element is
pushed on top."


As discussed on IRC, because even though a push will stop any timer on 
the stack, the timers throttled with timevar_{start,stop} do not live on 
the stack and will continue counting even if a nested timevar_{push,pop} 
happens, thus counting time twice.


You've suggested stopping the parsing and starting the deferred timer, 
which I've implemented here.


A few things: I didn't do anything with non C/C++ languages, which 
either don't care about TV_* timers, or don't implement TV_PHASE_DEFERRED.


Also, see ?? note in c_write_global_declarations_1.  I'm not sure how 
fine grained you want things.  If I were to keep track of 
TV_PHASE_DBGINFO here properly, it would start becoming a mess of nested 
phase timers that we'd have to manually keep track of.  This smells of 
overusing global variables.  I would prefer to use a 
timevar_push(*DBGINFO*) variant for dbginfo (non "phase" timer), but I'm 
also fine leaving things with the patch as is (modulo the comment).


Up to you sir.

OK for branch?

Aldy
commit c349dc8af009ac41ba1f86c3fde9052fb5282629
Author: Aldy Hernandez 
Date:   Tue Sep 16 08:38:10 2014 -0700

Resurrect TV_PHASE_DEFERRED.

diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index a0a047f..89e3193 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -10306,11 +10306,16 @@ c_write_global_declarations_1 (tree globals)
 }
   while (reconsider);
 
+  /* ?? For completeness, we could stop the TV_PHASE_DEFERRED timer
+ here, and start the TV_PHASE_DBGINFO timer.  Is it worth it, or
+ would it convolute things?  */
   for (decl = globals; decl; decl = DECL_CHAIN (decl))
 {
   check_global_declaration_1 (decl);
   debug_hooks->early_global_decl (decl);
 }
+  /* ?? Similarly here. Stop TV_PHASE_DBGINFO and start
+ TV_PHASE_DEFERRED again.  */
 }
 
 /* Callback to collect a source_ref from a DECL.  */
@@ -10373,6 +10378,9 @@ c_parse_final_cleanups (void)
   if (pch_file)
 return;
 
+  timevar_stop (TV_PHASE_PARSING);
+  timevar_start (TV_PHASE_DEFERRED);
+
   /* Do the Objective-C stuff.  This is where all the Objective-C
  module stuff gets generated (symtab, class/protocol/selector
  lists etc).  */
@@ -10414,6 +10422,9 @@ c_parse_final_cleanups (void)
 c_write_global_declarations_1 (BLOCK_VARS (DECL_INITIAL (t)));
   c_write_global_declarations_1 (BLOCK_VARS (ext_block));
 
+  timevar_stop (TV_PHASE_DEFERRED);
+  timevar_start (TV_PHASE_PARSING);
+
   ext_block = NULL;
 }
 
diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c
index 28bf6e4..64cd968 100644
--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -4354,6 +4354,9 @@ c_parse_final_cleanups (void)
 
   /* FIXME - huh?  was  input_line -= 1;*/
 
+  timevar_stop (TV_PHASE_PARSING);
+  timevar_start (TV_PHASE_DEFERRED);
+
   /* We now have to write out all the stuff we put off writing out.
  These include:
 
@@ -4671,6 +4674,9 @@ c_parse_final_cleanups (void)
   /* Collect candidates for Java hidden aliases.  */
   java_hidden_aliases = collect_candidates_for_java_method_aliases ();
 
+  timevar_stop (TV_PHASE_DEFERRED);
+  timevar_start (TV_PHASE_PARSING);
+
   if (flag_vtable_verify)
 {
   vtv_recover_class_info ();
@@ -4680,6 +4686,8 @@ c_parse_final_cleanups (void)
 
   /* Issue warnings about static, but not defined, functions, etc, and
  generate initial debug information.  */
+  timevar_stop (TV_PHASE_PARSING);
+  timevar_start (TV_PHASE_DBGINFO);
   walk_namespaces (emit_debug_for_namespace, 0);
   if (vec_safe_length (pending_statics) != 0)
 {
@@ -4689,7 +4697,8 @@ c_parse_final_cleanups (void)
  pending_statics->length (),
  EMIT_DEBUG_EARLY);
 }
-
+  timevar_stop (TV_PHASE_DBGINFO);
+  timevar_start (TV_PHASE_PARSING);
 }
 
 /* Perform any post compilation-proper cleanups for the C++ front-end.
diff --git a/gcc/langhooks.c b/gcc/langhooks.c
index dfc3ab6..49658d7 100644
--- a/gcc/langhooks.c
+++ b/gcc/langhooks.c
@@ -300,6 +300,8 @@ global_decl_processing_and_early_debug (void)
   tree globals, decl, *vec;
   int len, i;
 
+  timevar_stop (TV_PHASE_PARSING);
+  timevar_start (TV_PHASE_DEFERRED);
   /* Really define vars that have had only a tentative definition.
  Really output inline functions that must actually be callable
  and have not been output so far.  */
@@ -316,9 +318,13 @@ global_decl_processing_and_early_debug (void)
 
   wrapup_global_decla

RE: [PATCH] gcc parallel make check

2014-09-16 Thread VandeVondele Joost

>> > These numbers are useful to try and ensure the overhead (scaling factor) 
>> > is reasonable, thanks.
>>
>> A nice improvement indeed.  The patched result is 15 times faster
>> than the serial unpatched run.  So there is room for improvement
>
> Note, the box used was oldish AMD 16-core, no ht, box, haven't tried it on 
> anything

on a 32 core box, no ht, I see these timings:

time make -j32 -k check >& log.check32 ; time make -j8 -k check >& log.check8

real    18m14.562s
user    260m21.578s
sys     264m26.042s

real    41m33.210s
user    233m4.563s
sys     72m11.429s

so it is not quite reaching the ideal 4x speedup. Counting the number of 
'expect' processes they are nicely at around 32 and 8 for the full test, with 
only a very short tail near the end. So, there might be some overhead 
somewhere. Total user time is similar, but time in sys goes up.

[patch] allowing configure --target=e500v[12]-etc

2014-09-16 Thread Olivier Hainque

Hello,

The attached patch is something we have been using for a while
to facilitate the production of toolchains targeting the e500
series of powerpc's.

The general idea is to have e500 treated essentially as powerpc
except selecting a default with_cpu and using a specific "spe" os
extension when appropriate for proper ABI selection.

We find this useful to have triplets like e500v2-wrs-vxworks or
e500v2-wrs-linux work out of the box, and thought others might
find it useful as well.

OK to commit ?

Thanks in advance for your feedback,

Olivier


2014-09-16  Olivier Hainque  

toplevel/
* config.sub: Accept e500v[12] cpu names. Canonicalize to
powerpc and add a "spe" suffix to the os name when required
to select the proper ABI and not already there.

gcc/
* config.gcc: Select a default with_cpu=8540 for e500v1 and
a default with_cpu=8548 for e500v2.



e500v2-config.diff
Description: Binary data

[PATCHv3] Vimrc config with GNU formatting

2014-09-16 Thread Yury Gribov


Hi all,

This is the third version of the patch. A list of changes since last 
version:
* move config to contrib so that it's _not_ enabled by default (current 
score is 2/1 in favor of no Vim config by default)

* update Makefile.in to make .local.vimrc if developer asks for it
* disable autoformatting for flex files
* fix filtering of non-GNU sources (libsanitizer)
* added some small fixes in cinoptions based on feedback from community

As noted by Richard, the config does not do a good job of formatting 
unbound {} blocks e.g.

void
foo ()
{
  int x;
{
  // I'm an example of bad bad formatting
}
}
but it seems to be the best we can get with Vim's cindent
(and I don't think anyone seriously considers writing a custom indentexpr).

Ok to commit?

-Y
commit 67219512dac9a5cc14eea8f157222a226044dd72
Author: Yury Gribov 
Date:   Thu Sep 4 16:55:44 2014 +0400

2014-09-16  Laurynas Biveinis  
	Yury Gribov  

	Vim config with GNU formatting.

contrib/
	* vimrc: New file.

/
	* .gitignore: Added .local.vimrc.
	* Makefile.tpl (.local.vimrc): New target.
	* Makefile.in: Regenerate.

diff --git a/.gitignore b/.gitignore
index e9b56be..252b8b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,8 @@ POTFILES
 TAGS
 TAGS.sub
 
+.local.vimrc
+
 .gdbinit
 .gdb_history
 
diff --git a/Makefile.in b/Makefile.in
index d6105b3..e573069 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -2384,6 +2384,11 @@ mail-report-with-warnings.log: warning.log
 	chmod +x $@
 	echo If you really want to send e-mail, run ./$@ now
 
+# Local Vim config
+
+vimrc:
+	(cd $(srcdir); $(LN_S) contrib/vimrc .local.vimrc)
+
 # Installation targets.
 
 .PHONY: install uninstall
diff --git a/Makefile.tpl b/Makefile.tpl
index f7c7e38..d050694 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -867,6 +867,11 @@ mail-report-with-warnings.log: warning.log
 	chmod +x $@
 	echo If you really want to send e-mail, run ./$@ now
 
+# Local Vim config
+
+vimrc:
+	(cd $(srcdir); $(LN_S) contrib/vimrc .local.vimrc)
+
 # Installation targets.
 
 .PHONY: install uninstall
diff --git a/contrib/vimrc b/contrib/vimrc
new file mode 100644
index 000..7287bd1
--- /dev/null
+++ b/contrib/vimrc
@@ -0,0 +1,43 @@
+" Code formatting settings for Vim.
+"
+" To enable this for GCC files by default, install thinca's localrc plugin
+" and do
+"   $ make .local.vimrc
+" Or if you dislike plugins, add autocmd in your .vimrc:
+"   :au BufNewFile,BufReadPost path/to/gcc/* :so path/to/gcc/.local.vimrc
+" Or just source file manually every time if you are masochist:
+"   :so .local.vimrc
+" 
+" Copyright (C) 2014 Free Software Foundation, Inc.
+"
+" This program is free software; you can redistribute it and/or modify
+" it under the terms of the GNU General Public License as published by
+" the Free Software Foundation; either version 3 of the License, or
+" (at your option) any later version.
+"
+" This program is distributed in the hope that it will be useful,
+" but WITHOUT ANY WARRANTY; without even the implied warranty of
+" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+" GNU General Public License for more details.
+"
+" You should have received a copy of the GNU General Public License
+" along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+function! SetStyle()
+  let l:fname = expand("%:p")
+  if stridx(l:fname, 'libsanitizer') != -1
+return
+  endif
+  let l:ext = fnamemodify(l:fname, ":e")
+  let l:c_exts = ['c', 'h', 'cpp', 'cc', 'C', 'H', 'def', 'java']
+  if index(l:c_exts, l:ext) != -1
+setlocal cindent
+setlocal shiftwidth=2
+setlocal softtabstop=2
+setlocal cinoptions=>2s,n-s,{s,^-s,:s,=s,g0,f0,hs,p2s,t0,+s,(0,u0,w1,m0
+setlocal textwidth=79
+setlocal formatoptions-=ro formatoptions+=cql
+  endif
+endfunction
+
+call SetStyle()

[patch] flag .persistent.bss sections as bss

2014-09-16 Thread Olivier Hainque

Hello,

Some target loaders (on VxWorks 653 for example) handle the ".persistent.bss"
section as an area to reset during the initial startup of applications but not
during warm restarts.

We need these to be treated as bss sections by the compiler, in particular
to prevent the use of executable file space for them.

The attached patchlet does that by just augmenting the list of sections
recognized by name to be marked as SECTION_BSS by default_section_type_flags.

We have been using this for a while now, for Ada and its Persistent_BSS pragma
specifically.

Bootstrapped and regression tested on x86_64-linux.

OK to commit ?

Thanks in advance for your feedback,

With Kind Regards,

Olivier

2014-09-16  Olivier Hainque  

* varasm.c (default_section_type_flags): Flag .persistent.bss
sections as SECTION_BSS.




persistent-bss.diff
Description: Binary data

Re: [GOOGLE] Fix LIPO COMDAT fixup and gcov-tool interactions

2014-09-16 Thread Teresa Johnson

On Tue, Sep 16, 2014 at 8:07 AM, Xinliang David Li  wrote:
>  The zero_counts array is passed to gcov_build_callgraph but not used
> until the dyn-cgraph is initialized. We should avoid increasing
> runtime memory overhead by not creating it if possible.

We could delay creation a little bit, until the callgraph has been
initialized. But I am not sure this gains us much as it doesn't avoid
having the lifetime of the array overlap with the lifetime of the
callgraph. And unless fixups have been explicitly disabled or there
are no comdats we will need to allocate it (so likely in almost all
cases).

However, I can reduce the required overhead by using an array of chars
instead of ints. I measured the number of bytes in the array for a big
app and it was 5.6M with the int array and 1.5M with the char array.
Does this seem reasonable? To reduce it further I could encode as a
bitvector as we do in the gcda file.

Teresa

>
> David
>
> On Tue, Sep 16, 2014 at 7:57 AM, Teresa Johnson  wrote:
>> On Mon, Sep 15, 2014 at 9:29 PM, Xinliang David Li  
>> wrote:
>>> Is it necessary to declare zero_counts array at all?  Can a flag field
>>> be added to dyn_cgraph_node structure to indicate if it is fixed up?
>>
>> The zero_counts array is used to pass info back to the caller in
>> libgcov-driver.cc (dyn_cgraph_node), which is where it is allocated.
>> That routine does not have access to the dyn-ipa cgraph.
>>
>> Teresa
>>
>>>
>>> David
>>>
>>> On Fri, Sep 12, 2014 at 4:31 PM, Teresa Johnson  
>>> wrote:
 This patch addresses issues when running gcov-tool after performing
 COMDAT fixup during dyn-ipa. Functions that were previously all zero
 counts are marked, and the counts are discarded when being read in
 by gcov-tool before recalculating module groups and summary info.

 While here, cleaned up the gcov-tool output (remove an overly-verbose 
 output,
 make all output consistently go to stderr).

 Passes regression tests and manual tests. Ok for google branches?

 2014-09-12  Teresa Johnson  

 * gcc/coverage.c (read_counts_file): Handle new section.
 * gcc/gcov.c (read_count_file): Ditto.
 * gcc/gcov-dump.c (dump_gcov_file): Ditto.
 (tag_function): Ditto.
 (tag_zero_fixup): New function.
 * gcc/gcov-io.c (gcov_read_comdat_zero_fixup): Ditto.
 * gcc/gcov-io.h (gcov_read_comdat_zero_fixup): Ditto.
 * libgcc/dyn-ipa.c (struct checksum_alias): Change flag to pointer.
 (new_checksum_alias): Ditto.
 (cfg_checksum_insert): Ditto.
 (checksum_set_insert): Ditto.
 (gcov_build_callgraph): New parameter.
 (gcov_collect_imported_modules): Add assert for duplicate gcda 
 reads.
 (gcov_fixup_counters_checksum): Change flag to pointer to flag, 
 set it.
 (__gcov_compute_module_groups): New parameter.
 * libgcc/libgcov-driver.c (set_gcov_fn_fixed_up): New function.
 (get_gcov_fn_fixed_up): Ditto.
 (gcov_exit_merge_gcda): Handle new section.
 (gcov_write_comdat_zero_fixup): Ditto.
 (gcov_write_build_info): Ditto.
 (gcov_write_comdat_zero_fixup): New function.
 (gcov_write_func_counters): Fix indent.
 (gcov_dump_module_info): Write new flag section.
 * libgcc/libgcov.h (gcov_get_counter): Clear fixed-up counters.
 (gcov_get_counter_target): Ditto.
 * libgcc/libgcov-util.c (tag_function): Annotate fixed-up 
 functions,
 remove overly verbose output.
 (tag_counters): Clear fixed-up counters.
 (lipo_process_substitute_string_1): Send all verbose output to 
 stderr.
 (tag_zero_fixup): New function.
 (read_gcda_file): Deallocate flag array.
 (gcov_profile_scale): Send all verbose output to stderr.
 (gcov_profile_normalize): Ditto.

 Index: gcc/coverage.c
 ===
 --- gcc/coverage.c  (revision 215230)
 +++ gcc/coverage.c  (working copy)
 @@ -820,6 +820,14 @@ read_counts_file (const char *da_file_name, unsign
  free (build_info_strings[i]);
free (build_info_strings);
  }
 +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
 +{
 +  /* Zero-profile fixup flags are not used by the compiler, read 
 and
 + ignore.  */
 +  gcov_unsigned_t num_fn;
 +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
 (length, &num_fn);
 +  free (zero_fixup_flags);
 +}
else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident)
 {
   counts_entry_t **slot, *entry, elt;
 Index: gcc/gcov.c
 ===

Re: [PATCH 2/2] Add some more test cases for fentry and pg

2014-09-16 Thread H.J. Lu

On Tue, Sep 16, 2014 at 8:36 AM, H.J. Lu  wrote:
> On Mon, Sep 15, 2014 at 8:59 AM, Andi Kleen  wrote:
>> On Mon, Sep 15, 2014 at 11:29:50AM +0200, Richard Biener wrote:
>>> On Fri, Sep 12, 2014 at 4:32 PM, Andi Kleen  wrote:
>>> > From: Andi Kleen 
>>> >
>>> > Test fentry and no_instrument_function overriding.
>>> >
>>> > No test cases for the LTO test for now, as the LTO
>>> > harness doesn't seem to support different flags for the final
>>> > link.
>>>
>>> Sure it does - via dg-extra-ld-options (you have to negate compile-time
>>> opts to "remove" them).
>>
>> Ok. I'll add test cases for that too then.
>>
>>>
>>> Are you sure the tests are target independent enough?  That is,
>>> 'mcount' is really 'mcount' on all targets?
>>
>> I'm not. I'll make them x86 only.
>>
>> Thanks,
>>
>> -Andi
>
> Tested on Linux/x86-64.  I checked in this to fix:
>
> https://gcc.gnu.org/ml/gcc-regression/2014-09/msg00084.html
>
> --
> H.J.
> ---
> Index: ChangeLog
> ===
> --- ChangeLog (revision 215300)
> +++ ChangeLog (working copy)
> @@ -1,3 +1,7 @@
> +2014-09-16  H.J. Lu  
> +
> + * gcc.dg/pg.c: Fix a typo.
> +
>  2014-09-16  Richard Biener  
>
>   PR testsuite/63258
> Index: gcc.dg/pg.c
> ===
> --- gcc.dg/pg.c (revision 215300)
> +++ gcc.dg/pg.c (working copy)
> @@ -1,6 +1,6 @@
>  /* Test -fprofile override */
>  /* { dg-do compile } */
> -/* { dg-options "-fprofile" { target { { i686-*-* x86_64-*-* } } } } */
> +/* { dg-options "-fprofile" { target i?86-*-* x86_64-*-* } } */
>  /* { dg-final { scan-assembler-not "mcount" } } */
>  /* Origin: Andi Kleen */
>  extern void foobar(const char *);

Here is another one:

Index: ChangeLog
===
--- ChangeLog (revision 215303)
+++ ChangeLog (working copy)
@@ -1,5 +1,9 @@
 2014-09-16  H.J. Lu  

+ * gcc.dg/pg-override.c: Fix a typo.
+
+2014-09-16  H.J. Lu  
+
  * gcc.dg/pg.c: Fix a typo.

 2014-09-16  Richard Biener  
Index: gcc.dg/pg-override.c
===
--- gcc.dg/pg-override.c (revision 215303)
+++ gcc.dg/pg-override.c (working copy)
@@ -1,6 +1,6 @@
 /* Test -fprofile override */
 /* { dg-do compile } */
-/* { dg-options "-fprofile" { target { { i686-*-* x86_64-*-* } } } } */
+/* { dg-options "-fprofile" { target i?86-*-* x86_64-*-* } } */
 /* { dg-final { scan-assembler-not "mcount" } } */
 /* Origin: Andi Kleen */
 extern void foobar(const char *);



-- 
H.J.

Re: Move unwind info to read-only section on AIX

2014-09-16 Thread David Edelsohn

On Thu, Sep 11, 2014 at 8:11 PM, Andrew Dixie  wrote:
> Hi David,
>
> As discussed, I have updated the patch to use SYMBOL-$ syntax for
> PC-relative addressing and to use a new symbol, __gcc_unwind_dbase,
> rather than overloading __dso_handle.
>
> Regards,
> Andrew
>
> 2014-09-09  Andrew Dixie  
>
>Move exception tables to read-only memory on AIX.
>* collect2.c (write_c_file_stat): Provide dbase on AIX.
>(scan_prog_file): Don't output __dso_handle nor __gcc_unwind_dbase.
>* config/rs6000/aix.h (ASM_PREFERRED_EH_DATA_FORMAT): define.
>(EH_TABLES_CAN_BE_READ_ONLY): define.
>(ASM_OUTPUT_DWARF_PCREL): define.
>(ASM_OUTPUT_DWARF_DATAREL): define.
>(EH_FRAME_IN_DATA_SECTION): undefine.
>* config/rs6000/rs6000-aix.c: new file.
>(rs6000_aix_asm_output_dwarf_pcrel): new function.
>(rs6000_aix_asm_output_dwarf_datarel): new function.
>* config/rs6000/rs6000.c (rs6000_xcoff_asm_init_sections): remove
>assignment of exception_section.
>* dwarf2asm.c (dw2_asm_output_encoded_addr_rtx): Add call to
>ASM_OUTPUT_DWARF_DATAREL.
>* dwarf2out.c (switch_to_eh_frame_section): Add call to
>EMIT_COLLECT2_LABELS_FOR_EH_FRAMES.

Hi, Andrew

Thanks for this work. The patch itself looks good. I cannot approve
the changes to dwarf2asm.c and dwarf2out.c.

My concern is this really is an ABI change.  One cannot mix object
files compiled with the old EH format and with the new EH format. One
can mix shared libraries of different styles, but not object files.
I'm not certain how much of a problem it is to require recompiling
everything from scratch.

Thanks, David

Re: [GOOGLE] Fix LIPO COMDAT fixup and gcov-tool interactions

2014-09-16 Thread Xinliang David Li

A 1.5M increase is not a big concern. OK with that.

David

On Tue, Sep 16, 2014 at 10:51 AM, Teresa Johnson  wrote:
> On Tue, Sep 16, 2014 at 8:07 AM, Xinliang David Li  wrote:
>>  The zero_counts array is passed to gcov_build_callgraph but not used
>> until the dyn-cgraph is initialized. We should avoid increasing
>> runtime memory overhead by not creating it if possible.
>
> We could delay creation a little bit, until the callgraph has been
> initialized. But I am not sure this gains us much as it doesn't avoid
> having the lifetime of the array overlap with the lifetime of the
> callgraph. And unless fixups have been explicitly disabled or there
> are no comdats we will need to allocate it (so likely in almost all
> cases).
>
> However, I can reduce the required overhead by using an array of chars
> instead of ints. I measured the number of bytes in the array for a big
> app and it was 5.6M with the int array and 1.5M with the char array.
> Does this seem reasonable? To reduce it further I could encode as a
> bitvector as we do in the gcda file.
>
> Teresa
>
>>
>> David
>>
>> On Tue, Sep 16, 2014 at 7:57 AM, Teresa Johnson  wrote:
>>> On Mon, Sep 15, 2014 at 9:29 PM, Xinliang David Li  
>>> wrote:
 Is it necessary to declare zero_counts array at all?  Can a flag field
 be added to dyn_cgraph_node structure to indicate if it is fixed up?
>>>
>>> The zero_counts array is used to pass info back to the caller in
>>> libgcov-driver.cc (dyn_cgraph_node), which is where it is allocated.
>>> That routine does not have access to the dyn-ipa cgraph.
>>>
>>> Teresa
>>>

 David

 On Fri, Sep 12, 2014 at 4:31 PM, Teresa Johnson  
 wrote:
> This patch addresses issues when running gcov-tool after performing
> COMDAT fixup during dyn-ipa. Functions that were previously all zero
> counts are marked, and the counts are discarded when being read in
> by gcov-tool before recalculating module groups and summary info.
>
> While here, cleaned up the gcov-tool output (remove an overly-verbose 
> output,
> make all output consistently go to stderr).
>
> Passes regression tests and manual tests. Ok for google branches?
>
> 2014-09-12  Teresa Johnson  
>
> * gcc/coverage.c (read_counts_file): Handle new section.
> * gcc/gcov.c (read_count_file): Ditto.
> * gcc/gcov-dump.c (dump_gcov_file): Ditto.
> (tag_function): Ditto.
> (tag_zero_fixup): New function.
> * gcc/gcov-io.c (gcov_read_comdat_zero_fixup): Ditto.
> * gcc/gcov-io.h (gcov_read_comdat_zero_fixup): Ditto.
> * libgcc/dyn-ipa.c (struct checksum_alias): Change flag to 
> pointer.
> (new_checksum_alias): Ditto.
> (cfg_checksum_insert): Ditto.
> (checksum_set_insert): Ditto.
> (gcov_build_callgraph): New parameter.
> (gcov_collect_imported_modules): Add assert for duplicate gcda 
> reads.
> (gcov_fixup_counters_checksum): Change flag to pointer to flag, 
> set it.
> (__gcov_compute_module_groups): New parameter.
> * libgcc/libgcov-driver.c (set_gcov_fn_fixed_up): New function.
> (get_gcov_fn_fixed_up): Ditto.
> (gcov_exit_merge_gcda): Handle new section.
> (gcov_write_comdat_zero_fixup): Ditto.
> (gcov_write_build_info): Ditto.
> (gcov_write_comdat_zero_fixup): New function.
> (gcov_write_func_counters): Fix indent.
> (gcov_dump_module_info): Write new flag section.
> * libgcc/libgcov.h (gcov_get_counter): Clear fixed-up counters.
> (gcov_get_counter_target): Ditto.
> * libgcc/libgcov-util.c (tag_function): Annotate fixed-up 
> functions,
> remove overly verbose output.
> (tag_counters): Clear fixed-up counters.
> (lipo_process_substitute_string_1): Send all verbose output to 
> stderr.
> (tag_zero_fixup): New function.
> (read_gcda_file): Deallocate flag array.
> (gcov_profile_scale): Send all verbose output to stderr.
> (gcov_profile_normalize): Ditto.
>
> Index: gcc/coverage.c
> ===
> --- gcc/coverage.c  (revision 215230)
> +++ gcc/coverage.c  (working copy)
> @@ -820,6 +820,14 @@ read_counts_file (const char *da_file_name, unsign
>  free (build_info_strings[i]);
>free (build_info_strings);
>  }
> +  else if (tag == GCOV_TAG_COMDAT_ZERO_FIXUP)
> +{
> +  /* Zero-profile fixup flags are not used by the compiler, read 
> and
> + ignore.  */
> +  gcov_unsigned_t num_fn;
> +  int *zero_fixup_flags = gcov_read_comdat_zero_fixup
> (length, &num_fn);
> +  free (zero_f

Flatten function.h

2014-09-16 Thread Andrew MacLeod

I'm currently focusing on flattening middle/backend files which are 
included from front-end files so we can clean up the include interface 
a bit.


This flattens function.h.  It wasn't too bad; there were a few 
prototypes and defines in expr.h and rtl.h that belong in function.h, 
and a couple of other prototypes that belonged in other .h files.  A 
bunch of the gen*.c generated files actually use function.h, so they 
needed some tweaking.


I did an include file reduction on all the language/*.[ch] and core 
*.[ch] files, but left the target files with the full complement of 7 
includes that function.h used to have.  It's probably easier when this is 
all done to fully reduce the targets one at a time... there are so many 
nooks and crannies I figured I'd bust something right now if I tried to 
do all the targets as well :-)


Before the final commit, I will do an include reduction on 
ada/gcc-interface/misc.c... my previous build didn't have Ada enabled, so 
I missed the reduction there.


Bootstraps on x86_64-unknown-linux-gnu with no new test regressions (one 
plugin test required adding an include). I've also run all the targets 
in config-list.mk and the ones that built seem to build fine, but it's 
still running. I'll verify before checking in.


For the record, this patch relocates the following includes to wherever 
function.h is included from.  Any plugins which include function.h may 
want to add these to the include list just before function.h:


#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "input.h"

OK for trunk?

Andrew

	* function.h: Flatten file.  Remove includes, adjust prototypes to 
	reflect only what is in function.h.
	(enum direction, struct args_size, struct locate_and_pad_arg_data,
	ADD_PARM_SIZE, SUB_PARM_SIZE, ARGS_SIZE_TREE, ARGS_SIZE_RTX): Relocate
	from expr.h.
	(ASLK_REDUCE_ALIGN, ASLK_RECORD_PAD): Relocate from rtl.h.
	(optimize_function_for_size_p, optimize_function_for_speed_p): Move
	prototypes to predict.h.
	(init_varasm_status): Move prototype to varasm.h.

	* expr.h: Adjust include files.
	(enum direction, struct args_size, struct locate_and_pad_arg_data,
	ADD_PARM_SIZE, SUB_PARM_SIZE, ARGS_SIZE_TREE, ARGS_SIZE_RTX): Move
	to function.h.
	(locate_and_pad_parm): Move prototype to function.h.
	* rtl.h: (assign_stack_local, ASLK_REDUCE_ALIGN, ASLK_RECORD_PAD,
	assign_stack_local_1, assign_stack_temp, assign_stack_temp_for_type,
	assign_temp, reposition_prologue_and_epilogue_notes,
	prologue_epilogue_contains, sibcall_epilogue_contains,
	update_temp_slot_address, maybe_copy_prologue_epilogue_insn,
	set_return_jump_label): Move prototypes to function.h.
	* predict.h (optimize_function_for_size_p,
	optimize_function_for_speed_p): Relocate prototypes from function.h.
	* shrink-wrap.h (emit_return_into_block, active_insn_between,
	convert_jumps_to_returns, emit_return_for_exit): Move prototypes to
	function.h.
	* varasm.h (init_varasm_status): Relocate prototype from function.h.
	* genattrtab.c (write_header): Add predict.h to include list.
	* genconditions.c (write_header): Add predict.h to include list.
	* genemit.c (main): Adjust header file includes.
	* gengtype.c (ifiles): Add flattened function.h header files.
	* genoutput.c (output_prologue): Add predict.h to include list.
	* genpreds.c (write_insn_preds_c): Adjust header file includes.
	* genrecog.c (write_header): Add flattened function.h header files.

	* alias.c: Adjust include files.
	* auto-inc-dec.c: Likewise.
	* basic-block.h: Likewise.
	* bb-reorder.c: Likewise.
	* bt-load.c: Likewise.
	* builtins.c: Likewise.
	* caller-save.c: Likewise.
	* calls.c: Likewise.
	* cfgbuild.c: Likewise.
	* cfgcleanup.c: Likewise.
	* cfgexpand.c: Likewise.
	* cfgloop.c: Likewise.
	* cfgloop.h: Likewise.
	* cfgrtl.c: Likewise.
	* cgraph.h: Likewise.
	* cgraphclones.c: Likewise.
	* cgraphunit.c: Likewise.
	* combine-stack-adj.c: Likewise.
	* combine.c: Likewise.
	* coverage.c: Likewise.
	* cprop.c: Likewise.
	* cse.c: Likewise.
	* cselib.c: Likewise.
	* dbxout.c: Likewise.
	* ddg.c: Likewise.
	* df-core.c: Likewise.
	* df-problems.c: Likewise.
	* df-scan.c: Likewise.
	* dojump.c: Likewise.
	* dwarf2cfi.c: Likewise.
	* dwarf2out.c: Likewise.
	* emit-rtl.c: Likewise.
	* except.c: Likewise.
	* explow.c: Likewise.
	* expr.c: Likewise.
	* final.c: Likewise.
	* function.c: Likewise.
	* gcse.c: Likewise.
	* gimple-fold.c: Likewise.
	* gimple-low.c: Likewise.
	* gimple-streamer.h: Likewise.
	* haifa-sched.c: Likewise.
	* ifcvt.c: Likewise.
	* ira.c: Likewise.
	* jump.c: Likewise.
	* lcm.c: Likewise.
	* loop-invariant.c: Likewise.
	* lra-assigns.c: Likewise.
	* lra-coalesce.c: Likewise.
	* lra-constraints.c: Likewise.
	* lra-eliminations.c: Likewise.
	* lra-lives.c: Likewise.
	* lra-spills.c: Likewise.
	* lra.c: Likewise.
	* lto-cgraph.c: Likewise.
	* lto-section-in.c: Likewise.
	* lto-section-out.c: Likewise.
	* lto-streamer-in.c: Likewise.
	* lto-strea

Re: Extract and insert merging patch

2014-09-16 Thread Andrew Pinski

On Tue, Sep 16, 2014 at 4:40 AM, Petr Murzin  wrote:
> Hi,
> This patch allows merging of extract and insert. Please have a look.
>
> 2014-09-16  Petr Murzin  
>
> * simplify-rtx.c (simplify_ternary_operation): Allow extract and
> insert merging.

Besides the missing testcase: can your changelog mention vectors?  From
the description I thought you were working on bits.

Thanks,
Andrew

Re: [PATCH][match-and-simplify] User defined predicates

2014-09-16 Thread Marc Glisse


On Tue, 16 Sep 2014, Richard Biener wrote:


The following adds the ability to write predicates using patterns
with an example following negate_expr_p which already has a
use in comparison folding (via its if c-expr).

The syntax is as follows:

(match negate_expr_p
INTEGER_CST
(if (TYPE_OVERFLOW_WRAPS (type)
 || may_negate_without_overflow_p (t
(match negate_expr_p
(bit_not @0)
(if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type
(match negate_expr_p
FIXED_CST)
(match negate_expr_p
(negate @0))
...

that is, you write '(match ' instead of '(simplify' and then
follow with a pattern and optional conditionals.  There should
be no transform pattern (unchecked yet).  Multiple matches for
the same  simply add to what is recognized as .
The predicate is applied to a single 'tree' operand and looks
up SSA defs and utilizes the optional valueize hook.

Currently both GENERIC and GIMPLE variants result in name-mangling
and the prototypes (unprototyped anywhere)

bool tree_negate_expr_p (tree t);
bool gimple_negate_expr_p (tree t, tree (*valueize)(tree) = NULL);


Ah, I haven't looked at the generated code, but I was expecting something 
roughly like:


struct matcher
{
  std::function valueize;
  bool negate_expr(tree);
  ...
};

where we can call negate_expr recursively without caring about passing 
valueize (if there are 2 matchers, one without a valueize function, 
negate_expr can be static in that version). Although recursive calls sound 
potentially slow, and having a thread_local counter in valueize to limit 
the call depth may not be ideal.


Please note that I am not at all saying the above is a good design, just 
dropping a random thought.


--
Marc Glisse

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Joseph S. Myers

On Tue, 16 Sep 2014, Richard Biener wrote:

> Hmm.  How is it with other compositive types like vectors and complex?
> It's bad that the middle-end needs to follow a specific frontends need.
> Why's the representation tied so closely together?

Complex types aren't derived types in C terms; they don't have an element 
type, but a corresponding real type.  Vectors should presumably be treated 
like complex types.  So both can have qualifiers.

> OTOH that address-spaces are "qualifiers" is an implementation detail
> (and maybe not the very best).  So I don't see how the C frontend
> needs to view them as qualifiers?

It's not an implementation detail, it's how TR 18037 defines them, and 
thus how the C front end should represent them in order to follow the 
requirements of TR 18037.

If something different is appropriate on GIMPLE, when GIMPLE gets its own 
type system independent of trees then the lowering could of course change 
this sort of thing.

(I think the fixed-point support, also from TR 18037, would better be 
implemented through lowering from fixed-point types at front-end level to 
special (e.g. saturating) operations on normal types and modes, rather 
than carrying a load of special types and modes through to the back end.)

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [patch] allowing configure --target=e500v[12]-etc

2014-09-16 Thread Joseph S. Myers

On Tue, 16 Sep 2014, Olivier Hainque wrote:

> 2014-09-16  Olivier Hainque  
> 
>   toplevel/
>   * config.sub: Accept e500v[12] cpu names. Canonicalize to
>   powerpc and add a "spe" suffix to the os name when required
>   to select the proper ABI and not already there.

config.sub patches have to go to config-patches first, we only ever import 
the latest unmodified config.sub and config.guess from config.git, without 
making local changes.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: Move unwind info to read-only section on AIX

2014-09-16 Thread Andrew Dixie

Hi David,

On AIX, mixing objects from different GCC versions should work as long
as the newest GCC is used for linking.  I tested a library with some
objects compiled by GCC-4.4 and some objects compiled by a patched
GCC-5.0.  Exceptions passed through the mixed objects without issue.
Did you have a specific example or reason to think it wouldn't work?

If one attempts to mix C++ objects from GCC-3.3 and GCC-4.4 on Linux
or Solaris there are link time errors.  If you're worried about the
risk of runtime problems, would you rather have a deliberate link time
error on AIX?

Regards,
Andrew

On Wed, Sep 17, 2014 at 6:37 AM, David Edelsohn  wrote:
> My concern is this really is an ABI change.  One cannot mix object
> files compiled with the old EH format and with the new EH format. One
> can mix shared libraries of different styles, but not object files.
> I'm not certain how much of a problem it is to require recompiling
> everything from scratch.

Re: [PATCH] PowerPC: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

2014-09-16 Thread David Edelsohn

Hi, Adhemerval

I cornered Honza during his visit to IBM Research to help me
understand my concerns with the function.

The code for *hold does:

+  tree fenv_var = create_tmp_var (double_type_node, NULL);
+
+  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
+
+  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);

The code for *clear does:

+  tree fenv_clear = create_tmp_var (double_type_node, NULL);
+
+  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear,
call_mffs);
+
+  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);

Note that *clear creates fenv_clear, but uses fenv_var that was
created earlier for *hold (which probably should have been named
fenv_hold or something to distinguish it).

The code for *update does:

+  tree old_fenv = create_tmp_var (double_type_node, NULL);
+  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
+
+  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs);

But it never actually uses old_fenv that it obtained from the call to
call_mffs().

The current implementation is trying to follow the punning and
conversions in the C code, but, according to Honza, it does not need
to.  It should not need the temporary variable nor the MODIFY_EXPR.
The implementation of *update shows that the temporary really is not
necessary because it (accidentally) is not referenced and not used.

The code for *hold and *clear should be converted to the style of
*update, without the temporary. The later instruction selection and
register allocation in GCC should handle the conversion between
double_type and uint64_type through the VIEW_CONVERT_EXPR without an
explicit temporary. The call to mffs can be inserted directly into the
rest of the tree being built (creating a large and ugly tree).

Then the final COMPOUND_EXPR is not needed in any of the cases, as it
already is omitted in the *update case. The accidental omission of a
reference to old_fenv is what allowed it to be omitted from the
*update case, which prompted my original question.

Thanks, David

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Bernd Schmidt


On 09/16/2014 10:56 PM, Joseph S. Myers wrote:

On Tue, 16 Sep 2014, Richard Biener wrote:


Hmm.  How is it with other compositive types like vectors and complex?
It's bad that the middle-end needs to follow a specific frontends need.
Why's the representation tied so closely together?


Complex types aren't derived types in C terms; they don't have an element
type, but a corresponding real type.  Vectors should presumably be treated
like complex types.  So both can have qualifiers.


OTOH that address-spaces are "qualifiers" is an implementation detail
(and maybe not the very best).  So I don't see how the C frontend
needs to view them as qualifiers?


It's not an implementation detail, it's how TR 18037 defines them, and
thus how the C front end should represent them in order to follow the
requirements of TR 18037.


My position is that standards do not mandate how our internal data 
structures should look like, and we should be striving to make them 
consistent. That means building array types in such a way that address 
spaces (and probably things like constness) are identical between the 
array type and its element type. It would be easy enough to make a 
c_type_quals wrapper function around TYPE_QUALS that returns zero for 
array types, but I would not expect there to really be a need for it.



Bernd

Re: Move unwind info to read-only section on AIX

2014-09-16 Thread David Edelsohn

Hi, Andrew

I can understand the requirement of linking with the newer version GCC
to provide the dbase symbol. I'm just trying to understand how EH will
work when some objects have EH with absolute references in the data
section and some have relative references in the text section. I guess
the information is completely local to the object and only has to be
self-consistent within the object.

I want to make sure that there are no additional problems of a throw()
passing through objects that change the location of the EH
information.

Thanks, David

On Tue, Sep 16, 2014 at 5:04 PM, Andrew Dixie  wrote:
> Hi David,
>
> On AIX, mixing objects from different GCC versions should work as long
> as the newest GCC is used for linking.  I tested a library with some
> objects compiled by GCC-4.4 and some objects compiled by a patched
> GCC-5.0.  Exceptions passed through the mixed objects without issue.
> Did you have a specific example or reason to think it wouldn't work?
>
> If one attempts to mix C++ objects from GCC-3.3 and GCC-4.4 on Linux
> or Solaris there are link time errors.  If you're worried about the
> risk of runtime problems, would you rather have a deliberate link time
> error on AIX?
>
> Regards,
> Andrew
>
> On Wed, Sep 17, 2014 at 6:37 AM, David Edelsohn  wrote:
>> My concern is this really is an ABI change.  One cannot mix object
>> files compiled with the old EH format and with the new EH format. One
>> can mix shared libraries of different styles, but not object files.
>> I'm not certain how much of a problem it is to require recompiling
>> everything from scratch.

Re: Flatten function.h

2014-09-16 Thread Joseph S. Myers

On Tue, 16 Sep 2014, Andrew MacLeod wrote:

> I did an include file reduction on all the language/*.[ch] and core *.[ch]
> files, but left the target files with the full complement of 7 includes that
> function.h use to have.  Its probably easier when this is all done to fully
> reduce the targets one at a time... there are so many nooks and crannies I
> figured I'd bust something right now if i tried to do all the targets as well
> :-)

How did you determine what includes to remove?  You appear to have removed 
tm.h includes from various files that do in fact use target macros; maybe 
they get it indirectly included by some other header, but I thought a 
principle of this flattening was to avoid relying on such indirect 
inclusions.  Because of possible use of target macros in #ifdef 
conditionals, "compiles with the include removed" is not a sufficient 
condition for removing it.

cfgrtl.c
gimple-fold.c
mode-switching.c
tree-inline.c
vmsdbgout.c
fortran/f95-lang.c
fortran/trans-decl.c
objc/objc-act.c

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Joseph S. Myers

On Tue, 16 Sep 2014, Bernd Schmidt wrote:

> > It's not an implementation detail, it's how TR 18037 defines them, and
> > thus how the C front end should represent them in order to follow the
> > requirements of TR 18037.
> 
> My position is that standards do not mandate how our internal data structures
> should look like, and we should be striving to make them consistent. That

My position is that the structures in the front end should correspond to 
how the language is actually defined, so that the most obvious way of 
accessing some property of an entity in the front end actually gets that 
property as it is defined in the standard, and not something similar but 
confusingly different defined by GCC.  It's the job of genericizing / 
gimplifying to convert from structures that closely correspond to the 
source program and the language standard into ones that are more 
convenient for language-independent processing and code generation.

(That TYPE_MAIN_VARIANT maps an array of qualified type to an array of 
corresponding unqualified type necessitates lots of special cases in the 
front end to avoid applying TYPE_MAIN_VARIANT to array types, since in C 
terms array types are always unqualified and are unrelated to an array of 
corresponding unqualified element type.)

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: Flatten function.h

2014-09-16 Thread Andrew MacLeod


On 09/16/2014 05:12 PM, Joseph S. Myers wrote:

On Tue, 16 Sep 2014, Andrew MacLeod wrote:


I did an include file reduction on all the language/*.[ch] and core *.[ch]
files, but left the target files with the full complement of 7 includes that
function.h use to have.  Its probably easier when this is all done to fully
reduce the targets one at a time... there are so many nooks and crannies I
figured I'd bust something right now if i tried to do all the targets as well
:-)

How did you determine what includes to remove?  You appear to have removed
tm.h includes from various files that do in fact use target macros; maybe
they get it indirectly included by some other header, but I thought a
principle of this flattening was to avoid relying on such indirect
inclusions.  Because of possible use of target macros in #ifdef
conditionals, "compiles with the include removed" is not a sufficient
condition for removing it.

cfgrtl.c
gimple-fold.c
mode-switching.c
tree-inline.c
vmsdbgout.c
fortran/f95-lang.c
fortran/trans-decl.c
objc/objc-act.c

Many of those files do in fact get numerous include files from expr.h, 
which are likely to get put back in when expr.h is flattened,  but there 
is a risk as you point out.


Perhaps I should proceed by simply moving the includes and removing any 
duplicate includes,  leaving the reduction for a later date. There is less 
chance of that causing issues.  I did forget about the discussion last 
year concerning target macros from the RTL end of things... My mind is 
slowly going :-).




Andrew

Re: [PATCH v2 AArch64]: Re: [PATCH AArch64]: Add constraint letter for stack_protect_test pattern.

2014-09-16 Thread Andrew Pinski

On Thu, Sep 4, 2014 at 1:18 AM, James Greenhalgh
 wrote:
> On Thu, Sep 04, 2014 at 08:42:31AM +0100, Venkataramanan Kumar wrote:
>> Hi maintainers,
>>
>> I just added "=r" and retested it.
>
> I had a very similar patch to this sitting in my local tree. However,
> I am surprised you have left operand 3 as an output operand. In my tree
> I had marked operand 3 as "&r".
>
> What do you think?

The clobber needs to be "=&r" as you are writing to the register and
not just reading from it.  I think this is causing some issues
including linaro bugzilla #667
(https://bugs.linaro.org/show_bug.cgi?id=667).

Thanks,
Andrew Pinski


>
> Thanks,
> James
>
>> gcc/ChangeLog
>>
>> 2014-09-04 Venkataramanan Kumar 
>>
>>* config/aarch64/aarch64.md (stack_protect_test_) Add register
>>constraint for operand 0.
>>
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>> index b5be79c..ed6e602 100644
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -4026,7 +4026,7 @@
>>  })
>>
>>  (define_insn "stack_protect_test_"
>> -  [(set (match_operand:PTR 0 "register_operand")
>> +  [(set (match_operand:PTR 0 "register_operand" "=r")
>> (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
>>  (match_operand:PTR 2 "memory_operand" "m")]
>>  UNSPEC_SP_TEST))
>>
>> regards,
>> venkat.
>>
>> On 4 September 2014 12:42, Venkataramanan Kumar
>>  wrote:
>> > Hi Maintainers,
>> >
>> > Below patch adds register constraint "r" for destination operand in
>> > "stack_protect_test" pattern.
>> >
>> > We need a general register here and adding "r" will avoid vector
>> > register getting allocated.
>> >
>> > regression tested on aarch64-unknown-linux-gnu.
>> >
>> > Ok for trunk?
>> >
>> > regards,
>> > Venkat.
>> >
>> >
>> > gcc/ChangeLog
>> >
>> > 2014-09-04 Venkataramanan Kumar 
>> >
>> >* config/aarch64/aarch64.md (stack_protect_test_) Add register
>> >constraint for operand 0.
>> >
>> >
>> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>> > index b5be79c..77588b9 100644
>> > --- a/gcc/config/aarch64/aarch64.md
>> > +++ b/gcc/config/aarch64/aarch64.md
>> > @@ -4026,7 +4026,7 @@
>> >  })
>> >
>> >  (define_insn "stack_protect_test_"
>> > -  [(set (match_operand:PTR 0 "register_operand")
>> > + [(set (match_operand:PTR 0 "register_operand" "r")
>> > (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
>> >  (match_operand:PTR 2 "memory_operand" "m")]
>> >  UNSPEC_SP_TEST))
>>

Remove LIBGCC2_TF_CEXT target macro

2014-09-16 Thread Joseph S. Myers

This patch removes the (undocumented) LIBGCC2_TF_CEXT target macro,
replacing it by -fbuilding-libgcc predefines (and thereby gets rid of
another LIBGCC2_LONG_DOUBLE_TYPE_SIZE conditional, though some more
patches are needed before that target macro can be eliminated).  This
macro indicated the suffix used on __builtin_huge_val,
__builtin_copysign, __builtin_fabs built-in function names to produce
the names for a given floating-point mode.

Predefines are added for all floating-point modes supported for
libgcc, not just TFmode.  These are fully accurate for modes
corresponding to float, double and long double.  For other modes, the
suffix for *constants* is determined by the targetm.c.mode_for_suffix
hook (the limit to two possible suffixes 'w' and 'q' being hardcoded
in various places).  This is in fact the suffix for built-in functions
as well where such functions exist.

* For i386, the *q functions always exist (whether or not TFmode is
  used for long double).  The *w functions never exist (but this
  doesn't matter for libgcc, since no i386 configuration treats XFmode
  as a supported scalar mode if long double is TFmode; if __float80
  were to be supported for 64-bit Android, properly such functions
  ought to be added).

* For ia64, the *q functions exist for non-HP-UX (under HP-UX, long
  double is TFmode, so they aren't needed).  The *w functions never
  exist.  This is an issue for this libgcc code for the XFmode complex
  functions in libgcc on HP-UX; as I understand it, right now those
  will accidentally be using TFmode versions of those three functions,
  so involving unnecessary conversions, while the sanity check on CEXT
  accidentally passes because all it tests is the sizes of the types.

Because of the lack of 'w' functions, the patch uses 'l' when the
constant suffix is 'w', matching what the existing libgcc code would
do for IA64 HP-UX in that case.

Ideally there would be generic code to create such built-in functions
for all supported floating-point types.  That may be something to
consider if support for TS 18661-3 (standard bindings for IEEE
754-2008, defining names such as _Float128, and function names such as
copysignf128) is added in future.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.  OK to
commit?

gcc:
2014-09-16  Joseph Myers  

* system.h (LIBGCC2_TF_CEXT): Poison.
* config/i386/cygming.h (LIBGCC2_TF_CEXT): Remove.
* config/i386/darwin.h (LIBGCC2_TF_CEXT): Likewise.
* config/i386/dragonfly.h (LIBGCC2_TF_CEXT): Likewise.
* config/i386/freebsd.h (LIBGCC2_TF_CEXT): Likewise.
* config/i386/gnu-user-common.h (LIBGCC2_TF_CEXT): Likewise.
* config/i386/openbsdelf.h (LIBGCC2_TF_CEXT): Likewise.
* config/i386/sol2.h (LIBGCC2_TF_CEXT): Likewise.
* config/ia64/ia64.h (LIBGCC2_TF_CEXT): Likewise.
* config/ia64/linux.h (LIBGCC2_TF_CEXT): Likewise.

gcc/c-family:
2014-09-16  Joseph Myers  

* c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_*_FUNC_EXT__
for supported floating-point modes.

libgcc:
2014-09-16  Joseph Myers  

* libgcc2.c (CEXT): Define using __LIBGCC_*_FUNC_EXT__.

Index: gcc/c-family/c-cppbuiltin.c
===
--- gcc/c-family/c-cppbuiltin.c (revision 215300)
+++ gcc/c-family/c-cppbuiltin.c (working copy)
@@ -956,6 +956,28 @@ c_cpp_builtins (cpp_reader *pfile)
+ sizeof ("__LIBGCC_HAS__MODE__"));
  sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name);
  cpp_define (pfile, macro_name);
+ macro_name = (char *) alloca (strlen (name)
+   + sizeof ("__LIBGCC__FUNC_EXT__"));
+ sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name);
+ const char *suffix;
+ if (mode == TYPE_MODE (double_type_node))
+   suffix = "";
+ else if (mode == TYPE_MODE (float_type_node))
+   suffix = "f";
+ else if (mode == TYPE_MODE (long_double_type_node))
+   suffix = "l";
+ /* ??? The following assumes the built-in functions (defined
+in target-specific code) match the suffixes used for
+constants.  Because in fact such functions are not
+defined for the 'w' suffix, 'l' is used there
+instead.  */
+ else if (mode == targetm.c.mode_for_suffix ('q'))
+   suffix = "q";
+ else if (mode == targetm.c.mode_for_suffix ('w'))
+   suffix = "l";
+ else
+   gcc_unreachable ();
+ builtin_define_with_value (macro_name, suffix, 0);
}
 
   /* For libgcc crtstuff.c and libgcc2.c.  */
Index: gcc/config/i386/cygming.h
===
--- gcc/config/i386/cygming.h   (revision 215300)
+++ gcc/config/i386/cygming.h   (working copy)
@@ -339,9 +339,6 @@ do {\
 #define A

Re: [PR libfortran/62768] Handle filenames with embedded nulls

2014-09-16 Thread Janne Blomqvist

On Tue, Sep 16, 2014 at 11:17 AM, FX  wrote:
>>> 2014-09-05  Janne Blomqvist  
>>>
>>>PR libfortran/62768
>>>* io/io.h (gfc_unit): Store C string for the filename.
>>>* io/close.c (st_close): Use gfc_unit.filename.
>>>* io/inquire.c (inquire_via_unit): Likewise.
>>>* io/open.c (new_unit): Likewise.
>>>(already_open): Likewise, unlink file before freeing filename.
>>>* io/unit.c (init_units): Likewise.
>>>(close_unit_1): Likewise.
>>>(filename_from_unit): Likewise.
>>>* io/unix.c (compare_file_filename): Likewise.
>>>(find_file0): Likewise.
>>>(delete_file): Likewise.
>
> OK, if you add a runtime testcase.

Thanks for the review, committed as r215307. I added the testcase
below as gfortran.dg/filename_null.f90:

! { dg-do run }
! PR 62768
! Filenames with embedded NULL characters are truncated, make sure
! inquire reports the correct truncated name.
program filename_null
  implicit none
  character(len=15), parameter :: s = "hello" // achar(0) // "world", &
   s2 = "hello"
  character(len=15) :: r
  logical :: l
  open(10, file=s)
  inquire(unit=10, name=r)
  if (r /= s2) call abort()
  inquire(file=s2, exist=l)
  if (.not. l) call abort()
  close(10, status="delete")
end program filename_null


> I tried to think of other characters we might want to sanitize/special case, 
> but at least on Unix/POSIX only NUL and / are fundamentally different. It 
> might make sense to think about it for Windows targets.

IIRC on Windows only printable UTF-16 characters are allowed, and like
POSIX NULL and / are special. Anyway, if one does something which is
not allowed, the OS api's should report an error. The bug I fixed was
fundamentally about OS api's expecting null-terminated C strings, so
the strings were truncated in the Fortran->C string conversion before
calling the OS API's.

-- 
Janne Blomqvist

Re: [PATCHv3] Vimrc config with GNU formatting

2014-09-16 Thread Trevor Saunders

On Tue, Sep 16, 2014 at 08:38:58PM +0400, Yury Gribov wrote:
> Hi all,
> 
> This is the third version of the patch. A list of changes since last
> version:
> * move config to contrib so that it's _not_ enabled by default (current
> score is 2/1 in favor of no Vim config by default)

fwiw, I think enabling it by default especially when that really means
enable it if you've enabled the localrc plugin makes sense.  I don't see
how you can enable the localrc plugin and then complain when people use
it for its designed purpose.  However something in contrib/ is probably
easier for new people to find than something on the wiki or something so
better than doing nothing :)

Thanks!

Trev

> * update Makefile.in to make .local.vimrc if developer asks for it
> * disable autoformatting for flex files
> * fix filtering of non-GNU sources (libsanitizer)
> * added some small fixes in cinoptions based on feedback from community
> 
> As noted by Richard, the config does not do a good job of formatting unbound
> {} blocks e.g.
> void
> foo ()
> {
>   int x;
> {
>   // I'm an example of bad bad formatting
> }
> }
> but it seems to be the best we can get with Vim's cindent
> (and I don't think anyone seriously considers writing a custom indentexpr).
> 
> Ok to commit?
> 
> -Y

> commit 67219512dac9a5cc14eea8f157222a226044dd72
> Author: Yury Gribov 
> Date:   Thu Sep 4 16:55:44 2014 +0400
> 
> 2014-09-16  Laurynas Biveinis  
>   Yury Gribov  
> 
>   Vim config with GNU formatting.
> 
> contrib/
>   * vimrc: New file.
> 
> /
>   * .gitignore: Added .local.vimrc.
>   * Makefile.tpl (.local.vimrc): New target.
>   * Makefile.in: Regenerate.
> 
> diff --git a/.gitignore b/.gitignore
> index e9b56be..252b8b0 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -32,6 +32,8 @@ POTFILES
>  TAGS
>  TAGS.sub
>  
> +.local.vimrc
> +
>  .gdbinit
>  .gdb_history
>  
> diff --git a/Makefile.in b/Makefile.in
> index d6105b3..e573069 100644
> --- a/Makefile.in
> +++ b/Makefile.in
> @@ -2384,6 +2384,11 @@ mail-report-with-warnings.log: warning.log
>   chmod +x $@
>   echo If you really want to send e-mail, run ./$@ now
>  
> +# Local Vim config
> +
> +vimrc:
> + (cd $(srcdir); $(LN_S) contrib/vimrc .local.vimrc)
> +
>  # Installation targets.
>  
>  .PHONY: install uninstall
> diff --git a/Makefile.tpl b/Makefile.tpl
> index f7c7e38..d050694 100644
> --- a/Makefile.tpl
> +++ b/Makefile.tpl
> @@ -867,6 +867,11 @@ mail-report-with-warnings.log: warning.log
>   chmod +x $@
>   echo If you really want to send e-mail, run ./$@ now
>  
> +# Local Vim config
> +
> +vimrc:
> + (cd $(srcdir); $(LN_S) contrib/vimrc .local.vimrc)
> +
>  # Installation targets.
>  
>  .PHONY: install uninstall
> diff --git a/contrib/vimrc b/contrib/vimrc
> new file mode 100644
> index 000..7287bd1
> --- /dev/null
> +++ b/contrib/vimrc
> @@ -0,0 +1,43 @@
> +" Code formatting settings for Vim.
> +"
> +" To enable this for GCC files by default, install thinca's localrc plugin
> +" and do
> +"   $ make .local.vimrc
> +" Or if you dislike plugins, add autocmd in your .vimrc:
> +"   :au BufNewFile,BufReadPost path/to/gcc/* :so path/to/gcc/.local.vimrc
> +" Or just source file manually every time if you are masochist:
> +"   :so .local.vimrc
> +" 
> +" Copyright (C) 2014 Free Software Foundation, Inc.
> +"
> +" This program is free software; you can redistribute it and/or modify
> +" it under the terms of the GNU General Public License as published by
> +" the Free Software Foundation; either version 3 of the License, or
> +" (at your option) any later version.
> +"
> +" This program is distributed in the hope that it will be useful,
> +" but WITHOUT ANY WARRANTY; without even the implied warranty of
> +" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +" GNU General Public License for more details.
> +"
> +" You should have received a copy of the GNU General Public License
> +" along with this program.  If not, see .
> +
> +function! SetStyle()
> +  let l:fname = expand("%:p")
> +  if stridx(l:fname, 'libsanitizer') != -1
> +return
> +  endif
> +  let l:ext = fnamemodify(l:fname, ":e")
> +  let l:c_exts = ['c', 'h', 'cpp', 'cc', 'C', 'H', 'def', 'java']
> +  if index(l:c_exts, l:ext) != -1
> +setlocal cindent
> +setlocal shiftwidth=2
> +setlocal softtabstop=2
> +setlocal cinoptions=>2s,n-s,{s,^-s,:s,=s,g0,f0,hs,p2s,t0,+s,(0,u0,w1,m0
> +setlocal textwidth=79
> +setlocal formatoptions-=ro formatoptions+=cql
> +  endif
> +endfunction
> +
> +call SetStyle()

Re: [PATCH v2 AArch64]: Re: [PATCH AArch64]: Add constraint letter for stack_protect_test pattern.

2014-09-16 Thread Venkataramanan Kumar

Hi Andrew,

Thanks for pointing that.

I thought the "&" modifier was enough to say that the operand is early
clobbered, so that GCC would use a different register and would not
allocate the same register that was given to an input operand.

Looking at the bug, it looks like "=" is needed for the clobber,
so that GCC will allocate a fresh register.

regards,
Venkat.

On 17 September 2014 03:06, Andrew Pinski  wrote:
> On Thu, Sep 4, 2014 at 1:18 AM, James Greenhalgh
>  wrote:
>> On Thu, Sep 04, 2014 at 08:42:31AM +0100, Venkataramanan Kumar wrote:
>>> Hi maintainers,
>>>
>>> I just added "=r" and retested it.
>>
>> I had a very similar patch to this sitting in my local tree. However,
>> I am surprised you have left operand 3 as an output operand. In my tree
>> I had marked operand 3 as "&r".
>>
>> What do you think?
>
> The clobber needs to be "=&r" as you are writing to the register and
> not just reading from it.  I think this is causing some issues
> including linaro bugzilla #667
> (https://bugs.linaro.org/show_bug.cgi?id=667).
>
> Thanks,
> Andrew Pinski
>
>
>>
>> Thanks,
>> James
>>
>>> gcc/ChangeLog
>>>
>>> 2014-09-04 Venkataramanan Kumar 
>>>
>>>* config/aarch64/aarch64.md (stack_protect_test_) Add register
>>>constraint for operand 0.
>>>
>>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>>> index b5be79c..ed6e602 100644
>>> --- a/gcc/config/aarch64/aarch64.md
>>> +++ b/gcc/config/aarch64/aarch64.md
>>> @@ -4026,7 +4026,7 @@
>>>  })
>>>
>>>  (define_insn "stack_protect_test_"
>>> -  [(set (match_operand:PTR 0 "register_operand")
>>> +  [(set (match_operand:PTR 0 "register_operand" "=r")
>>> (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
>>>  (match_operand:PTR 2 "memory_operand" "m")]
>>>  UNSPEC_SP_TEST))
>>>
>>> regards,
>>> venkat.
>>>
>>> On 4 September 2014 12:42, Venkataramanan Kumar
>>>  wrote:
>>> > Hi Maintainers,
>>> >
>>> > Below patch adds register constraint "r" for destination operand in
>>> > "stack_protect_test" pattern.
>>> >
>>> > We need a general register here and adding "r" will avoid vector
>>> > register getting allocated.
>>> >
>>> > regression tested on aarch64-unknown-linux-gnu.
>>> >
>>> > Ok for trunk?
>>> >
>>> > regards,
>>> > Venkat.
>>> >
>>> >
>>> > gcc/ChangeLog
>>> >
>>> > 2014-09-04 Venkataramanan Kumar 
>>> >
>>> >* config/aarch64/aarch64.md (stack_protect_test_) Add 
>>> > register
>>> >constraint for operand 0.
>>> >
>>> >
>>> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>>> > index b5be79c..77588b9 100644
>>> > --- a/gcc/config/aarch64/aarch64.md
>>> > +++ b/gcc/config/aarch64/aarch64.md
>>> > @@ -4026,7 +4026,7 @@
>>> >  })
>>> >
>>> >  (define_insn "stack_protect_test_"
>>> > -  [(set (match_operand:PTR 0 "register_operand")
>>> > + [(set (match_operand:PTR 0 "register_operand" "r")
>>> > (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
>>> >  (match_operand:PTR 2 "memory_operand" "m")]
>>> >  UNSPEC_SP_TEST))
>>>

Re: [PATCH v2 AArch64]: Re: [PATCH AArch64]: Add constraint letter for stack_protect_test pattern.

2014-09-16 Thread James Greenhalgh

On Tue, Sep 16, 2014 at 10:36:08PM +0100, Andrew Pinski wrote:
> On Thu, Sep 4, 2014 at 1:18 AM, James Greenhalgh
>  wrote:
> > On Thu, Sep 04, 2014 at 08:42:31AM +0100, Venkataramanan Kumar wrote:
> >> Hi maintainers,
> >>
> >> I just added "=r" and retested it.
> >
> > I had a very similar patch to this sitting in my local tree. However,
> > I am surprised you have left operand 3 as an output operand. In my tree
> > I had marked operand 3 as "&r".
> >
> > What do you think?
> 
> The clobber needs to be "=&r" as you are writing to the register and
> not just reading from it.  I think this is causing some issues
> including linaro bugzilla #667
> (https://bugs.linaro.org/show_bug.cgi?id=667).

(+CC Matthias Klose and Steve McIntyre who have also been in contact with me
regarding this bug)

I've seen this bug locally, and had considered sending the patch you
suggested, which does indeed fix the bug. However, it feels wrong as
the operand is not a formal output of the pattern. It is clobbered - and
indeed earlyclobbered - so yes it is written to, but it isn't an output.
This makes the fix look like a band-aid around the real problem.

The bug looks similar to pr52573 - regrename fails to spot that it should
not rename to a register used in an earlyclobber operand of any type, rather
than just an output+earlyclobber operand as it does now.

I've played about with a fix that sits in regrename, and forces it to think
of all earlyclobber operands as starting and ending chains but this didn't
bootstrap clean - we end up with what I believe are false reports of stack
smashing in libstdc++.

I was planning to look again at my approach tomorrow, I would like to
convince myself that this isn't a deficiency in regrename before I would
support just marking this operand "=&r".

If you have any other suggestions, or if "=&r" is actually correct and
I am misreading the documentation please let me know.

Thanks,
James

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Bernd Schmidt


On 09/16/2014 11:18 PM, Joseph S. Myers wrote:


(That TYPE_MAIN_VARIANT maps an array of qualified type to an array of
corresponding unqualified type necessitates lots of special cases in the
front end to avoid applying TYPE_MAIN_VARIANT to array types, since in C
terms array types are always unqualified and are unrelated to an array of
corresponding unqualified element type.)


Sounds like you want a c_type_main_variant wrapper then? What exactly 
breaks if you ignore the problem and apply TYPE_MAIN_VARIANT to arrays?



bernd

Re: ptx preliminary address space fixes [1/4]

2014-09-16 Thread Joseph S. Myers

On Wed, 17 Sep 2014, Bernd Schmidt wrote:

> On 09/16/2014 11:18 PM, Joseph S. Myers wrote:
> 
> > (That TYPE_MAIN_VARIANT maps an array of qualified type to an array of
> > corresponding unqualified type necessitates lots of special cases in the
> > front end to avoid applying TYPE_MAIN_VARIANT to array types, since in C
> > terms array types are always unqualified and are unrelated to an array of
> > corresponding unqualified element type.)
> 
> Sounds like you want a c_type_main_variant wrapper then? What exactly breaks
> if you ignore the problem and apply TYPE_MAIN_VARIANT to arrays?

Anything where the C standard defines something in terms of the 
unqualified versions of types, or the set of qualifiers on a type, 
operates incorrectly (tests compatibility of the wrong types, etc.) if you 
apply TYPE_MAIN_VARIANT to arrays.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH v2 AArch64]: Re: [PATCH AArch64]: Add constraint letter for stack_protect_test pattern.

2014-09-16 Thread Andrew Pinski

On Tue, Sep 16, 2014 at 3:03 PM, James Greenhalgh
 wrote:
> On Tue, Sep 16, 2014 at 10:36:08PM +0100, Andrew Pinski wrote:
>> On Thu, Sep 4, 2014 at 1:18 AM, James Greenhalgh
>>  wrote:
>> > On Thu, Sep 04, 2014 at 08:42:31AM +0100, Venkataramanan Kumar wrote:
>> >> Hi maintainers,
>> >>
>> >> I just added "=r" and retested it.
>> >
>> > I had a very similar patch to this sitting in my local tree. However,
>> > I am surprised you have left operand 3 as an output operand. In my tree
>> > I had marked operand 3 as "&r".
>> >
>> > What do you think?
>>
>> The clobber needs to be "=&r" as you are writing to the register and
>> not just reading from it.  I think this is causing some issues
>> including linaro bugzilla #667
>> (https://bugs.linaro.org/show_bug.cgi?id=667).
>
> (+CC Matthias Klose and Steve McIntyre who have also been in contact with me
> regarding this bug)
>
> I've seen this bug locally, and had considered sending the patch you
> suggested, which does indeed fix the bug. However, it feels wrong as
> the operand is not a formal output of the pattern. It is clobbered - and
> indeed earlyclobbered - so yes it is written to, but it isn't an output.
> This makes the fix look like a band-aid around the real problem.
>
> The bug looks similar to pr52573 - regrename fails to spot that it should
> not rename to a register used in an earlyclobber operand of any type, rather
> than just an output+earlyclobber operand as it does now.
>
> I've played about with a fix that sits in regrename, and forces it to think
> of all earlyclobber operands as starting and ending chains but this didn't
> bootstrap clean - we end up with what I believe are false reports of stack
> smashing in libstdc++.
>
> I was planning to look again at my approach tomorrow, I would like to
> convince myself that this isn't a deficiency in regrename before I would
> support just marking this operand "=&r".
>
> If you have any other suggestions, or if "=&r" is actually correct and
> I am misreading the documentation please let me know.

I think you misread the documentation.

Or rather the documentation is not fully clear here.
https://gcc.gnu.org/onlinedocs/gccint/Modifiers.html says this for "&":
‘&’ does not obviate the need to write ‘=’ or ‘+’.

Which means you need "=" if you write to the register.  Match_scratch
is the same as match_operand except it is able to be added back during
combine.

Also "=" means:
Means that this operand is write-only for this instruction: the
previous value is discarded and replaced by output data.

That is not necessarily a formal output of the pattern.

Thanks,
Andrew

Thanks,
Andrew Pinski



>
> Thanks,
> James
>

Re: [C PATCH] Better diagnostics for C++ comments in C90 (PR c/61854)

2014-09-16 Thread Joseph S. Myers

On Mon, 15 Sep 2014, Marek Polacek wrote:

> On Mon, Sep 15, 2014 at 05:49:25PM +, Joseph S. Myers wrote:
> > On Mon, 15 Sep 2014, Marek Polacek wrote:
> > 
> > > We must be careful to properly handle code such as "1 //**/ 2", which
> > > has a different meaning in C90 and GNU90 mode.  New testcases test this.
> > 
> > I don't think there's sufficient allowance here for other valid cases.  
> > It's valid to have // inside #if 0 in C90, for example, so that must not 
> > be diagnosed (must not have a pedwarn or error, at least, that is).  It's 
> 
> Good point, sorry about that.  Luckily this can be fixed just by
> checking pfile->state.skipping.  New test added.

This is getting closer, but it looks like you still treat it as a line 
comment when being skipped for C90, when actually it's not safe to treat 
it like that; you have to produce a '/' preprocessing token and continue 
tokenizing the rest of the line.  Consider the following code:

int i = 0
#if 0
// /*
#else
// */
+1
#endif
;

For C90 i gets value 0.  With // comments it gets value 1.

> +   /* In C89/C94, C++ style comments are forbidden.  */
> +   else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
> + || CPP_OPTION (pfile, lang) == CLK_STDC94))
> + {
> +   /* But don't be confused about // immediately followed by *.  */
> +   if (buffer->cur[1] == '*'
> +   || pfile->state.in_directive)

And this comment needs updating to reflect that it's not just //* where // 
can appear in valid C90 code in a way incompatible with treating it as a 
comment.

-- 
Joseph S. Myers
jos...@codesourcery.com

[PING^2][PATCH] Power/GCC: Fix e500 vs non-e500 register save slot issue

2014-09-16 Thread Maciej W. Rozycki

David,

 This patch:

https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00051.html

is still waiting, please review.

 Thanks,

  Maciej

[committed] Fix problem handling return value aggregates on PA

2014-09-16 Thread John David Anglin

The attached patch fixes a regression in handling DFmode aggregates
in pa_function_value.  Sometimes, an aggregate with a single DFmode
field could not be handled by the existing code, which used a BLKmode
parallel.  We can just use a register when the aggregate fits exactly
in a single or double word.  This avoids an ICE in store_field.

Tested on hppa2.0w-hp-hpux11.11, hppa-unknown-linux-gnu and
hppa64-hp-hpux11.11.  Committed to trunk and 4.9 branch.

Dave
--
John David Anglin   dave.ang...@bell.net


2014-09-16  John David Anglin  

PR target/61853
* config/pa/pa.c (pa_function_value): Directly handle aggregates
that fit exactly in a word or double word.

Index: config/pa/pa.c
===
--- config/pa/pa.c  (revision 215242)
+++ config/pa/pa.c  (working copy)
@@ -9298,6 +9298,12 @@
   || TREE_CODE (valtype) == COMPLEX_TYPE
   || TREE_CODE (valtype) == VECTOR_TYPE)
 {
+  HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
+
+  /* Handle aggregates that fit exactly in a word or double word.  */
+  if ((valsize & (UNITS_PER_WORD - 1)) == 0)
+   return gen_rtx_REG (TYPE_MODE (valtype), 28);
+
   if (TARGET_64BIT)
{
   /* Aggregates with a size less than or equal to 128 bits are
@@ -9306,7 +9312,7 @@
 memory.  */
  rtx loc[2];
  int i, offset = 0;
- int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
+ int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
 
  for (i = 0; i < ub; i++)
{
@@ -9318,7 +9324,7 @@
 
  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
}
-  else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
+  else if (valsize > UNITS_PER_WORD)
{
  /* Aggregates 5 to 8 bytes in size are returned in general
 registers r28-r29 in the same manner as other non

[gomp4] various OpenACC/PTX built-ins and a reduction tweak

2014-09-16 Thread Cesar Philippidis

The patch introduces the following OpenACC/PTX-specific built-ins:

  * GOACC_ntid
  * GOACC_tid
  * GOACC_nctaid
  * GOACC_ctaid
  * acc_on_device
  * GOACC_get_thread_num
  * GOACC_get_num_threads

Of these functions, the only one that is part of the OpenACC spec is
acc_on_device. The other functions are helpers for omp-low.c. In
particular, I'm using GOACC_get_thread_num and GOACC_get_num_threads to
determine the number of accelerator threads available to the reduction
clause. Currently GOACC_get_num_threads is num_gangs * vector_length, but
that value is subject to change later on. It's probably premature to
include the PTX built-ins right now, but I'd like to keep the middle end
of our internal OpenACC branch in sync with gomp-4_0-branch.

This patch also allows OpenACC reductions to process the array holding
partial reductions on the accelerator, instead of copying that array
back to the host. Currently, this only happens when num_gangs = 1. For
PTX targets, we're going to need to use another kernel to process the
array of partial results because PTX lacks inter-CTA synchronization
(we're currently mapping gangs to CTAs). That's why I was working on the
routine clause recently.

Is this OK for gomp-4_0-branch?

Thanks,
Cesar
2014-09-16  Cesar Philippidis  

	gcc/
	* builtins.c (expand_builtin_acc_on_device): New function.
	(expand_oacc_builtin): New function.
	(expand_builtin): Handle BUILT_IN_GOACC_NCTAID, BUILT_IN_GOACC_CTAID,
	BUILT_IN_GOACC_NTID, BUILT_IN_GOACC_TID, BUILT_IN_GOACC_GET_THREAD_NUM
	and BUILT_IN_GOACC_GET_NUM_THREADS.
	(is_simple_builtin): Handle BUILT_IN_GOACC_NTID and BUILT_IN_GOACC_TID.
	(is_inexpensive_builtin): Handle BUILT_IN_ACC_ON_DEVICE.
	* gcc/builtins.def (DEF_GOACC_BUILTIN): Temporarily make COND always
	true.
	(DEF_GOACC_BUILTIN_COMPILER): New.

	* gcc/oacc-builtins.def (BUILT_IN_GOACC_NTID, BUILT_IN_GOACC_TID,
	BUILT_IN_GOACC_NCTAID, BUILT_IN_GOACC_CTAID, BUILT_IN_ACC_ON_DEVICE,
	BUILT_IN_GOACC_GET_THREAD_NUM, BUILT_IN_GOACC_GET_NUM_THREADS): New
	built-ins.
	* gcc/omp-low.c (finish_reduction_on_host): New function.
	(oacc_host_nthreads): New function.
	(lower_reduction_clauses): Process the array of partial reductions
	on the accelerator if num_gangs = 1.
	(expand_omp_for_static_nochunk): Use BUILT_IN_GOACC_GET_NUM_THREADS and
	BUILT_IN_GOACC_GET_THREAD_NUM for nthreads and threadid, respectively,
	with GF_OMP_FOR_KIND_OACC_LOOP.
	(expand_omp_for_static_chunk): Likewise.
	(expand_omp_target): Likewise.
	(initialize_reduction_data): Adjust memory maps for the case where
	the partial reductions are processed on the accelerator.
	(finalize_reduction_data): Handle reductions on the accelerator.
	(process_reduction_data): Likewise.


	gcc/fortran/
	* f95-lang.c (gfc_init_builtin_functions): Define
	DEF_GOACC_BUILTIN_COMPILER.
	* types.def (DEF_FUNCTION_TYPE_0): Define DEF_FUNCTION_TYPE_1 and
	DEF_FUNCTION_TYPE_3.

	gcc/testsuite/
	* c-c++-common/goacc/goacc_builtins.c: New test.


diff --git a/gcc/builtins.c b/gcc/builtins.c
index 975f696..fa1ac2d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5747,6 +5747,131 @@ expand_stack_save (void)
   return ret;
 }
 
+
+/* Expand OpenACC acc_on_device.
+
+   This has to happen late (that is, not in early folding; expand_builtin_*,
+   rather than fold_builtin_*), as we have to act differently for host and
+   acceleration device.  */
+
+static rtx
+expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
+{
+  if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
+return NULL_RTX;
+
+  tree arg, v1, v2, ret;
+  location_t loc;
+
+  arg = CALL_EXPR_ARG (exp, 0);
+  arg = builtin_save_expr (arg);
+  loc = EXPR_LOCATION (exp);
+
+  /* Build: (arg == v1 || arg == v2) ? 1 : 0.  */
+
+#ifdef ACCEL_COMPILER
+  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3);
+  v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
+#else
+  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0);
+  v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2);
+#endif
+
+  v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
+  v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
+
+  /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
+ expand_expr_real*.  */
+  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
+  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
+			 ret, integer_one_node, integer_zero_node);
+
+  return expand_normal (ret);
+}
+
+
+/* Expand a thread-id/thread-count builtin for OpenACC.  */
+static rtx
+expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
+{
+  tree arg0 = NULL_TREE;
+  bool has_arg0 = false;
+  rtx result = const0_rtx;
+  rtx arg;
+
+  enum insn_code icode = CODE_FOR_nothing;
+  switch (fcode)
+{
+case BUILT_IN_GOACC_NTID:
+#ifdef HAVE_oacc_ntid
+  icode = CODE_FOR_oacc_ntid;
+#endif
+  has_arg0 = true;
+  result = const1_rtx;
+  break;
+case BUILT_IN_GOACC

81 matches

Mail list logo