[PATCH] Misc sse.md formatting fixes

2017-12-28 Thread Jakub Jelinek
Hi!

I've noticed various formatting issues in the recently added ISA support
patterns.  No functional changes, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?

OT, wonder why we have any of the maskz and maskz_1 patterns, can't it be
done in the intrinsic header by using the mask intrinsic with a _mm*zero*
operand?  I understand the need to have separate builtins for masked and
non-masked at least in some cases (as we need AVX512BW for the masked cases
but not for unmasked).

2017-12-28  Jakub Jelinek  

* config/i386/sse.md (vgf2p8affineinvqb_,
vgf2p8affineqb_, vgf2p8mulb_,
vpshrd_, vpshld_,
vpshrdv_, vpshrdv__mask, vpshrdv__maskz,
vpshrdv__maskz_1, vpshldv_, vpshldv__mask,
vpshldv__maskz, vpshldv__maskz_1, vpdpbusd_,
vpdpbusd__mask, vpdpbusd__maskz, vpdpbusd__maskz_1,
vpdpbusds_, vpdpbusds__mask, vpdpbusds__maskz,
vpdpbusds__maskz_1, vpdpwssd_, vpdpwssd__mask,
vpdpwssd__maskz, vpdpwssd__maskz_1, vpdpwssds_,
vpdpwssds__mask, vpdpwssds__maskz,
vpdpwssds__maskz_1, vaesdec_, vaesdeclast_,
vaesenc_, vpclmulqdq_,
avx512vl_vpshufbitqmb): Formatting fixes.

--- gcc/config/i386/sse.md.jj   2017-12-22 14:00:04.768613671 +0100
+++ gcc/config/i386/sse.md  2017-12-27 19:19:58.081660733 +0100
@@ -20082,10 +20082,11 @@ (define_insn "vpopcount
 
 (define_insn "vgf2p8affineinvqb_"
   [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
-   (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" 
"%0,x,v")
-  (match_operand:VI1_AVX512F 2 
"nonimmediate_operand" "xBm,xm,vm")
-  (match_operand:QI 3 "const_0_to_255_operand" 
"n,n,n")]
- UNSPEC_GF2P8AFFINEINV))]
+   (unspec:VI1_AVX512F
+ [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
+  (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
+  (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
+ UNSPEC_GF2P8AFFINEINV))]
   "TARGET_GFNI"
   "@
gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
@@ -20099,10 +20100,11 @@ (define_insn "vgf2p8affineinvqb_"
   [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
-   (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" 
"%0,x,v")
-  (match_operand:VI1_AVX512F 2 
"nonimmediate_operand" "xBm,xm,vm")
-  (match_operand:QI 3 "const_0_to_255_operand" 
"n,n,n")]
- UNSPEC_GF2P8AFFINE))]
+   (unspec:VI1_AVX512F
+ [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
+  (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
+  (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
+ UNSPEC_GF2P8AFFINE))]
   "TARGET_GFNI"
   "@
gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
@@ -20116,9 +20118,10 @@ (define_insn "vgf2p8affineqb_"
   [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
-   (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" 
"%0,x,v")
-  (match_operand:VI1_AVX512F 2 
"nonimmediate_operand" "xBm,xm,vm")]
- UNSPEC_GF2P8MUL))]
+   (unspec:VI1_AVX512F
+ [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
+  (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
+ UNSPEC_GF2P8MUL))]
   "TARGET_GFNI"
   "@
gf2p8mulb\t{%2, %0| %0, %2}
@@ -20134,9 +20137,9 @@ (define_insn "vpshrd_"
   [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
(unspec:VI248_VLBW
  [(match_operand:VI248_VLBW 1 "register_operand" "v")
-   (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
-   (match_operand:SI 3 "const_0_to_255_operand" "n")
-] UNSPEC_VPSHRD))]
+  (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
+  (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPSHRD))]
   "TARGET_AVX512VBMI2"
   "vpshrd\t{%3, %2, %1, %0|%0, 
%1, %2, %3 }"
[(set_attr ("prefix") ("evex"))])
@@ -20145,9 +20148,9 @@ (define_insn "vpshld_"
   [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
(unspec:VI248_VLBW
  [(match_operand:VI248_VLBW 1 "register_operand" "v")
-   (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
-   (match_operand:SI 3 "const_0_to_255_operand" "n")
-] UNSPEC_VPSHLD))]
+  (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
+  (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPSHLD))]
   "TARGET_AVX512VBMI2"
   "vpshld\t{%3, %2, %1, %0|%0, 
%1, %2, %3 }"
[(set_attr ("prefix") ("evex"))])
@@ -20157,8 +20160,8 @@ (define_insn "vpshrdv_"
(unspec:VI248_VLBW
  [(match_operand:VI248_VLBW 1 "register_operand" "0")
   (match_operand:VI248_VLBW 2 "register_operand" "v")
-  (ma

[PATCH] Fix a vbmi2 ICE (PR target/83604)

2017-12-28 Thread Jakub Jelinek
Hi!

These insns don't really need AVX512BW in any way themselves, only their
masked variants might need it for reloading of the mask register, but that
should be covered in builtins.def, doesn't need duplication in sse.md.
For non-masked it causes ICEs, because the builtins properly aren't guarded
with AVX512BW, but the insns incorrectly require that.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2017-12-28  Jakub Jelinek  

PR target/83604
* config/i386/sse.md (VI248_VLBW): Rename to ...
(VI248_AVX512VL): ... this.  Don't guard V32HI with TARGET_AVX512BW.
(vpshrd_, vpshld_,
vpshrdv_, vpshrdv__mask, vpshrdv__maskz,
vpshrdv__maskz_1, vpshldv_, vpshldv__mask,
vpshldv__maskz, vpshldv__maskz_1): Use VI248_AVX512VL
mode iterator instead of VI248_VLBW.

* gcc.target/i386/pr83604.c: New test.

--- gcc/config/i386/sse.md.jj   2017-12-27 19:19:58.081660733 +0100
+++ gcc/config/i386/sse.md  2017-12-27 18:43:32.200347561 +0100
@@ -448,8 +448,8 @@ (define_mode_iterator VI124_AVX2
 (define_mode_iterator VI2_AVX2_AVX512BW
   [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
 
-(define_mode_iterator VI248_VLBW
-  [(V32HI "TARGET_AVX512BW") V16SI V8DI
+(define_mode_iterator VI248_AVX512VL
+  [V32HI V16SI V8DI
(V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
@@ -20134,10 +20134,10 @@ (define_insn "vgf2p8mulb_")])
 
 (define_insn "vpshrd_"
-  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
-   (unspec:VI248_VLBW
- [(match_operand:VI248_VLBW 1 "register_operand" "v")
-  (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
+  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
+   (unspec:VI248_AVX512VL
+ [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
+  (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
   (match_operand:SI 3 "const_0_to_255_operand" "n")]
  UNSPEC_VPSHRD))]
   "TARGET_AVX512VBMI2"
@@ -20145,10 +20145,10 @@ (define_insn "vpshrd_"
[(set_attr ("prefix") ("evex"))])
 
 (define_insn "vpshld_"
-  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
-   (unspec:VI248_VLBW
- [(match_operand:VI248_VLBW 1 "register_operand" "v")
-  (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
+  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
+   (unspec:VI248_AVX512VL
+ [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
+  (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
   (match_operand:SI 3 "const_0_to_255_operand" "n")]
  UNSPEC_VPSHLD))]
   "TARGET_AVX512VBMI2"
@@ -20156,11 +20156,11 @@ (define_insn "vpshld_"
[(set_attr ("prefix") ("evex"))])
 
 (define_insn "vpshrdv_"
-  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
-   (unspec:VI248_VLBW
- [(match_operand:VI248_VLBW 1 "register_operand" "0")
-  (match_operand:VI248_VLBW 2 "register_operand" "v")
-  (match_operand:VI248_VLBW 3 "nonimmediate_operand" "vm")]
+  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
+   (unspec:VI248_AVX512VL
+ [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
+  (match_operand:VI248_AVX512VL 2 "register_operand" "v")
+  (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
  UNSPEC_VPSHRDV))]
   "TARGET_AVX512VBMI2"
   "vpshrdv\t{%3, %2, %0|%0, %2, %3 }"
@@ -20168,12 +20168,12 @@ (define_insn "vpshrdv_"
(set_attr "mode" "")])
 
 (define_insn "vpshrdv__mask"
-  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
-   (vec_merge:VI248_VLBW
- (unspec:VI248_VLBW
-   [(match_operand:VI248_VLBW 1 "register_operand" "0")
-(match_operand:VI248_VLBW 2 "register_operand" "v")
-(match_operand:VI248_VLBW 3 "nonimmediate_operand" "vm")]
+  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI248_AVX512VL
+ (unspec:VI248_AVX512VL
+   [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
+(match_operand:VI248_AVX512VL 2 "register_operand" "v")
+(match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPSHRDV)
  (match_dup 1)
  (match_operand: 4 "register_operand" "Yk")))]
@@ -20183,10 +20183,10 @@ (define_insn "vpshrdv__mask"
(set_attr "mode" "")])
 
 (define_expand "vpshrdv__maskz"
-  [(match_operand:VI248_VLBW 0 "register_operand")
-   (match_operand:VI248_VLBW 1 "register_operand")
-   (match_operand:VI248_VLBW 2 "register_operand")
-   (match_operand:VI248_VLBW 3 "nonimmediate_operand")
+  [(match_operand:VI248_AVX512VL 0 "register_operand")
+   (match_operand:VI248_AVX512VL 1 "register_operand")
+   (match_operand:VI248_AVX512VL 2 "register_o

[PATCH] GFNI and misc other fixes (PR target/83604)

2017-12-28 Thread Jakub Jelinek
Hi!

Martin reported sse-13.c ICEs without all the options it has in dg-options.
The problem is that the GFNI builtins used incorrect ISA masks and the
headers too.  GFNI has one SSE encoded instruction (but that really needs
SSE2 rather than SSE, because it uses V16QImode which is not enabled just
for SSE), 2 AVX VEX encoded ones (without masking) and then EVEX encoded
ones with masking where for *_mask we sometimes also need AVX512BW in
addition to GFNI + {AVX512VL,AVX512F} in the CPUID column in the pdf.

Of course, such combinations don't really work properly with the current
handling of the builtin masks, so I've finally analyzed all cases where we
combine multiple ISA options in masks and found out that these days
what actually is an exception is where we require one isa or another isa,
rather than both or all 3.  So instead of adding further and further
exceptions, this patch changes the general rule, by default we require
all the listed ISAs to be enabled and have 3 exceptions to that rule
(SSE | 3DNOW_A), (FMA | FMA4) and (SSE4_2 | CRC32), where we are looking for
at least one of those enabled rather than both (but, if these are ored with
other ISA masks, we require one of the two from the pair and all others).

Another thing is that 3 intrinsic headers were missing the boilerplate,
some were using incorrect macros etc.

The new testcase unfortunately still has to require -msse2 -mmmx, because
some intrin headers aren't fully correct in their pragmas (don't enable
MMX where required etc.).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-12-28  Jakub Jelinek  

PR target/83604
* config/i386/i386-builtin.def
(__builtin_ia32_vgf2p8affineinvqb_v64qi,
__builtin_ia32_vgf2p8affineqb_v64qi, __builtin_ia32_vgf2p8mulb_v64qi):
Require also OPTION_MASK_ISA_AVX512F in addition to
OPTION_MASK_ISA_GFNI.
(__builtin_ia32_vgf2p8affineinvqb_v16qi_mask,
__builtin_ia32_vgf2p8affineqb_v16qi_mask): Require
OPTION_MASK_ISA_AVX512VL instead of OPTION_MASK_ISA_SSE in addition
to OPTION_MASK_ISA_GFNI.
(__builtin_ia32_vgf2p8mulb_v32qi_mask): Require
OPTION_MASK_ISA_AVX512VL in addition to OPTION_MASK_ISA_GFNI and
OPTION_MASK_ISA_AVX512BW.
(__builtin_ia32_vgf2p8mulb_v16qi_mask): Require
OPTION_MASK_ISA_AVX512VL instead of OPTION_MASK_ISA_AVX512BW in
addition to OPTION_MASK_ISA_GFNI.
(__builtin_ia32_vgf2p8affineinvqb_v16qi,
__builtin_ia32_vgf2p8affineqb_v16qi, __builtin_ia32_vgf2p8mulb_v16qi):
Require OPTION_MASK_ISA_SSE2 instead of OPTION_MASK_ISA_SSE in addition
to OPTION_MASK_ISA_GFNI.
* config/i386/i386.c (def_builtin): Change to builtin isa/isa2 being
a requirement for all ISAs rather than any of them with a few
exceptions.
(ix86_add_new_builtins): Clear OPTION_MASK_ISA_64BIT from isa before
processing.
(ix86_expand_builtin): Require all ISAs from builtin's isa and isa2
bitmasks to be enabled with 3 exceptions, instead of requiring any
enabled ISA with lots of exceptions.
* config/i386/sse.md (vgf2p8affineinvqb_,
vgf2p8affineqb_, vgf2p8mulb_):
Change avx512bw in isa attribute to avx512f.
* config/i386/sgxintrin.h: Add license boilerplate.
* config/i386/vaesintrin.h: Likewise.  Fix macro spelling __AVX512F
to __AVX512F__ and __AVX512VL to __AVX512VL__.
(_mm256_aesdec_epi128, _mm256_aesdeclast_epi128, _mm256_aesenc_epi128,
_mm256_aesenclast_epi128): Enable temporarily avx if __AVX__ is not
defined.
* config/i386/gfniintrin.h (_mm_gf2p8mul_epi8,
_mm_gf2p8affineinv_epi64_epi8, _mm_gf2p8affine_epi64_epi8): Enable
temporarily sse2 rather than sse if not enabled already.

* gcc.target/i386/sse-26.c: New test.

--- gcc/config/i386/i386-builtin.def.jj 2017-12-25 12:58:49.860181619 +0100
+++ gcc/config/i386/i386-builtin.def 2017-12-28 10:18:41.997233122 +0100
@@ -2530,24 +2530,24 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPT
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", 
IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
 
 /* GFNI */
-BDESC (OPTION_MASK_ISA_GFNI, CODE_FOR_vgf2p8affineinvqb_v64qi, 
"__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, 
UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 
CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", 
IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
 BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 
CODE_FOR_vgf2p8affineinvqb_v64qi_mask, 
"__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", 
IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) 
V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
 BDESC (O

Re: [Patch, fortran] PR83076 - [8 Regression] ICE in gfc_deallocate_scalar_with_status, at fortran/trans.c:1598

2017-12-28 Thread Paul Richard Thomas
Hi All,

OK - I'll hold back until I hear from Damian & Zaak.

Cheers

Paul

On 27 December 2017 at 21:06, Damian Rouson
 wrote:
>
> Thanks for the additional information Thomas. It sounds like I should test 
> Paul’s patch. I should be able to do so today and will post the results by 
> tomorrow. I’m adding OpenCoarrays developer Zaak Beekman to the cc and 
> attaching the patch again in case he wants to try it as well.
>
> Zaak, the full thread is at https://gcc.gnu.org/ml/fortran/ and starts with a 
> message from Paul on November 29.
>
> Damian
>
> On December 27, 2017 at 11:09:29 AM, Thomas Koenig 
> (tkoe...@netcologne.de(mailto:tkoe...@netcologne.de)) wrote:
>
>> Hi Damian,
>>
>> > Does breaking binary compatibility simply mean that CAF codes will need to 
>> > be recompiled (which is fine)
>>
>> Well... not really. We are not supposed to break binary compatibility
>> in a release. For gcc-8, we have greater freedom because we had to
>> do it anyway.
>>
>> Now, the interesting question is the impact. If we break binary
>> compatibility for something that never worked anyway or was useless, or
>> something that was broken by a gcc-7 regression, I think we're fine.
>>
>> If not, well... one possible decision would be to wait for gcc-8 to
>> fix this.
>>
>> > or does it mean that there will need to be work done on OpenCoarrays
>> to support the changes
>>
>> This, I don't know, never having looked at the OpenCoarrays source.
>>
>> Regards
>>
>> Thomas



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [Patch, fortran] PR83076 - [8 Regression] ICE in gfc_deallocate_scalar_with_status, at fortran/trans.c:1598

2017-12-28 Thread Andre Vehreschild
Hi all,

as long as the computation where the token can be found is adapted in the same
way, i.e. the token's offset in the derived type monitors the changed position,
everything is fine. If I remember correctly, this is done
automatically by the routines setting up the caf_ref-chain for referencing into
coarrays of derived types (trans-intrinsic.c:~1239 for example). So if
everything works, ok for trunk and gcc-7.


Regards,
Andre

On Thu, 28 Dec 2017 11:37:00 +
Paul Richard Thomas  wrote:

> Hi All,
> 
> OK - I'll hold back until I hear from Damian & Zaak.
> 
> Cheers
> 
> Paul
> 
> On 27 December 2017 at 21:06, Damian Rouson
>  wrote:
> >
> > Thanks for the additional information Thomas. It sounds like I should test
> > Paul’s patch. I should be able to do so today and will post the results by
> > tomorrow. I’m adding OpenCoarrays developer Zaak Beekman to the cc and
> > attaching the patch again in case he wants to try it as well.
> >
> > Zaak, the full thread is at https://gcc.gnu.org/ml/fortran/ and starts with
> > a message from Paul on November 29.
> >
> > Damian
> >
> > On December 27, 2017 at 11:09:29 AM, Thomas Koenig
> > (tkoe...@netcologne.de(mailto:tkoe...@netcologne.de)) wrote: 
> >> Hi Damian,
> >>  
> >> > Does breaking binary compatibility simply mean that CAF codes will need
> >> > to be recompiled (which is fine)  
> >>
> >> Well... not really. We are not supposed to break binary compatibility
> >> in a release. For gcc-8, we have greater freedom because we had to
> >> do it anyway.
> >>
> >> Now, the interesting question is the impact. If we break binary
> >> compatibility for something that never worked anyway or was useless, or
> >> something that was broken by a gcc-7 regression, I think we're fine.
> >>
> >> If not, well... one possible decision would be to wait for gcc-8 to
> >> fix this.
> >>  
> >> > or does it mean that there will need to be work done on OpenCoarrays  
> >> to support the changes
> >>
> >> This, I don't know, never having looked at the OpenCoarrays source.
> >>
> >> Regards
> >>
> >> Thomas  
> 
> 
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


[PATCH] Fix gcc.target/i386/avx512vpopcntdqvl-vpopcnt*-1.c FAILs

2017-12-28 Thread Jakub Jelinek
Hi!

Binutils has had vpopcnt[dq] support since ~ January, but only for the 512-bit
instructions; support for the AVX512VPOPCNTDQ | AVX512VL instructions was only
added in ~ October.  So, if one is using a gas from in between those two,
these two tests FAIL to assemble.

Fixed thusly (tests will be UNSUPPORTED not just with as that doesn't
support vpopcnt[dq] at all, but also one that only supports
vpopcnt[dq] %zmmN, ..., but should work with more recent binutils), regtested
on x86_64-linux, ok for trunk?

2017-12-28  Jakub Jelinek  

* gcc.target/i386/i386.exp
(check_effective_target_avx512vpopcntdqvl): New proc.
* gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Use
avx512vpopcntdqvl effective target rather than avx512vpopcntdq.
* gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Likewise.

--- gcc/testsuite/gcc.target/i386/i386.exp.jj   2017-12-22 14:00:02.809638667 
+0100
+++ gcc/testsuite/gcc.target/i386/i386.exp  2017-12-28 16:09:25.702051624 
+0100
@@ -410,6 +410,19 @@ proc check_effective_target_avx512vpopcn
 } "-mavx512vpopcntdq" ]
 }
 
+# Return 1 if avx512_vpopcntdq & avx512vl instructions can be compiled.
+proc check_effective_target_avx512vpopcntdqvl { } {
+return [check_no_compiler_messages avx512vpopcntdqvl object {
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+
+__v8si
+_mm256_popcnt_epi32 (__v8si __A)
+{
+return (__v8si) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
+}
+} "-mavx512vpopcntdq -mavx512vl" ]
+}
+
 # Return 1 if gfni instructions can be compiled.
 proc check_effective_target_gfni { } {
 return [check_no_compiler_messages gfni object {
--- gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c.jj 
2017-12-22 14:00:02.785638973 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c
2017-12-28 16:12:20.427156937 +0100
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -mavx512vpopcntdq -mavx512bw -mavx512vl" } */
 /* { dg-require-effective-target avx512vl } */
-/* { dg-require-effective-target avx512vpopcntdq } */
+/* { dg-require-effective-target avx512vpopcntdqvl } */
 /* { dg-require-effective-target avx512bw } */
 
 #define AVX512VL
--- gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c.jj 
2017-12-22 14:00:02.784638986 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c
2017-12-28 16:12:28.269161404 +0100
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -mavx512vl -mavx512vpopcntdq" } */
 /* { dg-require-effective-target avx512vl } */
-/* { dg-require-effective-target avx512vpopcntdq } */
+/* { dg-require-effective-target avx512vpopcntdqvl } */
 
 #define AVX512VL
 #define AVX512F_LEN 256

Jakub


[libgomp, openacc, openmp, PR83046] Prune removed funcs from offload table

2017-12-28 Thread Tom de Vries

Hi,

Consider this openmp example:
...
/* { dg-do link } */

#define N 100

int
main ()
{
  int a[N];
  int i, x;
  int c;

  c = 1;
#pragma omp target
  for (i = 0; i < 100; i++)
a[i] = 0;

  if (c)
__builtin_unreachable ();

#pragma omp target
  for (i = 0; i < 100; i++)
a[i] = 1;

  return 0;
}
...

At ompexp, there are two offloaded functions, main._omp_fn.0 and 
main._omp_fn.1:

...
   :
  c = 1;
  ...
  __builtin_GOMP_target_ext (-1, main._omp_fn.0, 2, &.omp_data_arr.2,
 &.omp_data_sizes.3, &.omp_data_kinds.4,
 0, 0B, &.omp_target_args.9);
  ...
  if (c != 0)
goto ; [INV]
  else
goto ; [INV]

   :
  __builtin_unreachable ();

  :
  ...
  __builtin_GOMP_target_ext (-1, main._omp_fn.1, 2, &.omp_data_arr.5,
 &.omp_data_sizes.6, &.omp_data_kinds.7, 0,
 0B, &.omp_target_args.8);
...

But after ccp1, the reference to main._omp_fn.1 in main is removed:
...
  __builtin_GOMP_target_ext (-1, main._omp_fn.0, 2, &.omp_data_arr.2,
 &.omp_data_sizes.3, &.omp_data_kinds.4,
 0, 0B, &.omp_target_args.9);
  __builtin_unreachable ();
...
Consequently, during free-fnsummary, the cgraph_node for main._omp_fn.1 
is removed.


However, the main._omp_fn.1 function is still present in the offload 
table offload_funcs.  This causes an ICE in lto1 when we're trying to
access the cgraph_node* for main._omp_fn.1, which is NULL:


lto1: internal compiler error: Segmentation fault
0xab73cf crash_signal
gcc/toplev.c:325
0x94f694 cgraph_node::mark_force_output()
gcc/cgraph.h:3140
0x94dfda input_offload_tables(bool)
gcc/lto-cgraph.c:1940
0x5aa19f read_cgraph_and_symbols
gcc/lto/lto.c:2872
0x5aa19f lto_main()
gcc/lto/lto.c:3323
...

The ICE can be triggered for both openmp and openacc.

This patch fixes the ICE by removing entries from offload_funcs that no 
longer have corresponding cgraph_nodes.


Bootstrapped and reg-tested on x86_64.
Built and reg-tested on x86_64 with nvptx accelerator.

OK for trunk?

Thanks,
- Tom
Prune removed funcs from offload table

2017-12-27  Tom de Vries  

	PR libgomp/83046
	* lto-cgraph.c (output_offload_tables): Remove offload_funcs entries
	that no longer have a corresponding cgraph_node.

	* testsuite/libgomp.oacc-c-c++-common/pr83046.c: New test.
	* testsuite/libgomp.c-c++-common/pr83046.c: New test.

---
 gcc/lto-cgraph.c   | 10 +
 libgomp/testsuite/libgomp.c-c++-common/pr83046.c   | 25 ++
 .../testsuite/libgomp.oacc-c-c++-common/pr83046.c  | 25 ++
 3 files changed, 60 insertions(+)

diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index ed3df15b143..6bef2d974a6 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -,6 +,16 @@ output_offload_tables (void)
   struct lto_simple_output_block *ob
 = lto_create_simple_output_block (LTO_section_offload_table);
 
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs);)
+{
+  if (!cgraph_node::get ((*offload_funcs)[i]))
+	{
+	  offload_funcs->ordered_remove (i);
+	  continue;
+	}
+  i++;
+}
+
   for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
 {
   streamer_write_enum (ob->main_stream, LTO_symtab_tags,
diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr83046.c b/libgomp/testsuite/libgomp.c-c++-common/pr83046.c
new file mode 100644
index 000..90dcb704fb3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/pr83046.c
@@ -0,0 +1,25 @@
+/* { dg-do link } */
+
+#define N 100
+
+int
+main ()
+{
+  int a[N];
+  int i, x;
+  int c;
+
+  c = 1;
+#pragma omp target
+  for (i = 0; i < 100; i++)
+a[i] = 0;
+
+  if (c)
+__builtin_unreachable ();
+
+#pragma omp target
+  for (i = 0; i < 100; i++)
+a[i] = 1;
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83046.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83046.c
new file mode 100644
index 000..a2a085c5fb2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83046.c
@@ -0,0 +1,25 @@
+/* { dg-do link } */
+
+#define N 100
+
+int
+main ()
+{
+  int a[N];
+  int i, x;
+  int c;
+
+  c = 1;
+#pragma acc parallel loop
+  for (i = 0; i < 100; i++)
+a[i] = 0;
+
+  if (c)
+__builtin_unreachable ();
+
+#pragma acc parallel loop
+  for (i = 0; i < 100; i++)
+a[i] = 1;
+
+  return 0;
+}


Re: [libgomp, openacc, openmp, PR83046] Prune removed funcs from offload table

2017-12-28 Thread Jakub Jelinek
On Thu, Dec 28, 2017 at 04:53:29PM +0100, Tom de Vries wrote:
> --- a/gcc/lto-cgraph.c
> +++ b/gcc/lto-cgraph.c
> @@ -,6 +,16 @@ output_offload_tables (void)
>struct lto_simple_output_block *ob
>  = lto_create_simple_output_block (LTO_section_offload_table);
>  
> +  for (unsigned i = 0; i < vec_safe_length (offload_funcs);)
> +{
> +  if (!cgraph_node::get ((*offload_funcs)[i]))
> + {
> +   offload_funcs->ordered_remove (i);
> +   continue;
> + }
> +  i++;
> +}

This has O(n^2) complexity for n == vec_safe_length (offload_funcs).
Can't you instead just have 2 IVs, one for where we read the vector elt and
one for where we write it if the 2 are different, then truncate the vector
if needed at the end?

Another thing, I think you can safely remove elts from the vector (== from
the host and offloading target arrays) only when !flag_lto, because we rely
on the two arrays being the same.  So you can't remove elts only on the host
and not on the device, or vice versa.  The output_offload_tables function
has:
  /* In WHOPR mode during the WPA stage the joint offload tables need to be
 streamed to one partition only.  That's why we free offload_funcs and
 offload_vars after the first call of output_offload_tables.  */
  if (flag_wpa)
{
  vec_free (offload_funcs);
  vec_free (offload_vars);
}
so at least with flag_wpa, if we remove anything in there, it won't be
reflected by the other tables.  So, can we do something different in case
we can't easily remove stuff from the vector anymore?  Either store some
placeholder in the tables (dunno if NULL would work or what), or instead
ensure corresponding functions can't be removed?

Jakub


Re: [libgomp, openacc, openmp, PR83046] Prune removed funcs from offload table

2017-12-28 Thread Jakub Jelinek
On Thu, Dec 28, 2017 at 05:06:57PM +0100, Jakub Jelinek wrote:
> This has O(n^2) complexity for n == vec_safe_length (offload_funcs).
> Can't you instead just have 2 IVs, one for where we read the vector elt and
> one for where we write it if the 2 are different, then truncate the vector
> if needed at the end?
> 
> Another thing, I think you can safely remove elts from the vector (== from
> the host and offloading target arrays) only when !flag_lto, because we rely
> on the two arrays being the same.  So you can't remove elts only on the host
> and not on the device, or vice versa.  The output_offload_tables function
> has:
>   /* In WHOPR mode during the WPA stage the joint offload tables need to be
>  streamed to one partition only.  That's why we free offload_funcs and
>  offload_vars after the first call of output_offload_tables.  */
>   if (flag_wpa)
> {
>   vec_free (offload_funcs);
>   vec_free (offload_vars);
> }
> so at least with flag_wpa, if we remove anything in there, it won't be
> reflected by the other tables.  So, can we do something different in case
> we can't easily remove stuff from the vector anymore?  Either store some
> placeholder in the tables (dunno if NULL would work or what), or instead
> ensure corresponding functions can't be removed?

Maybe this removal if (!flag_lto) could be done earlier, e.g. at the
beginning of lto_output, and for nodes we keep around in the table
past that point set DECL_PRESERVE_P to 1 on the fndecl, so that we then
stream that flag.

Jakub


m68k: clear cc_status in ashrdi3_const1, lshrdi3_const1

2017-12-28 Thread Andreas Schwab
The ashrdi3_const1 and lshrdi3_const1 insns don't produce a useful CC
status.  This fixes the 20_util/to_chars/1.cc test in the libstdc++
testsuite.

Andreas.

* config/m68k/m68k.md (ashrdi3_const1, lshrdi3_const1): Add
CC_STATUS_INIT.

diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 628e3889bb..a3a0f197db 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -4683,6 +4683,7 @@
   "!TARGET_COLDFIRE"
 {
   operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+  CC_STATUS_INIT;
   return "asr%.l #1,%0\;roxr%.l #1,%1";
 })
 
@@ -4919,7 +4920,10 @@
(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
 (const_int 1)))]
   "!TARGET_COLDFIRE"
-  "lsr%.l #1,%0\;roxr%.l #1,%R0")
+{
+  CC_STATUS_INIT;
+  return "lsr%.l #1,%0\;roxr%.l #1,%R0";
+})
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
-- 
2.15.1


-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


[PATCH] PR fortran/83344 Don't set bogus constant value

2017-12-28 Thread Janne Blomqvist
This patch does not fix PR 83344, but merely fixes an error where we
used to set a constant character length value from a non-constant
expression, and thus set it to some bogus value.

As a result of this, I have commented out part of the associate_22.f90
test which otherwise generates a warning message.

Regtested on x86_64-pc-linux-gnu, unless there are objections I'll
commit this to trunk in a few days?

gcc/fortran/ChangeLog:

2017-12-28  Janne Blomqvist  

PR fortran/83344
* resolve.c (resolve_assoc_var): Don't set the constant value
unless the target is a constant expression.

gcc/testsuite/ChangeLog:

2017-12-28  Janne Blomqvist  

PR fortran/83344
* gfortran.dg/associate_22.f90: Comment out part of test.
---
 gcc/fortran/resolve.c  | 3 ++-
 gcc/testsuite/gfortran.dg/associate_22.f90 | 9 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index f819b71..cf75a78 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -8627,7 +8627,8 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
   if (!sym->ts.u.cl)
sym->ts.u.cl = target->ts.u.cl;
 
-  if (!sym->ts.u.cl->length && !sym->ts.deferred)
+  if (!sym->ts.u.cl->length && !sym->ts.deferred
+ && target->expr_type == EXPR_CONSTANT)
sym->ts.u.cl->length
  = gfc_get_int_expr (gfc_default_integer_kind,
  NULL, target->value.character.length);
diff --git a/gcc/testsuite/gfortran.dg/associate_22.f90 
b/gcc/testsuite/gfortran.dg/associate_22.f90
index 1558992..edf5932 100644
--- a/gcc/testsuite/gfortran.dg/associate_22.f90
+++ b/gcc/testsuite/gfortran.dg/associate_22.f90
@@ -24,10 +24,11 @@ program foo
end associate
 
! This failed.
-   a = trim(s) // 'abc'
-   associate(w => trim(s) // 'abc')
-  if (trim(w) /= trim(a)) call abort
-   end associate
+   ! This still doesn't work correctly, see PR 83344
+!   a = trim(s) // 'abc'
+!   associate(w => trim(s) // 'abc')
+!  if (trim(w) /= trim(a)) call abort
+!   end associate
 
! This failed.
associate(x => trim('abc'))
-- 
2.7.4



Re: [PATCH] PR fortran/83344 Don't set bogus constant value

2017-12-28 Thread Steve Kargl
On Thu, Dec 28, 2017 at 07:37:44PM +0200, Janne Blomqvist wrote:
> This patch does not fix PR 83344, but merely fixes an error where we
> used to set a constant character length value from a non-constant
> expression, and thus set it to some bogus value.
> 
> As a result of this, I have commented out part of the associate_22.f90
> test which otherwise generates a warning message.
> 
> Regtested on x86_64-pc-linux-gnu, unless there are objections I'll
> commit this to trunk in a few days?
> 

This looks ok to me.

-- 
Steve


Re: [Patch, fortran] PR83076 - [8 Regression] ICE in gfc_deallocate_scalar_with_status, at fortran/trans.c:1598

2017-12-28 Thread Damian Rouson
I applied the patch to the trunk and confirmed that it doesn’t break any previously
passing OpenCoarrays tests.  Is that sufficient or should I also try applying
the patch to the 7 branch?

Damian




RFA: Fix REG_ARGS_SIZE handling when pushing TLS addresses

2017-12-28 Thread Richard Sandiford
Andreas Schwab  writes:
> On Dez 23 2017, Richard Sandiford  wrote:
>> gcc/
>>  * expr.c (fixup_args_size_notes): Check that any existing
>>  REG_ARGS_SIZE notes are correct, and don't try to re-add them.
>>  (emit_single_push_insn_1): Move stack_pointer_delta adjustment to...
>>  (emit_single_push_insn): ...here.
>
> Successfully regtested on m68k-linux.

Thanks.  Now also tested on aarch64-linux-gnu, x86_64-linux-gnu and
powerpc64-linux-gnu (not that that will give much coverage).  Also
tested with a before-and-after comparison of testsuite output for
a range of targets.  OK to install?

Richard


The new assert in add_args_size_note triggered for gcc.dg/tls/opt-3.c
and others on m68k.  This looks like a pre-existing bug: if we pushed
a value that needs a call to something like __tls_get_addr, we ended
up with two different REG_ARGS_SIZE notes on the same instruction.

It seems to be OK for emit_single_push_insn to push something that
needs a call to __tls_get_addr:

  /* We have to allow non-call_pop patterns for the case
 of emit_single_push_insn of a TLS address.  */
  if (GET_CODE (pat) != PARALLEL)
return 0;

so I think the bug is in the way this is handled rather than the fact
that it occurs at all.

If we're pushing a value X that needs a call C to calculate, we'll
add REG_ARGS_SIZE notes to the pushes and pops for C as part of the
call sequence.  Then emit_single_push_insn calls fixup_args_size_notes
on the whole push sequence (the calculation of X, including C,
and the push of X itself).  This is where the double notes came from.
But emit_single_push_insn_1 adjusted stack_pointer_delta *before* the
push, so the notes added for C were relative to the situation after
the future push of X rather than before it.

Presumably this didn't matter in practice because the note added
second tended to trump the note added first.  But code is allowed to
walk REG_NOTES without having to disregard secondary notes.

2017-12-23  Richard Sandiford  

gcc/
* expr.c (fixup_args_size_notes): Check that any existing
REG_ARGS_SIZE notes are correct, and don't try to re-add them.
(emit_single_push_insn_1): Move stack_pointer_delta adjustment to...
(emit_single_push_insn): ...here.

Index: gcc/expr.c
===
--- gcc/expr.c  2017-12-23 09:29:20.226338285 +
+++ gcc/expr.c  2017-12-23 09:29:45.783339673 +
@@ -4089,6 +4089,14 @@ fixup_args_size_notes (rtx_insn *prev, r
   if (!NONDEBUG_INSN_P (insn))
continue;
 
+  /* We might have existing REG_ARGS_SIZE notes, e.g. when pushing
+a call argument containing a TLS address that itself requires
+a call to __tls_get_addr.  The handling of stack_pointer_delta
+in emit_single_push_insn is supposed to ensure that any such
+notes are already correct.  */
+  rtx note = find_reg_note (insn, REG_ARGS_SIZE, NULL_RTX);
+  gcc_assert (!note || known_eq (args_size, get_args_size (note)));
+
   poly_int64 this_delta = find_args_size_adjust (insn);
   if (known_eq (this_delta, 0))
{
@@ -4102,7 +4110,8 @@ fixup_args_size_notes (rtx_insn *prev, r
   if (known_eq (this_delta, HOST_WIDE_INT_MIN))
saw_unknown = true;
 
-  add_args_size_note (insn, args_size);
+  if (!note)
+   add_args_size_note (insn, args_size);
   if (STACK_GROWS_DOWNWARD)
this_delta = -poly_uint64 (this_delta);
 
@@ -4126,7 +4135,6 @@ emit_single_push_insn_1 (machine_mode mo
   rtx dest;
   enum insn_code icode;
 
-  stack_pointer_delta += PUSH_ROUNDING (GET_MODE_SIZE (mode));
   /* If there is push pattern, use it.  Otherwise try old way of throwing
  MEM representing push operation to move expander.  */
   icode = optab_handler (push_optab, mode);
@@ -4213,6 +4221,14 @@ emit_single_push_insn (machine_mode mode
 
   emit_single_push_insn_1 (mode, x, type);
 
+  /* Adjust stack_pointer_delta to describe the situation after the push
+ we just performed.  Note that we must do this after the push rather
+ than before the push in case calculating X needs pushes and pops of
+ its own (e.g. if calling __tls_get_addr).  The REG_ARGS_SIZE notes
+ for such pushes and pops must not include the effect of the future
+ push of X.  */
+  stack_pointer_delta += PUSH_ROUNDING (GET_MODE_SIZE (mode));
+
   last = get_last_insn ();
 
   /* Notice the common case where we emitted exactly one insn.  */


[committed] Use valid_for_const_vector_p instead of CONSTANT_P

2017-12-28 Thread Richard Sandiford
This patch makes the VEC_SERIES code use valid_for_const_vector_p
instead of CONSTANT_P, to match what we already do for VEC_DUPLICATE.
This showed up as a failure in gcc.c-torture/execute/pr28982b.c for -m32
on x86_64-linux-gnu after later patches.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu,
and with a before-and-after comparison of testsuite output for a range
of targets.  Applied as obvious.

Richard


2017-12-28  Richard Sandiford  

gcc/
* emit-rtl.c (gen_const_vec_series): Use valid_for_const_vector_p
instead of CONSTANT_P.
(gen_vec_series): Likewise.
* simplify-rtx.c (simplify_binary_operation_1): Likewise.

Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-12-23 09:29:19.465366872 +
+++ gcc/emit-rtl.c  2017-12-28 16:55:49.382742760 +
@@ -5949,7 +5949,8 @@ const_vec_series_p_1 (const_rtx x, rtx *
 rtx
 gen_const_vec_series (machine_mode mode, rtx base, rtx step)
 {
-  gcc_assert (CONSTANT_P (base) && CONSTANT_P (step));
+  gcc_assert (valid_for_const_vector_p (mode, base)
+ && valid_for_const_vector_p (mode, step));
 
   int nunits = GET_MODE_NUNITS (mode);
   rtvec v = rtvec_alloc (nunits);
@@ -5970,7 +5971,8 @@ gen_vec_series (machine_mode mode, rtx b
 {
   if (step == const0_rtx)
 return gen_vec_duplicate (mode, base);
-  if (CONSTANT_P (base) && CONSTANT_P (step))
+  if (valid_for_const_vector_p (mode, base)
+  && valid_for_const_vector_p (mode, step))
 return gen_const_vec_series (mode, base, step);
   return gen_rtx_VEC_SERIES (mode, base, step);
 }
Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c  2017-12-23 09:29:19.493365821 +
+++ gcc/simplify-rtx.c  2017-12-28 16:55:49.383742722 +
@@ -3590,7 +3590,8 @@ simplify_binary_operation_1 (enum rtx_co
 case VEC_SERIES:
   if (op1 == CONST0_RTX (GET_MODE_INNER (mode)))
return gen_vec_duplicate (mode, op0);
-  if (CONSTANT_P (op0) && CONSTANT_P (op1))
+  if (valid_for_const_vector_p (mode, op0)
+ && valid_for_const_vector_p (mode, op1))
return gen_const_vec_series (mode, op0, op1);
   return 0;
 


[committed] [rs6000] Use gen_int_mode in ieee_128bit_negative_zero

2017-12-28 Thread Richard Sandiford
Previously we'd generate a non-canonical zero-extended CONST_INT
instead of a sign-extended one, which tripped the assert for
canonical CONST_INTs after a later patch.

Tested on powerpc64le-linux-gnu and installed as obvious.

Richard


2017-12-28  Richard Sandiford  

gcc/
* config/powerpcspe/powerpcspe.md (ieee_128bit_negative_zero): Use
gen_int_mode rather than GEN_INT.
* config/rs6000/rs6000.md (ieee_128bit_negative_zero): Likewise.

Index: gcc/config/powerpcspe/powerpcspe.md
===
--- gcc/config/powerpcspe/powerpcspe.md 2017-09-04 11:50:08.531037669 +0100
+++ gcc/config/powerpcspe/powerpcspe.md 2017-12-28 16:56:59.791383685 +
@@ -8113,7 +8113,7 @@ (define_expand "ieee_128bit_negative_zer
 RTVEC_ELT (v, i) = const0_rtx;
 
   high = (BYTES_BIG_ENDIAN) ? 0 : 15;
-  RTVEC_ELT (v, high) = GEN_INT (0x80);
+  RTVEC_ELT (v, high) = gen_int_mode (0x80, QImode);
 
   rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v));
   DONE;
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md 2017-12-07 18:38:16.674205190 +
+++ gcc/config/rs6000/rs6000.md 2017-12-28 16:56:59.792384020 +
@@ -8055,7 +8055,7 @@ (define_expand "ieee_128bit_negative_zer
 RTVEC_ELT (v, i) = const0_rtx;
 
   high = (BYTES_BIG_ENDIAN) ? 0 : 15;
-  RTVEC_ELT (v, high) = GEN_INT (0x80);
+  RTVEC_ELT (v, high) = gen_int_mode (0x80, QImode);
 
   rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v));
   DONE;


[0/4] Use new vector constant encoding for CONST_VECTOR

2017-12-28 Thread Richard Sandiford
This series of patches converts CONST_VECTOR to use the same encoding
scheme as VECTOR_CST.  I'd originally been holding it back until the
vec_perm series had been processed, but that was probably a mistake.

The main difference from VECTOR_CST is that we still include all the
elements for fixed-length vectors, to help with code that operates
directly on XVEC and XVECEXP.  See the covering note of patch 2 for
more details.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also tested by comparing the before-and-after assembly output for at
least one target per CPU directory.  OK to install?

Thanks,
Richard

[FWIW, the vec_perm patches are the only ones holding up the rest of
the poly_int ones.]


[1/4] Use CONST_VECTOR_ELT instead of XVECEXP

2017-12-28 Thread Richard Sandiford
This patch replaces target-independent uses of XVECEXP with uses
of CONST_VECTOR_ELT.  This kind of replacement isn't necessary
for code specific to targets other than AArch64.


2017-12-28  Richard Sandiford  

gcc/
* simplify-rtx.c (simplify_const_binary_operation): Use
CONST_VECTOR_ELT instead of XVECEXP.

Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c  2017-12-21 18:36:04.778333159 +
+++ gcc/simplify-rtx.c  2017-12-22 13:08:49.287660877 +
@@ -4070,9 +4070,9 @@ simplify_const_binary_operation (enum rt
  gcc_assert (op0_n_elts + op1_n_elts == n_elts);
 
  for (i = 0; i < op0_n_elts; ++i)
-   RTVEC_ELT (v, i) = XVECEXP (op0, 0, i);
+   RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op0, i);
  for (i = 0; i < op1_n_elts; ++i)
-   RTVEC_ELT (v, op0_n_elts+i) = XVECEXP (op1, 0, i);
+   RTVEC_ELT (v, op0_n_elts+i) = CONST_VECTOR_ELT (op1, i);
}
 
   return gen_rtx_CONST_VECTOR (mode, v);


[2/4] New CONST_VECTOR layout

2017-12-28 Thread Richard Sandiford
This patch makes CONST_VECTOR use the same encoding as VECTOR_CST.

One problem that occurs in RTL but not at the tree level is that a fair
amount of code uses XVEC and XVECEXP directly on CONST_VECTORs (which is
valid, just with looser checking).  This is complicated by the fact that
vectors are also represented as PARALLELs in some target interfaces,
so using XVECEXP is a good polymorphic way of handling both forms.

Rather than try to untangle all that, the best approach seemed to be to
continue to encode every element in a fixed-length vector.  That way only
target-independent and AArch64 code need to be precise about using
CONST_VECTOR_ELT over XVECEXP.

After this change it is no longer valid to modify CONST_VECTORs in-place.
This needed some fix-up in the powerpc backends.


2017-12-28  Richard Sandiford  

gcc/
* doc/rtl.texi (const_vector): Describe new encoding scheme.
* Makefile.in (OBJS): Add rtx-vector-builder.o.
* rtx-vector-builder.h: New file.
* rtx-vector-builder.c: Likewise.
* rtl.h (rtx_def::u2): Add a const_vector field.
(CONST_VECTOR_NPATTERNS): New macro.
(CONST_VECTOR_NELTS_PER_PATTERN): Likewise.
(CONST_VECTOR_DUPLICATE_P): Likewise.
(CONST_VECTOR_STEPPED_P): Likewise.
(CONST_VECTOR_ENCODED_ELT): Likewise.
(const_vec_duplicate_p): Check for a duplicated vector encoding.
(unwrap_const_vec_duplicate): Likewise.
(const_vec_series_p): Check for a non-duplicated vector encoding.
Say that the function only returns true for integer vectors.
* emit-rtl.c: Include rtx-vector-builder.h.
(gen_const_vec_duplicate_1): Delete.
(gen_const_vector): Call gen_const_vec_duplicate instead of
gen_const_vec_duplicate_1.
(const_vec_series_p_1): Operate directly on the CONST_VECTOR encoding.
(gen_const_vec_duplicate): Use rtx_vector_builder.
(gen_const_vec_series): Likewise.
(gen_rtx_CONST_VECTOR): Likewise.
* config/powerpcspe/powerpcspe.c: Include rtx-vector-builder.h.
(swap_const_vector_halves): Take an rtx pointer rather than rtx.
Build a new vector rather than modifying a CONST_VECTOR in-place.
(handle_special_swappables): Update call accordingly.
* config/rs6000/rs6000-p8swap.c: Include rtx-vector-builder.h.
(swap_const_vector_halves): Take an rtx pointer rather than rtx.
Build a new vector rather than modifying a CONST_VECTOR in-place.
(handle_special_swappables): Update call accordingly.

Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2017-12-22 16:54:11.793940998 +
+++ gcc/doc/rtl.texi2017-12-28 16:58:43.018987597 +
@@ -1644,18 +1644,92 @@ low-level routines) and @code{const_poly
 
 @findex const_vector
 @item (const_vector:@var{m} [@var{x0} @var{x1} @dots{}])
-Represents a vector constant.  The square brackets stand for the vector
-containing the constant elements.  @var{x0}, @var{x1} and so on are
-the @code{const_int}, @code{const_wide_int}, @code{const_double} or
-@code{const_fixed} elements.
-
-The number of units in a @code{const_vector} is obtained with the macro
-@code{CONST_VECTOR_NUNITS} as in @code{CONST_VECTOR_NUNITS (@var{v})}.
-
-Individual elements in a vector constant are accessed with the macro
-@code{CONST_VECTOR_ELT} as in @code{CONST_VECTOR_ELT (@var{v}, @var{n})}
-where @var{v} is the vector constant and @var{n} is the element
-desired.
+Represents a vector constant.  The values in square brackets are
+elements of the vector, which are always @code{const_int},
+@code{const_wide_int}, @code{const_double} or @code{const_fixed}
+expressions.
+
+Each vector constant @var{v} is treated as a specific instance of an
+arbitrary-length sequence that itself contains
+@samp{CONST_VECTOR_NPATTERNS (@var{v})} interleaved patterns.  Each
+pattern has the form:
+
+@smallexample
+@{ @var{base0}, @var{base1}, @var{base1} + @var{step}, @var{base1} + 
@var{step} * 2, @dots{} @}
+@end smallexample
+
+The first three elements in each pattern are enough to determine the
+values of the other elements.  However, if all @var{step}s are zero,
+only the first two elements are needed.  If in addition each @var{base1}
+is equal to the corresponding @var{base0}, only the first element in
+each pattern is needed.  The number of determining elements per pattern
+is given by @samp{CONST_VECTOR_NELTS_PER_PATTERN (@var{v})}.
+
+For example, the constant:
+
+@smallexample
+@{ 0, 1, 2, 6, 3, 8, 4, 10, 5, 12, 6, 14, 7, 16, 8, 18 @}
+@end smallexample
+
+is interpreted as an interleaving of the sequences:
+
+@smallexample
+@{ 0, 2, 3, 4, 5, 6, 7, 8 @}
+@{ 1, 6, 8, 10, 12, 14, 16, 18 @}
+@end smallexample
+
+where the sequences are represented by the following patterns:
+
+@smallexample
+@var{base0} == 0, @var{base1} == 2, @var{step} == 1
+@var{base0} == 1, @var{base1} == 6, @var{step} == 2
+@end smallex

[3/4] Make more use of rtx_vector_builder

2017-12-28 Thread Richard Sandiford
This patch makes various bits of CONST_VECTOR-building code use
rtx_vector_builder, operating directly on a specific encoding.


2017-12-28  Richard Sandiford  

gcc/
* expr.c: Include rtx-vector-builder.h.
(const_vector_mask_from_tree): Use rtx_vector_builder and operate
directly on the tree encoding.
(const_vector_from_tree): Likewise.
* optabs.c: Include rtx-vector-builder.h.
(expand_vec_perm_var): Use rtx_vector_builder and create a repeating
sequence of "u" values.
* vec-perm-indices.c: Include rtx-vector-builder.h.
(vec_perm_indices_to_rtx): Use rtx_vector_builder and operate
directly on the vec_perm_indices encoding.

Index: gcc/expr.c
===
--- gcc/expr.c  2017-12-22 12:58:44.518127920 +
+++ gcc/expr.c  2017-12-22 13:09:48.535709302 +
@@ -61,6 +61,7 @@ Software Foundation; either version 3, o
 #include "tree-chkp.h"
 #include "rtl-chkp.h"
 #include "ccmp.h"
+#include "rtx-vector-builder.h"
 
 
 /* If this is nonzero, we do not bother generating VOLATILE
@@ -11761,32 +11762,25 @@ try_tablejump (tree index_type, tree ind
 static rtx
 const_vector_mask_from_tree (tree exp)
 {
-  rtvec v;
-  unsigned i, units;
-  tree elt;
-  machine_mode inner, mode;
-
-  mode = TYPE_MODE (TREE_TYPE (exp));
-  units = VECTOR_CST_NELTS (exp);
-  inner = GET_MODE_INNER (mode);
-
-  v = rtvec_alloc (units);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+  machine_mode inner = GET_MODE_INNER (mode);
 
-  for (i = 0; i < units; ++i)
+  rtx_vector_builder builder (mode, VECTOR_CST_NPATTERNS (exp),
+ VECTOR_CST_NELTS_PER_PATTERN (exp));
+  unsigned int count = builder.encoded_nelts ();
+  for (unsigned int i = 0; i < count; ++i)
 {
-  elt = VECTOR_CST_ELT (exp, i);
-
+  tree elt = VECTOR_CST_ELT (exp, i);
   gcc_assert (TREE_CODE (elt) == INTEGER_CST);
   if (integer_zerop (elt))
-   RTVEC_ELT (v, i) = CONST0_RTX (inner);
+   builder.quick_push (CONST0_RTX (inner));
   else if (integer_onep (elt)
   || integer_minus_onep (elt))
-   RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
+   builder.quick_push (CONSTM1_RTX (inner));
   else
gcc_unreachable ();
 }
-
-  return gen_rtx_CONST_VECTOR (mode, v);
+  return builder.build ();
 }
 
 /* EXP is a VECTOR_CST in which each element is either all-zeros or all-ones.
@@ -11816,12 +11810,7 @@ const_scalar_mask_from_tree (scalar_int_
 static rtx
 const_vector_from_tree (tree exp)
 {
-  rtvec v;
-  unsigned i, units;
-  tree elt;
-  machine_mode inner, mode;
-
-  mode = TYPE_MODE (TREE_TYPE (exp));
+  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
 
   if (initializer_zerop (exp))
 return CONST0_RTX (mode);
@@ -11829,27 +11818,25 @@ const_vector_from_tree (tree exp)
   if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
 return const_vector_mask_from_tree (exp);
 
-  units = VECTOR_CST_NELTS (exp);
-  inner = GET_MODE_INNER (mode);
-
-  v = rtvec_alloc (units);
+  machine_mode inner = GET_MODE_INNER (mode);
 
-  for (i = 0; i < units; ++i)
+  rtx_vector_builder builder (mode, VECTOR_CST_NPATTERNS (exp),
+ VECTOR_CST_NELTS_PER_PATTERN (exp));
+  unsigned int count = builder.encoded_nelts ();
+  for (unsigned int i = 0; i < count; ++i)
 {
-  elt = VECTOR_CST_ELT (exp, i);
-
+  tree elt = VECTOR_CST_ELT (exp, i);
   if (TREE_CODE (elt) == REAL_CST)
-   RTVEC_ELT (v, i) = const_double_from_real_value (TREE_REAL_CST (elt),
-inner);
+   builder.quick_push (const_double_from_real_value (TREE_REAL_CST (elt),
+ inner));
   else if (TREE_CODE (elt) == FIXED_CST)
-   RTVEC_ELT (v, i) = CONST_FIXED_FROM_FIXED_VALUE (TREE_FIXED_CST (elt),
-inner);
+   builder.quick_push (CONST_FIXED_FROM_FIXED_VALUE (TREE_FIXED_CST (elt),
+ inner));
   else
-   RTVEC_ELT (v, i) = immed_wide_int_const (wi::to_poly_wide (elt),
-inner);
+   builder.quick_push (immed_wide_int_const (wi::to_poly_wide (elt),
+ inner));
 }
-
-  return gen_rtx_CONST_VECTOR (mode, v);
+  return builder.build ();
 }
 
 /* Build a decl for a personality function given a language prefix.  */
Index: gcc/optabs.c
===
--- gcc/optabs.c2017-12-22 13:06:03.092620276 +
+++ gcc/optabs.c2017-12-22 13:09:48.535709302 +
@@ -33,6 +33,7 @@ Software Foundation; either version 3, o
 #include "emit-rtl.h"
 #include "recog.h"
 #include "diagnostic-core.h"
+#include "rtx-vector-builder.h"
 
 /* Include insn-config.h before expr.h so that HAVE

[4/4] Make CONST_VECTOR_ELT handle implicitly-encoded elements

2017-12-28 Thread Richard Sandiford
This patch makes CONST_VECTOR_ELT handle implicitly-encoded elements,
in a similar way to VECTOR_CST_ELT.


2017-12-28  Richard Sandiford  

gcc/
* rtl.h (CONST_VECTOR_ELT): Redefine to const_vector_elt.
(const_vector_encoded_nelts): New function.
(CONST_VECTOR_NUNITS): Redefine to use GET_MODE_NUNITS.
(const_vector_int_elt, const_vector_elt): Declare.
* emit-rtl.c (const_vector_int_elt_1): New function.
(const_vector_elt): Likewise.
* simplify-rtx.c (simplify_immed_subreg): Avoid taking the address
of CONST_VECTOR_ELT.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-12-22 13:12:00.662528577 +
+++ gcc/rtl.h   2017-12-22 13:12:00.834520649 +
@@ -1969,7 +1969,7 @@ #define CONST_FIXED_VALUE_LOW(r) \
   ((HOST_WIDE_INT) (CONST_FIXED_VALUE (r)->data.low))
 
 /* For a CONST_VECTOR, return element #n.  */
-#define CONST_VECTOR_ELT(RTX, N) XCVECEXP (RTX, 0, N, CONST_VECTOR)
+#define CONST_VECTOR_ELT(RTX, N) const_vector_elt (RTX, N)
 
 #define CONST_VECTOR_NPATTERNS(RTX) \
  (RTL_FLAG_CHECK1 ("CONST_VECTOR_NPATTERNS", (RTX), CONST_VECTOR) \
@@ -1987,8 +1987,16 @@ #define CONST_VECTOR_STEPPED_P(RTX) \
 
 #define CONST_VECTOR_ENCODED_ELT(RTX, N) XCVECEXP (RTX, 0, N, CONST_VECTOR)
 
+/* Return the number of elements encoded directly in a CONST_VECTOR.  */
+
+inline unsigned int
+const_vector_encoded_nelts (const_rtx x)
+{
+  return CONST_VECTOR_NPATTERNS (x) * CONST_VECTOR_NELTS_PER_PATTERN (x);
+}
+
 /* For a CONST_VECTOR, return the number of elements in a vector.  */
-#define CONST_VECTOR_NUNITS(RTX) XCVECLEN (RTX, 0, CONST_VECTOR)
+#define CONST_VECTOR_NUNITS(RTX) GET_MODE_NUNITS (GET_MODE (RTX))
 
 /* For a SUBREG rtx, SUBREG_REG extracts the value we want a subreg of.
SUBREG_BYTE extracts the byte-number.  */
@@ -3000,6 +3008,8 @@ unwrap_const_vec_duplicate (T x)
 }
 
 /* In emit-rtl.c.  */
+extern wide_int const_vector_int_elt (const_rtx, unsigned int);
+extern rtx const_vector_elt (const_rtx, unsigned int);
 extern bool const_vec_series_p_1 (const_rtx, rtx *, rtx *);
 
 /* Return true if X is an integer constant vector that contains a linear
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-12-22 13:11:58.364634475 +
+++ gcc/emit-rtl.c  2017-12-22 13:12:00.833520695 +
@@ -5862,6 +5862,62 @@ init_emit (void)
 #endif
 }
 
+/* Return the value of element I of CONST_VECTOR X as a wide_int.  */
+
+wide_int
+const_vector_int_elt (const_rtx x, unsigned int i)
+{
+  /* First handle elements that are directly encoded.  */
+  machine_mode elt_mode = GET_MODE_INNER (GET_MODE (x));
+  if (i < (unsigned int) XVECLEN (x, 0))
+return rtx_mode_t (CONST_VECTOR_ENCODED_ELT (x, i), elt_mode);
+
+  /* Identify the pattern that contains element I and work out the index of
+ the last encoded element for that pattern.  */
+  unsigned int encoded_nelts = const_vector_encoded_nelts (x);
+  unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
+  unsigned int count = i / npatterns;
+  unsigned int pattern = i % npatterns;
+  unsigned int final_i = encoded_nelts - npatterns + pattern;
+
+  /* If there are no steps, the final encoded value is the right one.  */
+  if (!CONST_VECTOR_STEPPED_P (x))
+return rtx_mode_t (CONST_VECTOR_ENCODED_ELT (x, final_i), elt_mode);
+
+  /* Otherwise work out the value from the last two encoded elements.  */
+  rtx v1 = CONST_VECTOR_ENCODED_ELT (x, final_i - npatterns);
+  rtx v2 = CONST_VECTOR_ENCODED_ELT (x, final_i);
+  wide_int diff = wi::sub (rtx_mode_t (v2, elt_mode),
+  rtx_mode_t (v1, elt_mode));
+  return wi::add (rtx_mode_t (v2, elt_mode), (count - 2) * diff);
+}
+
+/* Return the value of element I of CONST_VECTOR X.  */
+
+rtx
+const_vector_elt (const_rtx x, unsigned int i)
+{
+  /* First handle elements that are directly encoded.  */
+  if (i < (unsigned int) XVECLEN (x, 0))
+return CONST_VECTOR_ENCODED_ELT (x, i);
+
+  /* If there are no steps, the final encoded value is the right one.  */
+  if (!CONST_VECTOR_STEPPED_P (x))
+{
+  /* Identify the pattern that contains element I and work out the index of
+the last encoded element for that pattern.  */
+  unsigned int encoded_nelts = const_vector_encoded_nelts (x);
+  unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
+  unsigned int pattern = i % npatterns;
+  unsigned int final_i = encoded_nelts - npatterns + pattern;
+  return CONST_VECTOR_ENCODED_ELT (x, final_i);
+}
+
+  /* Otherwise work out the value from the last two encoded elements.  */
+  return immed_wide_int_const (const_vector_int_elt (x, i),
+  GET_MODE_INNER (GET_MODE (x)));
+}
+
 /* Return true if X is a valid element for a CONST_VECTOR of the given
   mode.  */
 
Index: gcc/simplify-rtx.c
===
-

Re: [PATCH] Add _Float/_FloatX rounding built-ins & improve gimple optimization of _Float/_FloatX built-in functions

2017-12-28 Thread Michael Meissner
On Thu, Dec 21, 2017 at 06:16:16PM +0000, Joseph Myers wrote:
> On Fri, 17 Nov 2017, Michael Meissner wrote:
> 
> > Here is the fixed patch.  It fixes the btrunc2 insn to use the correct
> > XSRPQI variant for truncf128.  I added the float128-hw11.c test as a runtime
> > test to make sure round, trunc, ceil, and floor return the correct values.  
> > The
> > machine independent portions are the same.
> 
> The architecture-independent changes are OK.  However, I have a comment on 
> the target parts:

Ok, I have committed the machine independent patches, and I will revise the
machine dependent patches, and add more tests.  Thanks for the review.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: [patch, libgfortran] Bug 83560 - list-directed formatting of INTEGER is missing plus on output

2017-12-28 Thread Jerry DeLisle
On 12/25/2017 12:06 PM, Jerry DeLisle wrote:
> On 12/25/2017 05:10 AM, Dominique d'Humières wrote:
>> Dear Jerry,
>>
>> The lines
>>
>> +a=12.3456
>>
>> and
>>
>> +open(unit=10,sign='plus')
>>
>> in gfortran.dg/integer_plus.f90 could probably be removed.
>>
> 
> Yes, left over from some other testing I was doing
> 
>> From comment 2 in the PR (and the attached test), it seems that the reporter 
>> is expecting sign=‘plus’ to apply also to namelists, which is not the case 
>> with this patch.
>>
>> This seems supported by (my understanding of)
>>
>>> 10.11.4.2 Namelist output editing
>>>
>>> 1 Values in namelist output records are edited as for list-directed output 
>>> (10.10.4).
>>
>> Merry Christmas!
>>
>> Dominique
>>
>>
> 
> What I did last night made perfect sense at the time. Now, your point well
> taken. The previous write_integer suppressed leading spaces nicely for writing
> repeat counts, write_decimal does not do this directly. I am going to have to 
> be
> careful we don't put plus signs on repeat counts.
> 
> Merry Christmas to you and all!
> 
> Jerry

The attached patch adds the "plus" functionality to namelist writes. I had to
adjust write_decimal to not emit leading blanks and instead make them trailing
(in namelist mode). Our namelist read functions do not like spaces between the
repeat symbol and the plus sign. This required minor modification to two test
cases.  I got rid of the namelist_write_integer from my previous patch.

(I will do testsuite ChangeLog at time of commit.)

Regression tested on x86_64-pc-linux-gnu.

OK for trunk?

Jerry

2017-12-28  Jerry DeLisle  

PR libgfortran/83560
* io/write.c (write_integer): Modify to use write_decimal.
For namelist mode, suppress leading blanks and emit them as
trailing blanks. Change parameter from len to kind for better
readability. (nml_write_obj): Fix comment style.


diff --git a/gcc/testsuite/gfortran.dg/integer_plus.f90 b/gcc/testsuite/gfortran.dg/integer_plus.f90
new file mode 100644
index 000..695f9d34621
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/integer_plus.f90
@@ -0,0 +1,12 @@
+! { dg-run run )
+! PR83560 list-directed formatting of INTEGER is missing plus on output
+! when output open with SIGN='PLUS'
+character(64) :: astring
+i=789
+open(unit=10, status='scratch', sign='plus')
+write(10,*) i
+rewind(10)
+read(10,*) astring
+close (10)
+if (astring.ne.'+789') call abort
+end
diff --git a/gcc/testsuite/gfortran.dg/namelist_53.f90 b/gcc/testsuite/gfortran.dg/namelist_53.f90
index d4fdf574e0e..9e5692abe6a 100644
--- a/gcc/testsuite/gfortran.dg/namelist_53.f90
+++ b/gcc/testsuite/gfortran.dg/namelist_53.f90
@@ -5,5 +5,5 @@
   n = 123
   line = ""
   write(line,nml=stuff)
-  if (line.ne."&STUFF  N=123,  /") call abort
+  if (line.ne."&STUFF  N=123,  /") print *, line
   end 
diff --git a/gcc/testsuite/gfortran.dg/namelist_57.f90 b/gcc/testsuite/gfortran.dg/namelist_57.f90
index 7db4c4bb83c..a110fa0d840 100644
--- a/gcc/testsuite/gfortran.dg/namelist_57.f90
+++ b/gcc/testsuite/gfortran.dg/namelist_57.f90
@@ -7,6 +7,6 @@
   line = ""
   write(line,nml=stuff)
   if (line(1) .ne. "&STUFF") call abort
-  if (line(2) .ne. " N=123,") call abort
+  if (line(2) .ne. " N=123,") call abort
   if (line(3) .ne. " /") call abort
   end 
diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c
index 926d510f4d7..19e53ebdeb8 100644
--- a/libgfortran/io/write.c
+++ b/libgfortran/io/write.c
@@ -870,8 +870,11 @@ write_decimal (st_parameter_dt *dtp, const fnode *f, const char *source,
 	  goto done;
 	}
 
-  memset4 (p4, ' ', nblank);
-  p4 += nblank;
+  if (!dtp->u.p.namelist_mode)
+	{
+	  memset4 (p4, ' ', nblank);
+	  p4 += nblank;
+	}
 
   switch (sign)
 	{
@@ -890,6 +893,12 @@ write_decimal (st_parameter_dt *dtp, const fnode *f, const char *source,
 
   memcpy4 (p4, q, digits);
   return;
+
+  if (dtp->u.p.namelist_mode)
+	{
+	  p4 += digits;
+	  memset4 (p4, ' ', nblank);
+	}
 }
 
   if (nblank < 0)
@@ -898,8 +907,11 @@ write_decimal (st_parameter_dt *dtp, const fnode *f, const char *source,
   goto done;
 }
 
-  memset (p, ' ', nblank);
-  p += nblank;
+  if (!dtp->u.p.namelist_mode)
+{
+  memset (p, ' ', nblank);
+  p += nblank;
+}
 
   switch (sign)
 {
@@ -918,6 +930,12 @@ write_decimal (st_parameter_dt *dtp, const fnode *f, const char *source,
 
   memcpy (p, q, digits);
 
+  if (dtp->u.p.namelist_mode)
+{
+  p += digits;
+  memset (p, ' ', nblank);
+}
+
  done:
   return;
 }
@@ -1300,17 +1318,12 @@ write_logical (st_parameter_dt *dtp, const char *source, int length)
 /* Write a list-directed integer value.  */
 
 static void
-write_integer (st_parameter_dt *dtp, const char *source, int length)
+write_integer (st_parameter_dt *dtp, const char *source, int kind)
 {
-  char *p;
-  const char *q;
-  int digits;
   int width;
-  char itoa_buf[GFC_ITOA_BUF_SIZE];
-
-  q = gfc_itoa (extrac

Re: [nvptx, committed] Disable -gstatement-frontiers for nvptx

2017-12-28 Thread Alexandre Oliva
On Dec 27, 2017, Tom de Vries  wrote:

> .loc file_index line_number column_position

> so this causes ptxas errors when compiling something for nvptx with
> -g, which breaks the nvptx build.

What do the errors look like?

I ask because the patches that actually change the generated debug info,
adding view numbers to .loc lines when the assembler supports them, are
yet to be installed; they are in the patch that introduces LVUs.  No
significant changes have been made to dwarf2out in the SFN patchset so far.

Furthermore, even with the LVU patch, .loc directives with view numbers
would only be used if the assembler is detected as supporting them at
compiler build time.

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist | Red Hat Brasil GNU Toolchain Engineer


Re: [PATCH] Add _Float/_FloatX rounding built-ins & improve gimple optimization of _Float/_FloatX built-in functions

2017-12-28 Thread Michael Meissner
On Thu, Dec 21, 2017 at 01:03:26PM -0600, Segher Boessenkool wrote:
> On Thu, Dec 21, 2017 at 06:16:16PM +0000, Joseph Myers wrote:
> > On Fri, 17 Nov 2017, Michael Meissner wrote:
> > The architecture-independent changes are OK.  However, I have a comment on 
> > the target parts:
> > 
> > > +(define_insn "round2"
> > > +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > > + (unspec:IEEE128
> > > +  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> > > +  UNSPEC_FRIN))]
> > > +  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
> > > +  "xsrqpi 0,%0,%1,3"
> > > +  [(set_attr "type" "vecfloat")
> > > +   (set_attr "size" "128")])
> > 
> > My reading of the Power ISA 3.0B documentation is that 0,%0,%1,3 means round
> > in the mode specified by FPSCR, and you need 0,%0,%1,0 for the
> > round-to-nearest-away semantics that the round<mode>2
> > instruction has (i.e., what you've written here is actually correct for
> > nearbyint<mode>2, and would be rint<mode>2 if xsrqpix were used instead).
> 
> Ah yes, the roundM2 insn is round-away-from-zero, so you are right.
> Tricky, from the name I assumed it would be "current rounding mode" :-/
> Not that "frin" would make sense if that were true.
> 
> Thanks!  And thanks for all the reviews in general.

Here is the corrected rs6000 part of the patch.  I added more round tests and I
checked it on a power9 prototype machine.  Roundf128 now produces the correct
answer.  Can I check this into the trunk?

[gcc]
2017-12-29  Michael Meissner  

* config/rs6000/rs6000.md (floor<mode>2): Add support for IEEE
128-bit round to integer instructions.
(ceil<mode>2): Likewise.
(btrunc<mode>2): Likewise.
(round<mode>2): Likewise.

[gcc/testsuite]
2017-12-29  Michael Meissner  

* gcc.target/powerpc/float128-hw2.c: Add tests for ceilf128,
floorf128, truncf128, and roundf128.
* gcc.target/powerpc/float128-hw5.c: New tests for _Float128
optimizations added in match.pd.
* gcc.target/powerpc/float128-hw6.c: Likewise.
* gcc.target/powerpc/float128-hw7.c: Likewise.
* gcc.target/powerpc/float128-hw8.c: Likewise.
* gcc.target/powerpc/float128-hw9.c: Likewise.
* gcc.target/powerpc/float128-hw10.c: Likewise.
* gcc.target/powerpc/float128-hw11.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 256026)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -14777,6 +14777,47 @@ (define_insn_and_split "floatuns2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+   (unspec:IEEE128
+[(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+UNSPEC_FRIM))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "xsrqpi 1,%0,%1,3"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "ceil2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+   (unspec:IEEE128
+[(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+UNSPEC_FRIP))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "xsrqpi 1,%0,%1,2"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "btrunc2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+   (unspec:IEEE128
+[(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+UNSPEC_FRIZ))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "xsrqpi 1,%0,%1,1"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "round2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+   (unspec:IEEE128
+[(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+UNSPEC_FRIN))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "xsrqpi 0,%0,%1,0"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
 ;; IEEE 128-bit instructions with round to odd semantics
 (define_insn "add3_odd"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
Index: gcc/testsuite/gcc.target/powerpc/float128-hw2.c
===
--- gcc/testsuite/gcc.target/powerpc/float128-hw2.c (revision 256026)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw2.c (working copy)
@@ -14,6 +14,10 @@
 extern _Float128 copysignf128 (_Float128, _Float128);
 extern _Float128 sqrtf128 (_Float128);
 extern _Float128 fmaf128 (_Float128, _Float128, _Float128);
+extern _Float128 ceilf128 (_Float128);
+extern _Float128 floorf128 (_Float128);
+extern _Float128 truncf128 (_Float128);
+extern _Float128 roundf128 (_Float128);
 
 _Float128
 do_copysign (_Float128 a, _Float128 b)
@@ -51,10 +55,35 @@ do_nfms (_Float128 a, _Float128 b, _Floa
   return -fmaf128 (a, b, -c);
 }
 
+_Float128
+do_