Re: [PATCH] s390: Optimize vec_cmpge followed by vec_sel

2023-07-17 Thread Andreas Krebbel via Gcc-patches
On 7/17/23 17:09, Juergen Christ wrote:
> A vec_cmpge produces a negation.  Replace this negation by swapping the two
> selection choices of a vec_sel based on the result of the vec_cmpge.
> 
> Bootstrapped and regression tested on s390x.
> 
> gcc/ChangeLog:
> 
>   * config/s390/vx-builtins.md: New vsel pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-cmpge.c: New test.
> 
> Signed-off-by: Juergen Christ 

Committed to mainline. Thanks!

Bye,

Andreas



[Committed] IBM Z: Handle unaligned symbols

2023-08-01 Thread Andreas Krebbel via Gcc-patches
The IBM Z ELF ABI mandates every symbol to reside on a 2 byte boundary
in order to be able to use the larl instruction. However, in some
situations it is difficult to enforce this, e.g. for common linker
scripts as used in the Linux kernel. This patch introduces the
-munaligned-symbols option. When that option is used, external symbols
without an explicit alignment are considered unaligned and their addresses
will be pushed into the GOT or the literal pool.

If the symbol turns out to end up on a 2 byte boundary in the final
linker step, the linker is able to take this back and replace the indirect
reference with larl again. This should limit the effect to symbols
which are actually unaligned in the end.

Bootstrapped and regression tested on s390x. Committed to mainline.

Backports to stable branches will follow.

gcc/ChangeLog:

* config/s390/s390.cc (s390_encode_section_info): Assume external
symbols without explicit alignment to be unaligned if
-munaligned-symbols has been specified.
* config/s390/s390.opt (-munaligned-symbols): New option.

gcc/testsuite/ChangeLog:

* gcc.target/s390/aligned-1.c: New test.
* gcc.target/s390/unaligned-1.c: New test.
---
 gcc/config/s390/s390.cc |  9 +++--
 gcc/config/s390/s390.opt|  7 +++
 gcc/testsuite/gcc.target/s390/aligned-1.c   | 20 
 gcc/testsuite/gcc.target/s390/unaligned-1.c | 20 
 4 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/aligned-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/unaligned-1.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 13970edcb5e..89474fd487a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13709,8 +13709,13 @@ s390_encode_section_info (tree decl, rtx rtl, int 
first)
 a larl/load-relative instruction.  We only handle the cases
 that can go wrong (i.e. no FUNC_DECLs).
 All symbols without an explicit alignment are assumed to be 2
-byte aligned as mandated by our ABI.  */
-  if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
+byte aligned as mandated by our ABI.  This behavior can be
+overridden for external symbols with the -munaligned-symbols
+switch.  */
+  if (DECL_ALIGN (decl) % 16
+ && (DECL_USER_ALIGN (decl)
+ || (!SYMBOL_REF_LOCAL_P (XEXP (rtl, 0))
+ && s390_unaligned_symbols_p)))
SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
   else if (DECL_ALIGN (decl) % 32)
SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index 344aa551f44..496572046f7 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -329,3 +329,10 @@ Target Undocumented Var(unroll_only_small_loops) Init(0) 
Save
 mpreserve-args
 Target Var(s390_preserve_args_p) Init(0)
 Store all argument registers on the stack.
+
+munaligned-symbols
+Target Var(s390_unaligned_symbols_p) Init(0)
+Assume external symbols to be potentially unaligned.  By default all
+symbols without explicit alignment are assumed to reside on a 2 byte
+boundary as mandated by the IBM Z ABI.
+
diff --git a/gcc/testsuite/gcc.target/s390/aligned-1.c 
b/gcc/testsuite/gcc.target/s390/aligned-1.c
new file mode 100644
index 000..2dc99cf66bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/aligned-1.c
@@ -0,0 +1,20 @@
+/* Even symbols without explicite alignment are assumed to reside on a
+   2 byte boundary, as mandated by the IBM Z ELF ABI, and therefore
+   can be accessed using the larl instruction.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z900 -fno-section-anchors" } */
+
+extern unsigned char extern_implicitly_aligned;
+extern unsigned char extern_explicitly_aligned __attribute__((aligned(2)));
+unsigned char aligned;
+
+unsigned char
+foo ()
+{
+  return extern_implicitly_aligned + extern_explicitly_aligned + aligned;
+}
+
+/* { dg-final { scan-assembler-times 
"larl\t%r\[0-9\]*,extern_implicitly_aligned\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"larl\t%r\[0-9\]*,extern_explicitly_aligned\n" 1 } } */
+/* { dg-final { scan-assembler-times "larl\t%r\[0-9\]*,aligned\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/s390/unaligned-1.c 
b/gcc/testsuite/gcc.target/s390/unaligned-1.c
new file mode 100644
index 000..421330aded1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/unaligned-1.c
@@ -0,0 +1,20 @@
+/* With the -munaligned-symbols option all external symbols without
+   explicite alignment are assumed to be potentially unaligned and
+   therefore cannot be accessed with larl.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z900 -fno-section-anchors -munaligned-symbols" } */
+
+extern unsigned char extern_unaligned;
+extern unsigned char extern_explicitly_aligned __attribute__((aligned(2)));
+unsigned char aligned;
+
+unsigned cha

[PATCH 1/1] PR 106101: IBM zSystems: Fix strict_low_part problem

2022-07-29 Thread Andreas Krebbel via Gcc-patches
This avoids generating illegal (strict_low_part (reg ...)) RTXs. This
required two changes:

1. Do not use gen_lowpart to generate the inner expression of a
STRICT_LOW_PART.  gen_lowpart might fold the SUBREG either because
there is already a paradoxical subreg or because it can directly be
applied to the register. A new wrapper function makes sure that we
always end up having an actual SUBREG.

2. Change the movstrict patterns to enforce a SUBREG as inner operand
of the STRICT_LOW_PARTs.  The new predicate introduced for the
destination operand requires a SUBREG expression with a
register_operand as inner operand.  However, since reload strips away
the majority of the SUBREGs we have to accept single registers as well
once we reach reload.

Bootstrapped and regression tested on IBM zSystems 64 bit.

gcc/ChangeLog:

PR target/106101
* config/s390/predicates.md (subreg_register_operand): New
predicate.
* config/s390/s390-protos.h (s390_gen_lowpart_subreg): New
function prototype.
* config/s390/s390.cc (s390_gen_lowpart_subreg): New function.
(s390_expand_insv): Use s390_gen_lowpart_subreg instead of
gen_lowpart.
* config/s390/s390.md ("*get_tp_64", "*zero_extendhisi2_31")
("*zero_extendqisi2_31", "*zero_extendqihi2_31"): Likewise.
("movstrictqi", "movstricthi", "movstrictsi"): Use the
subreg_register_operand predicate instead of register_operand.

gcc/testsuite/ChangeLog:

PR target/106101
* gcc.c-torture/compile/pr106101.c: New test.
---
 gcc/config/s390/predicates.md | 12 
 gcc/config/s390/s390-protos.h |  1 +
 gcc/config/s390/s390.cc   | 27 +++-
 gcc/config/s390/s390.md   | 36 +--
 .../gcc.c-torture/compile/pr106101.c  | 62 +++
 5 files changed, 116 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106101.c

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 33194d3f3d6..430cf6edfd6 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -594,3 +594,15 @@
 (define_predicate "addv_const_operand"
   (and (match_code "const_int")
(match_test "INTVAL (op) >= -32768 && INTVAL (op) <= 32767")))
+
+; Match (subreg (reg ...)) operands.
+; Used for movstrict destination operands
+; When replacing pseudos with hard regs reload strips away the
+; subregs. Accept also plain registers then to prevent the insn from
+; becoming unrecognizable.
+(define_predicate "subreg_register_operand"
+  (ior (and (match_code "subreg")
+   (match_test "register_operand (SUBREG_REG (op), GET_MODE 
(SUBREG_REG (op)))"))
+   (and (match_code "reg")
+   (match_test "reload_completed || reload_in_progress")
+   (match_test "register_operand (op, GET_MODE (op))"
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index fd4acaae44a..765d843a418 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,6 +50,7 @@ extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
 extern bool s390_return_addr_from_memory(void);
+extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
 extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5aaf76a9490..5e06bf9350c 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -458,6 +458,31 @@ s390_return_addr_from_memory ()
   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
 }
 
+/* Generate a SUBREG for the MODE lowpart of EXPR.
+
+   In contrast to gen_lowpart it will always return a SUBREG
+   expression.  This is useful to generate STRICT_LOW_PART
+   expressions.  */
+rtx
+s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
+{
+  rtx lowpart = gen_lowpart (mode, expr);
+
+  /* There might be no SUBREG in case it could be applied to the hard
+ REG rtx or it could be folded with a paradoxical subreg.  Bring
+ it back.  */
+  if (!SUBREG_P (lowpart))
+{
+  machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
+  gcc_assert (REG_P (lowpart));
+  lowpart = gen_lowpart_SUBREG (mode,
+   gen_rtx_REG (reg_mode,
+REGNO (lowpart)));
+}
+
+  return lowpart;
+}
+
 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
 bool
 s390_fma_allowed_p (machine_mode mode)
@@ -6520,7 +6545,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   /* Emit a strict_low_part pattern if possible.  */
   if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
- rtx low

Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes

2022-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/22 12:20, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> 
> dg.exp=pr104612.c fails with an ICE on s390x, because copysignv2sf3
> produces an insn that vsel is supposed to recognize, but can't,
> because it's not defined for V2SF.  Fix by defining it for all vector
> modes supported by copysign3.
> 
> gcc/ChangeLog:
> 
>   * config/s390/vector.md (V_HW_FT): New iterator.
>   * config/s390/vx-builtins.md (vsel): Use V instead of
>   V_HW.

Ok. There is a typo in the changelog:
"Use *V* instead ..." should probably read "Use V_HW_FT instead ..."

Thanks,

Andreas

> ---
>  gcc/config/s390/vector.md  |  6 ++
>  gcc/config/s390/vx-builtins.md | 12 ++--
>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index a6c4b4eb974..624729814af 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -63,6 +63,12 @@
>  V1DF V2DF
>  (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
>  
> +; All modes present in V_HW and VFT.
> +(define_mode_iterator V_HW_FT [V16QI V8HI V4SI V2DI (V1TI "TARGET_VXE") V1DF
> +V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
> +(V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
> +(TF "TARGET_VXE")])
> +
>  ; FP vector modes directly supported by the HW.  This does not include
>  ; vector modes using only part of a vector register and should be used
>  ; for instructions which might trigger IEEE exceptions.
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index d5130799804..98ee08b2683 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -517,12 +517,12 @@
>  ; swapped in s390-c.cc when we get here.
>  
>  (define_insn "vsel"
> -  [(set (match_operand:V_HW  0 "register_operand" "=v")
> - (ior:V_HW
> -  (and:V_HW (match_operand:V_HW   1 "register_operand"  "v")
> -(match_operand:V_HW   3 "register_operand"  "v"))
> -  (and:V_HW (not:V_HW (match_dup 3))
> -(match_operand:V_HW   2 "register_operand"  "v"]
> +  [(set (match_operand:V_HW_FT   0 "register_operand" "=v")
> + (ior:V_HW_FT
> +  (and:V_HW_FT (match_operand:V_HW_FT 1 "register_operand"  "v")
> +   (match_operand:V_HW_FT 3 "register_operand"  "v"))
> +  (and:V_HW_FT (not:V_HW_FT (match_dup 3))
> +   (match_operand:V_HW_FT 2 "register_operand"  "v"]
>"TARGET_VX"
>"vsel\t%v0,%1,%2,%3"
>[(set_attr "op_type" "VRR")])



Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes

2022-08-10 Thread Andreas Krebbel via Gcc-patches
On 8/10/22 13:42, Ilya Leoshkevich wrote:
> On Wed, 2022-08-03 at 12:20 +0200, Ilya Leoshkevich wrote:
>> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
>>
>>
>>
>> dg.exp=pr104612.c fails with an ICE on s390x, because copysignv2sf3
>> produces an insn that vsel is supposed to recognize, but can't,
>> because it's not defined for V2SF.  Fix by defining it for all vector
>> modes supported by copysign3.
>>
>> gcc/ChangeLog:
>>
>> * config/s390/vector.md (V_HW_FT): New iterator.
>> * config/s390/vx-builtins.md (vsel): Use V instead of
>> V_HW.
>> ---
>>  gcc/config/s390/vector.md  |  6 ++
>>  gcc/config/s390/vx-builtins.md | 12 ++--
>>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> Jakub pointed out that this is broken in gcc-12 as well.
> The patch applies cleanly, and I started a bootstrap/regtest.
> Ok for gcc-12?

Yes. Thanks!

Andreas


Re: [PATCH] s390: Enable vect_bswap test cases

2023-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/23 08:48, Stefan Schulze Frielinghaus wrote:
> This enables the following tests which rely on instruction vperm which
> is available since z13 with the initial vector support.
> 
> testsuite/gcc.dg/vect/vect-bswap16.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> testsuite/gcc.dg/vect/vect-bswap32.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> testsuite/gcc.dg/vect/vect-bswap64.c
> 42:/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
> target { vect_bswap || sse4_runtime } } } } */
> 
> Ok for mainline?

Ok. Thanks!

Andreas

> 
> gcc/testsuite/ChangeLog:
> 
>   * lib/target-supports.exp (check_effective_target_vect_bswap):
>   Add s390.
> ---
>  gcc/testsuite/lib/target-supports.exp | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 4d04df2a709..2ccc0291442 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7087,9 +7087,11 @@ proc check_effective_target_whole_vector_shift { } {
>  
>  proc check_effective_target_vect_bswap { } {
>  return [check_cached_effective_target_indexed vect_bswap {
> -  expr { [istarget aarch64*-*-*]
> -  || [is-effective-target arm_neon]
> -  || [istarget amdgcn-*-*] }}]
> +  expr { ([istarget aarch64*-*-*]
> +   || [is-effective-target arm_neon]
> +   || [istarget amdgcn-*-*])
> +  || ([istarget s390*-*-*]
> +  && [check_effective_target_s390_vx]) }}]
>  }
>  
>  # Return 1 if the target supports comparison of bool vectors for at



Re: [PATCH] s390: Try to emit vlbr/vstbr instead of vperm et al.

2023-08-03 Thread Andreas Krebbel via Gcc-patches
On 8/3/23 08:51, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on s390x.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate):
>   New function which handles bswap patterns for vec_perm_const.
>   (vectorize_vec_perm_const_1): Call new function.
>   * config/s390/vector.md (*bswap): Fix operands in output
>   template.
>   (*vstbr): New insn.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/s390.exp: Add subdirectory vxe2.
>   * gcc.target/s390/vxe2/vlbr-1.c: New test.
>   * gcc.target/s390/vxe2/vstbr-1.c: New test.
>   * gcc.target/s390/vxe2/vstbr-2.c: New test.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390.cc  | 55 
>  gcc/config/s390/vector.md| 16 --
>  gcc/testsuite/gcc.target/s390/s390.exp   |  3 ++
>  gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c  | 29 +++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c | 29 +++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c | 42 +++
>  6 files changed, 170 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index d9f10542473..91eb9232b10 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -17698,6 +17698,58 @@ expand_perm_with_vstbrq (const struct 
> expand_vec_perm_d &d)
>return false;
>  }
>  
> +/* Try to emit vlbr/vstbr.  Note, this is only a candidate insn since
> +   TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only.  Thus,
> +   either fwprop, combine et al. "fixes" one of the input/output operands 
> into
> +   a memory operand or a splitter has to reverse this into a general vperm
> +   operation.  */
> +
> +static bool
> +expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
> +{
> +  static const char perm[4][MAX_VECT_LEN]
> += { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },
> + { 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },
> + { 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },
> + { 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } };
> +
> +  if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
> +return false;
> +
> +  if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
> +  emit_insn (gen_bswapv8hi (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
> +  emit_insn (gen_bswapv4si (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
> +  emit_insn (gen_bswapv2di (target, op0));
> +  return true;
> +}
> +
> +  if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
> +{
> +  rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
> +  rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
> +  emit_insn (gen_bswapv1ti (target, op0));
> +  return true;
> +}
> +
> +  return false;
> +}
>  
>  /* Try to find the best sequence for the vector permute operation
> described by D.  Return true if the operation could be
> @@ -17720,6 +17772,9 @@ vectorize_vec_perm_const_1 (const struct 
> expand_vec_perm_d &d)
>if (expand_perm_with_rot (d))
>  return true;
>  
> +  if (expand_perm_as_a_vlbr_vstbr_candidate (d))
> +return true;
> +
>return false;
>  }
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 21bec729efa..f0e9ed3d263 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -47,6 +47,7 @@
>  (define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI])
>  (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
>  (define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
> +(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
>  (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
>  (define_mode_iterator VI_HW_QH  [V16QI V8HI])
>  
> @@ -2876,12 +2877,12 @@
>   (use (match_dup 2))])]
>"TARGET_VX"
>  {
> -  static char p[4][16] =
> +  static const char p[4][16] =
>  { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },   /* 
> H */
>{ 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },   /* 
> S */
>{ 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },   /* 
> D */
>{ 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3, 

[Committed] IBM zSystems: Assume symbols without explicit alignment to be ok

2023-06-26 Thread Andreas Krebbel via Gcc-patches
A change we committed back in 2015 relies on the backend-requested
ABI alignment being applied to ALL symbols by the
middle-end. However, this does not appear to be the case for external
symbols. With this commit we assume all symbols without explicit
alignment to be aligned according to the ABI. That's the behavior we
had before.
This fixes a performance regression caused by the 2015 patch. Since
then the addresses of external char type symbols have been pushed to the
literal pool, although it is safe to access them with larl (which
requires symbols to reside at even addresses).

Bootstrapped and regression tested on s390x.

gcc/
* config/s390/s390.cc (s390_encode_section_info): Set
SYMBOL_FLAG_SET_NOTALIGN2 only if the symbol has explicitly been
misaligned.

gcc/testsuite/
* gcc.target/s390/larl-1.c: New test.
---
 gcc/config/s390/s390.cc|  6 +++--
 gcc/testsuite/gcc.target/s390/larl-1.c | 32 ++
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/larl-1.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 9284477396d..d9f10542473 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13706,8 +13706,10 @@ s390_encode_section_info (tree decl, rtx rtl, int 
first)
 {
   /* Store the alignment to be able to check if we can use
 a larl/load-relative instruction.  We only handle the cases
-that can go wrong (i.e. no FUNC_DECLs).  */
-  if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
+that can go wrong (i.e. no FUNC_DECLs).
+All symbols without an explicit alignment are assumed to be 2
+byte aligned as mandated by our ABI.  */
+  if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
   else if (DECL_ALIGN (decl) % 32)
SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/testsuite/gcc.target/s390/larl-1.c 
b/gcc/testsuite/gcc.target/s390/larl-1.c
new file mode 100644
index 000..5ef2ef63f82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/larl-1.c
@@ -0,0 +1,32 @@
+/* Check if load-address-relative instructions are created */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -march=z10 -mzarch -fno-section-anchors" } */
+
+/* An explicitely misaligned symbol.  This symbol is NOT aligned as
+   mandated by our ABI.  However, the back-end needs to handle that in
+   order to make things like __attribute__((packed)) work.  The symbol
+   address is expected to be loaded from literal pool.  */
+/* { dg-final { scan-assembler "lgrl\t%r2," { target { lp64 } } } } */
+/* { dg-final { scan-assembler "lrl\t%r2," { target { ! lp64 } } } } */
+extern char align1 __attribute__((aligned(1)));
+
+/* { dg-final { scan-assembler "larl\t%r2,align2" } } */
+extern char align2 __attribute__((aligned(2)));
+
+/* { dg-final { scan-assembler "larl\t%r2,align4" } } */
+extern char align4 __attribute__((aligned(4)));
+
+/* An external char symbol without explicit alignment has a DECL_ALIGN
+   of just 8. In contrast to local definitions DATA_ABI_ALIGNMENT is
+   NOT applied to DECL_ALIGN in that case.  Make sure the backend
+   still assumes this symbol to be aligned according to ABI
+   requirements.  */
+/* { dg-final { scan-assembler "larl\t%r2,align_default" } } */
+extern char align_default;
+
+char * foo1 () { return &align1; }
+char * foo2 () { return &align2; }
+char * foo3 () { return &align4; }
+char * foo4 () { return &align_default; }
+
-- 
2.41.0



Re: [PATCH] s390: Fix vec_init default expander

2023-07-07 Thread Andreas Krebbel via Gcc-patches
On 7/7/23 15:51, Juergen Christ wrote:
> Do not reinitialize vector lanes to zero since they are already initialized to
> zero.
> 
> Bootstrapped and regression tested on s390x.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (vec_init): Fix default case
> 
> gcc/Testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-init-3.c: New test.

Ok. Pushed to mainline. Thanks!

Andreas



Re: [PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/2/23 09:43, Robin Dapp wrote:
> Hi,
> 
> this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
> It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
> optabs.
> 
> It also includes various vll/vstl testcases adapted from Kewen Lin's patch
> for Power.
> 
> Bootstrapped and regtested on z13-z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (vll_bias_operand): Add -1 bias.
>   * config/s390/s390.cc (s390_option_override_internal): Make
>   partial vector usage the default from z13 on.
>   * config/s390/vector.md (len_load_v16qi): Add.
>   (len_store_v16qi): Add.

...

> +;
> +; Implement len_load/len_store optabs with vll/vstl.
> +(define_expand "len_load_v16qi"
> +  [(match_operand:V16QI 0 "register_operand")
> +   (match_operand:V16QI 1 "memory_operand")
> +   (match_operand:QI 2 "register_operand")
> +   (match_operand:QI 3 "vll_bias_operand")
> +  ]
> +  "TARGET_VX && TARGET_64BIT"
> +{
> +  rtx src1 = XEXP (operands[1], 0);
> +  rtx src = gen_reg_rtx (Pmode);
> +  emit_move_insn (src, src1);
> +  rtx mem = gen_rtx_MEM (BLKmode, src);

Do you really need a copy of the address register? Couldn't you just do a
src = adjust_address (operands[1], BLKmode, 0);

> +
> +  rtx len = gen_lowpart (SImode, operands[2]);
> +  emit_insn (gen_vllv16qi (operands[0], len, mem));

You create a paradoxical subreg of the QImode input but vll actually uses the
whole 32 bit value. Couldn't we end up with uninitialized bytes being used as
part of the length then? Do we need a zero-extend here?

Bye,

Andreas



Re: [PATCH] IBM zSystems: Fix predicate execute_operation

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 17:10, Stefan Schulze Frielinghaus wrote:
> Use constrain_operands in order to check whether there exists a valid
> alternative instead of extract_constrain_insn which ICEs in case no
> alternative is found.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (execute_operation): Use
>   constrain_operands instead of extract_constrain_insn in order to
>   determine whether there exists a valid alternative.

Ok. Thanks!

Andreas



Re: [PATCH] IBM zSystems: Do not propagate scheduler state across basic blocks [PR108102]

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 16:59, Stefan Schulze Frielinghaus wrote:
> So far we propagate scheduler state across basic blocks within EBBs and
> reset the state otherwise.  In certain circumstances the entry block of
> an EBB might be empty, i.e., no_real_insns_p is true.  In those cases
> scheduler state is not reset and subsequently wrong state is propagated
> to following blocks of the same EBB.
> 
> Since the performance benefit of tracking state across basic blocks is
> questionable on modern hardware, simply reset the state for each basic
> block.
> 
> Fix also resetting f{p,x}d_longrunning.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_bb_fallthru_entry_likely): Remove.
>   (struct s390_sched_state): Initialise to zero.
>   (s390_sched_variable_issue): For better debuggability also emit
>   the current side.
>   (s390_sched_init): Unconditionally reset scheduler state.

Ok. Thanks!

Andreas




Re: [PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-27 Thread Andreas Krebbel via Gcc-patches
On 2/27/23 11:13, Robin Dapp wrote:
>> Do you really need a copy of the address register? Couldn't you just do a
>> src = adjust_address (operands[1], BLKmode, 0);
>> You create a paradoxical subreg of the QImode input but vll actually
>> uses the whole 32 bit value. Couldn't we end up with uninitialized
>> bytes being used as part of the length then? Do we need a zero-extend
>> here?
> 
> v2 attached with these problems addressed.
> 
> Testsuite and bootstrap as before.

Ok. Thanks!

Andreas




Re: [PATCH] s390: Use arch14 instead of z16 for -march=native.

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 19:17, Robin Dapp wrote:
> Hi,
> 
> When compiling on a system where binutils do not yet support the 'z16'
> name assembling fails with -march=native which we currently interpret
> as -march=z16 (on a z16 machine).  This patch uses -march=arch14
> instead.
> 
> Is it OK?

Ok. Thanks!

Andreas


> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/driver-native.cc (s390_host_detect_local_cpu): Use
>   arch14 instead of z16.
> ---
>  gcc/config/s390/driver-native.cc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/s390/driver-native.cc 
> b/gcc/config/s390/driver-native.cc
> index 563da45c7f6e..3b9c1e1ca5df 100644
> --- a/gcc/config/s390/driver-native.cc
> +++ b/gcc/config/s390/driver-native.cc
> @@ -125,10 +125,10 @@ s390_host_detect_local_cpu (int argc, const char **argv)
> break;
>   case 0x3931:
>   case 0x3932:
> -   cpu = "z16";
> +   cpu = "arch14";
> break;
>   default:
> -   cpu = "z16";
> +   cpu = "arch14";
> break;
>   }
>   }



Re: [PATCH] s390: libatomic: Fix 16 byte atomic {cas,load,store}

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 16:24, Stefan Schulze Frielinghaus wrote:
> This is a follow-up to commit a4c6bd0821099f6b8c0f64a96ffd9d01a025c413
> introducing a runtime check for alignment for 16 byte atomic
> compare-exchange, load, and store.
> 
> Bootstrapped and regtested on s390.
> Ok for mainline and gcc-{12,11,10}?
> 
> libatomic/ChangeLog:
> 
>   * config/s390/cas_n.c: New file.
>   * config/s390/load_n.c: New file.
>   * config/s390/store_n.c: New file.

Ok. Thanks!

Andreas

> ---
>  libatomic/config/s390/cas_n.c   | 65 +
>  libatomic/config/s390/load_n.c  | 57 +
>  libatomic/config/s390/store_n.c | 54 +++
>  3 files changed, 176 insertions(+)
>  create mode 100644 libatomic/config/s390/cas_n.c
>  create mode 100644 libatomic/config/s390/load_n.c
>  create mode 100644 libatomic/config/s390/store_n.c
> 
> diff --git a/libatomic/config/s390/cas_n.c b/libatomic/config/s390/cas_n.c
> new file mode 100644
> index 000..44b7152ca5d
> --- /dev/null
> +++ b/libatomic/config/s390/cas_n.c
> @@ -0,0 +1,65 @@
> +/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU Atomic Library (libatomic).
> +
> +   Libatomic is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   Libatomic is distributed in the hope that it will be useful, but WITHOUT 
> ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include 
> +
> +
> +/* Analog to config/s390/exch_n.c.  */
> +
> +#if !DONE && N == 16
> +bool
> +SIZE(libat_compare_exchange) (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
> +   int smodel, int fmodel UNUSED)
> +{
> +  if (!((uintptr_t)mptr & 0xf))
> +{
> +  return __atomic_compare_exchange_n (
> + (UTYPE *)__builtin_assume_aligned (mptr, 16), eptr, newval, false,
> + __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
> +}
> +  else
> +{
> +  UTYPE oldval;
> +  UWORD magic;
> +  bool ret;
> +
> +  pre_seq_barrier (smodel);
> +  magic = protect_start (mptr);
> +
> +  oldval = *mptr;
> +  ret = (oldval == *eptr);
> +  if (ret)
> + *mptr = newval;
> +  else
> + *eptr = oldval;
> +
> +  protect_end (mptr, magic);
> +  post_seq_barrier (smodel);
> +
> +  return ret;
> +}
> +}
> +#define DONE 1
> +#endif /* N == 16 */
> +
> +#include "../../cas_n.c"
> diff --git a/libatomic/config/s390/load_n.c b/libatomic/config/s390/load_n.c
> new file mode 100644
> index 000..335d2f8b2c3
> --- /dev/null
> +++ b/libatomic/config/s390/load_n.c
> @@ -0,0 +1,57 @@
> +/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU Atomic Library (libatomic).
> +
> +   Libatomic is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   Libatomic is distributed in the hope that it will be useful, but WITHOUT 
> ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <libatomic_i.h>
> +
> +
> +/* Analog to config/s390/exch_n.c.  */
> +
> +#if !DONE && N == 16
> +UTYPE
> +SIZE(libat_load) (UTYPE *mptr, int smodel)
> +{
> +  if (!((uintptr_t)mptr & 0xf))
> +{
> +  return __atomic_load_n ((UTYPE *)__builtin_assume_aligned (mptr, 16),
> +   __ATOMIC_SEQ_CST);
> +}
> +  else
> +{
> +  UTYPE ret;
> +  UWORD magic;
> +
> +  pre_seq_barrier (smodel);
> +  magic = protect_start (mp

Re: [PATCH] s390: Fix ifcvt test cases

2023-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/2/23 19:13, Robin Dapp wrote:
> Hi,
> 
> we seem to flip flop between the "high" and "not low" variants of load on
> condition.  Accept both in the affected test cases.
> 
> Going to commit this as obvious.
> 
> Regards
>  Robin
> 
> --
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/ifcvt-two-insns-bool.c: Allow "high" and
>   "not low or equal" load on condition variant.
>   * gcc.target/s390/ifcvt-two-insns-int.c: Ditto.
>   * gcc.target/s390/ifcvt-two-insns-long.c: Ditto.

Ok. Thanks!

Andreas

> ---
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c | 4 ++--
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c  | 4 ++--
>  gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c | 4 ++--
>  3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> index 1027ddceb935..a56bc4676143 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locrh\t.*" } } */
> +/* { dg-final { scan-assembler "lochi(?:h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
>  #include 
>  #include 
>  #include 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> index fc6946f2466d..64b8a732290e 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locrh\t.*" } } */
> +/* { dg-final { scan-assembler "lochi(h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
>  #include 
>  #include 
>  #include 
> diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c 
> b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> index 51af4985247a..f2d784e762a8 100644
> --- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> +++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
> @@ -3,8 +3,8 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
>  
> -/* { dg-final { scan-assembler "locghih\t%r.?,1" } } */
> -/* { dg-final { scan-assembler "locgrh\t.*" } } */
> +/* { dg-final { scan-assembler "locghi(?:h|nle)\t%r.?,1" } } */
> +/* { dg-final { scan-assembler "locgr(?:h|nle)\t.*" } } */
>  
>  #include 
>  #include 



Re: [PATCH 0/3] Refactor memory block operations

2023-05-15 Thread Andreas Krebbel via Gcc-patches
On 5/15/23 09:17, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested.  Ok for mainline?
> 
> Stefan Schulze Frielinghaus (3):
>   s390: Refactor block operation cpymem
>   s390: Add block operation movmem
>   s390: Refactor block operation setmem
> 
>  gcc/config/s390/s390-protos.h|   5 +-
>  gcc/config/s390/s390.cc  | 301 ---
>  gcc/config/s390/s390.md  |  61 -
>  gcc/testsuite/gcc.target/s390/memset-1.c |   7 +-
>  4 files changed, 331 insertions(+), 43 deletions(-)
> 

Ok. Thanks!

Andreas



Re: [PATCH] s390: Implement TARGET_ATOMIC_ALIGN_FOR_MODE

2023-05-16 Thread Andreas Krebbel via Gcc-patches
On 5/16/23 08:43, Stefan Schulze Frielinghaus wrote:
> So far atomic objects are aligned according to their default alignment.
> For 128 bit scalar types like int128 or long double this results in an
> 8 byte alignment which is wrong and must be 16 byte.
> 
> libstdc++ already computes a correct alignment, though, still adding a
> test case in order to make sure that both implementations are
> compatible.
> 
> Bootstrapped and regtested.  Ok for mainline?  Since this is an ABI
> break, is a backport to GCC 13 reasonable?

Ok for mainline.

I would also like to have it in GCC 13. It is an ABI breakage but on the other 
hand it also fixes an
ABI inconsistency between C and C++ which we should fix asap I think.

Andreas


> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (TARGET_ATOMIC_ALIGN_FOR_MODE):
>   New.
>   (s390_atomic_align_for_mode): New.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/s390/atomic-align-1.C: New test.
>   * gcc.target/s390/atomic-align-1.c: New test.
>   * gcc.target/s390/atomic-align-2.c: New test.
> ---
>  gcc/config/s390/s390.cc   |  8 ++
>  .../g++.target/s390/atomic-align-1.C  | 25 +++
>  .../gcc.target/s390/atomic-align-1.c  | 23 +
>  .../gcc.target/s390/atomic-align-2.c  | 18 +
>  4 files changed, 74 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/s390/atomic-align-1.C
>  create mode 100644 gcc/testsuite/gcc.target/s390/atomic-align-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/atomic-align-2.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 505de995da8..4813bf91dc4 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -450,6 +450,14 @@ s390_preserve_fpr_arg_p (int regno)
> && regno >= FPR0_REGNUM);
>  }
>  
> +#undef TARGET_ATOMIC_ALIGN_FOR_MODE
> +#define TARGET_ATOMIC_ALIGN_FOR_MODE s390_atomic_align_for_mode
> +static unsigned int
> +s390_atomic_align_for_mode (machine_mode mode)
> +{
> +  return GET_MODE_BITSIZE (mode);
> +}
> +
>  /* A couple of shortcuts.  */
>  #define CONST_OK_FOR_J(x) \
>   CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
> diff --git a/gcc/testsuite/g++.target/s390/atomic-align-1.C 
> b/gcc/testsuite/g++.target/s390/atomic-align-1.C
> new file mode 100644
> index 000..43aa0bc39ed
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/s390/atomic-align-1.C
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-std=c++11" } */
> +/* { dg-final { scan-assembler-times {\.align\t2} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t4} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t8} 3 } } */
> +/* { dg-final { scan-assembler-times {\.align\t16} 2 } } */
> +
> +#include <atomic>
> +
> +// 2
> +std::atomic<char> var_char;
> +std::atomic<short> var_short;
> +// 4
> +std::atomic<int> var_int;
> +// 8
> +std::atomic<long> var_long;
> +std::atomic<long long> var_long_long;
> +// 16
> +std::atomic<__int128> var_int128;
> +// 4
> +std::atomic<float> var_float;
> +// 8
> +std::atomic<double> var_double;
> +// 16
> +std::atomic<long double> var_long_double;
> diff --git a/gcc/testsuite/gcc.target/s390/atomic-align-1.c 
> b/gcc/testsuite/gcc.target/s390/atomic-align-1.c
> new file mode 100644
> index 000..b2e1233e3ee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/atomic-align-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-std=c11" } */
> +/* { dg-final { scan-assembler-times {\.align\t2} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t4} 2 } } */
> +/* { dg-final { scan-assembler-times {\.align\t8} 3 } } */
> +/* { dg-final { scan-assembler-times {\.align\t16} 2 } } */
> +
> +// 2
> +_Atomic char var_char;
> +_Atomic short var_short;
> +// 4
> +_Atomic int var_int;
> +// 8
> +_Atomic long var_long;
> +_Atomic long long var_long_long;
> +// 16
> +_Atomic __int128 var_int128;
> +// 4
> +_Atomic float var_float;
> +// 8
> +_Atomic double var_double;
> +// 16
> +_Atomic long double var_long_double;
> diff --git a/gcc/testsuite/gcc.target/s390/atomic-align-2.c 
> b/gcc/testsuite/gcc.target/s390/atomic-align-2.c
> new file mode 100644
> index 000..0bf17341bf8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/atomic-align-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target int128 } } */
> +/* { dg-options "-O -std=c11" } */
> +/* { dg-final { scan-assembler-not {abort} } } */
> +
> +/* The stack is 8 byte aligned which means GCC has to manually align a 16 
> byte
> +   aligned object.  This is done by allocating not 16 but rather 24 bytes for
> +   variable X and then manually aligning a pointer inside the memory block.
> +   Validate this by ensuring that the if-statement is optimized out.  */
> +
> +void bar (_Atomic unsigned __int128 *ptr);
> +
> +void foo (void) {
> +  _Atomic unsigned __int128 x;
> +  unsigned long n = (unsigned long)&x;
> +  if (n % 16 != 0)
> +__builtin_abort ();
> +  bar (&x);
> +}



Re: [PATCH] libgcc: Use initarray section type for .init_array

2023-05-25 Thread Andreas Krebbel via Gcc-patches
On 3/20/23 07:33, Kewen.Lin wrote:
> Hi,
> 
> One of my workmates found there is a warning like:
> 
>   libgcc/config/rs6000/morestack.S:402: Warning: ignoring
> incorrect section type for .init_array.0
> 
> when compiling libgcc/config/rs6000/morestack.S.
> 
> Since commit r13-6545 touched that file recently, which was
> suspected to be responsible for this warning, I did some
> investigation and found this is a warning staying for a long
> time.  For section .init_array*, it's preferred to use
> section type SHT_INIT_ARRAY.  So this patch uses
> "@init_array" to replace "@progbits".
> 
> Although the warning is trivial, Segher suggested me to
> post this to fix it, in order to avoid any possible
> misunderstanding/confusion on the warning.
> 
> As Alan confirmed, this doesn't require a premise check
> on if the existing binutils supports "@init_array" or not,
> "because if you want split-stack to work, you must link
> with gold, any version of binutils that has gold has an
> assembler that understands @init_array". (Thanks Alan!)
> 
> Bootstrapped and regtested on x86_64-redhat-linux
> and powerpc64{,le}-linux-gnu.
> 
> Is it ok for trunk when next stage 1 comes?
> 
> BR,
> Kewen
> -
> libgcc/ChangeLog:
> 
>   * config/i386/morestack.S: Use @init_array rather than
>   @progbits for section type of section .init_array.
>   * config/rs6000/morestack.S: Likewise.
>   * config/s390/morestack.S: Likewise.

s390 parts are ok. I did run a bootstrap and regression. Looks all good. Thanks!

Andreas



Re: [PATCH] IBM Z: Fix usage of "f" constraint with long doubles

2021-01-26 Thread Andreas Krebbel via Gcc-patches
On 1/18/21 10:54 PM, Ilya Leoshkevich wrote:
...

> +static rtx_insn *
> +s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
> + vec<machine_mode> &input_modes,
> + vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
> + HARD_REG_SET & /*clobbered_regs*/)
> +{
> +  if (!TARGET_VXE)
> +/* Long doubles are stored in FPR pairs - nothing to do.  */
> +return NULL;
> +
> +  rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
> +
> +  unsigned ninputs = inputs.length ();
> +  unsigned noutputs = outputs.length ();
> +  for (unsigned i = 0; i < noutputs; i++)
> +{
> +  if (GET_MODE (outputs[i]) != TFmode)
> + /* Not a long double - nothing to do.  */
> + continue;
> +  const char *constraint = constraints[i];
> +  bool allows_mem, allows_reg, is_inout;
> +  bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
> +  &allows_mem, &allows_reg, &is_inout);
> +  gcc_assert (ok);
> +  if (strcmp (constraint, "=f") != 0)
> + /* Long double with a constraint other than "=f" - nothing to do.  */
> + continue;

What about other constraint modifiers like & and %? Don't we need to handle 
matching constraints as
well here?

> +  gcc_assert (allows_reg);
> +  gcc_assert (!allows_mem);
> +  gcc_assert (!is_inout);
> +  /* Copy output value from a FPR pair into a vector register.  */
> +  rtx fprx2 = gen_reg_rtx (FPRX2mode);
> +  push_to_sequence2 (after_md_seq, after_md_end);
> +  emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
> +  after_md_seq = get_insns ();
> +  after_md_end = get_last_insn ();
> +  end_sequence ();
> +  outputs[i] = fprx2;
> +}
> +
> +  for (unsigned i = 0; i < ninputs; i++)
> +{
> +  if (GET_MODE (inputs[i]) != TFmode)
> + /* Not a long double - nothing to do.  */
> + continue;
> +  const char *constraint = constraints[noutputs + i];
> +  bool allows_mem, allows_reg;
> +  bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
> + constraints.address (), &allows_mem,
> + &allows_reg);
> +  gcc_assert (ok);
> +  if (strcmp (constraint, "f") != 0 && strcmp (constraint, "=f") != 0)
> + /* Long double with a constraint other than "f" (or "=f" for inout
> +operands) - nothing to do.  */
> + continue;
> +  gcc_assert (allows_reg);
> +  gcc_assert (!allows_mem);
> +  /* Copy input value from a vector register into a FPR pair.  */
> +  rtx fprx2 = gen_reg_rtx (FPRX2mode);
> +  emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
> +  inputs[i] = fprx2;
> +  input_modes[i] = FPRX2mode;
> +}
> +
> +  return after_md_seq;
> +}
> +
>  /* Initialize GCC target structure.  */
>  
>  #undef  TARGET_ASM_ALIGNED_HI_OP
> @@ -16995,6 +17065,9 @@ s390_shift_truncation_mask (machine_mode mode)
>  #undef TARGET_MAX_ANCHOR_OFFSET
>  #define TARGET_MAX_ANCHOR_OFFSET 0xfff
>  
> +#undef TARGET_MD_ASM_ADJUST
> +#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>  
>  #include "gt-s390.h"
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 0e3c31f5d4f..1332a65a1d1 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -616,12 +616,23 @@ (define_insn "*vec_tf_to_v1tf_vr"
> vlvgp\t%v0,%1,%N1"
>[(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")])
>  
> -(define_insn "*fprx2_to_tf"
> -  [(set (match_operand:TF   0 "nonimmediate_operand" "=v")
> - (subreg:TF (match_operand:FPRX2 1 "general_operand"   "f") 0))]
> +(define_insn_and_split "fprx2_to_tf"
> +  [(set (match_operand:TF   0 "nonimmediate_operand" "=v,R")
> + (subreg:TF (match_operand:FPRX2 1 "general_operand"   "f,f") 0))]
>"TARGET_VXE"
> -  "vmrhg\t%v0,%1,%N1"
> -  [(set_attr "op_type" "VRR")])
> +  "@
> +   vmrhg\t%v0,%1,%N1
> +   #"
> +  "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))"
> +  [(set (match_dup 2) (match_dup 3))
> +   (set (match_dup 4) (match_dup 5))]
> +{
> +  operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0);
> +  operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0);
> +  operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8);
> +  operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8);
> +}
> +  [(set_attr "op_type" "VRR,*")])

Splitting an address like this might cause the displacement to overflow in the 
second part. This
would require an additional reg to make the address valid again. Which in turn 
will be a problem
after reload. You can use the 'AR' constraint for the memory alternative. That 
way reload will make
sure the address is offsetable.

Andreas


>  
>  (define_insn "*vec_ti_to_v1ti"
>[(set (match_operand:V1TI   0 "nonimmediate_operand" 
> "=v,v,R,  v,  v,v")
> @@ -753,6 +764,21 @

Re: [PATCH 0/2] IBM Z: Fix long double <-> DFP conversions

2021-02-19 Thread Andreas Krebbel via Gcc-patches
On 2/18/21 1:57 PM, Ilya Leoshkevich wrote:
> This series fixes PR99134.  Patch 1 is factored out from the pending
> [1], patch 2 is the actual fix.  Bootstrapped and regtested on
> s390x-redhat-linux.  Ok for master?
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/564380.html
> 
> Ilya Leoshkevich (2):
>   IBM Z: Improve FPRX2 <-> TF conversions
>   IBM Z: Fix long double <-> DFP conversions

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Fix testcase vcond-shift.c

2021-03-01 Thread Andreas Krebbel via Gcc-patches
On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote:
> As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions
> x CMP y ? -1 : 0 are folded into x CMP y.  Due to this we do not see
> shifts anymore after expand in our testcases but comparisons.  Thus
> replace instructions vesraX by corresponding vchX.  Keep testcases
> vchX_{lt,gt} where only a relational comparison is done and no shift in
> order to keep test coverage for vectorization.

The vcond-shift optimization verified by the testcase is currently implemented 
in s390_expand_vcond
but due to the common code change we go the vec_cmp route now. So we probably 
should do the same
also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix 
the testcase for me:

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9d2cee950d0b..9d9f5a0f6f4e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,

   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
 {
+  cmp_op2 = force_operand (cmp_op2, 0);
   switch (cond)
{
  /* NE a != b -> !(a == b) */
@@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
 }
   else
 {
+  /* Turn x < 0 into x >> (bits - 1)  */
+  if (cond == LT && cmp_op2 == CONST0_RTX (mode))
+   {
+ int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
+GEN_INT (shift), target,
+0, OPTAB_DIRECT);
+ if (res != target)
+   emit_move_insn (target, res);
+ return;
+   }
+  cmp_op2 = force_operand (cmp_op2, 0);
+
   switch (cond)
{
  /* NE: a != b -> !(a == b) */
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index bc52211c55e5..c80d582a300d 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1589,7 +1589,7 @@
   [(set (match_operand:  0 "register_operand" "")
(match_operator: 1 "vcond_comparison_operator"
  [(match_operand:V_HW 2 "register_operand" "")
-  (match_operand:V_HW 3 "register_operand" "")]))]
+  (match_operand:V_HW 3 "nonmemory_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
operands[3]);

Andreas


> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vcond-shift.c: Replace vesraX
>   instructions by corresponding vchX instructions.
> ---
>  .../gcc.target/s390/vector/vcond-shift.c  | 31 ++-
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
> b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> index a6b4e97aa50..9e472aef960 100644
> --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> @@ -3,10 +3,13 @@
>  /* { dg-do compile { target { s390*-*-* } } } */
>  /* { dg-options "-O3 -march=z13 -mzarch" } */
>  
> -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> -/* { dg-final { scan-assembler-not "vzero\t*" } } */
> +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */
> +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */
> +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */
> +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */
>  /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
>  /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
>  /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> @@ -15,19 +18,19 @@
>  #define ITER(X) (2 * (16 / sizeof (X[1])))
>  
>  void
> -vesraf_div (int *x)
> +vchf_vesraf_div (int *x)
>  {
>int i;
>int *xx = __builtin_assume_aligned (x, 8);
>  
>/* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
> - which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
> + which in turn should get simplified to (xx - (xx < 0)) >> 1.  */
>for (i = 0; i < ITER (xx); i++)
>  xx[i] = xx[i] / 2;
>  }
>  
>  void
> -vesrah_div (short *x)
> +vchh_vesrah_div (short *x)
>  {
>int i;
>short *xx = __builtin_assume_aligned (x, 8);
> @@ -38,7 +41,7 @@ vesrah_div (short *x)
>  
>  
>  void
> -vesrab_div (signed char *x)
> +vchb_vesrab_div (signed char *x)
>  {
>int i;
>signed char *xx = __builtin_assume_aligned (x, 8);
> @@ -50,7 +53,7 @@ vesrab_div (signed char *x)
>  
>  
>  int
> -vesraf_lt (int *x)
> +vchf_lt

[Committed] IBM Z: Run mul-signed-overflow tests only on z14

2021-03-02 Thread Andreas Krebbel via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.target/s390/mul-signed-overflow-1.c: Run only on z14.
* gcc.target/s390/mul-signed-overflow-2.c: Run only on z14.
---
 gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c | 2 +-
 gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c 
b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
index fdf56d6e695..be95acc54aa 100644
--- a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
+++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run { target { s390_z14_hw } } } */
 /* z14 only because we need msrkc, msc, msgrkc, msgc  */
 /* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
 
diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c 
b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
index d0088188aa2..f5fbf276c5f 100644
--- a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
+++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run { target { s390_z14_hw } } } */
 /* z14 only because we need msrkc, msc, msgrkc, msgc  */
 /* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
 
-- 
2.29.2



[Committed 1/2] IBM Z: arch14: Add command line options

2021-03-02 Thread Andreas Krebbel via Gcc-patches
Prepare GCC for a future architecture extension.

gcc/ChangeLog:

* common/config/s390/s390-common.c (processor_flags_table): New entry.
* config.gcc: Enable arch14 for --with-arch and --with-tune.
* config/s390/driver-native.c (s390_host_detect_local_cpu): Pick
arch14 for unknown CPU models.
* config/s390/s390-opts.h (enum processor_type): Add PROCESSOR_ARCH14.
* config/s390/s390.c (s390_issue_rate): Add case for PROCESSOR_ARCH14.
(s390_get_sched_attrmask): Likewise.
(s390_get_unit_mask): Likewise.
* config/s390/s390.h (enum processor_flags): Add PF_NNPA and PF_ARCH14.
(TARGET_CPU_ARCH14, TARGET_CPU_ARCH14_P, TARGET_CPU_NNPA)
(TARGET_CPU_NNPA_P, TARGET_ARCH14, TARGET_ARCH14_P, TARGET_NNPA)
(TARGET_NNPA_P): New macro definitions.
* config/s390/s390.md ("cpu_facility", "enabled"): Add arch14 and nnpa.
* config/s390/s390.opt: Add PROCESSOR_ARCH14.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add check for nnpa facility.
---
 gcc/common/config/s390/s390-common.c  |  4 
 gcc/config.gcc|  2 +-
 gcc/config/s390/driver-native.c   |  2 +-
 gcc/config/s390/s390-opts.h   |  1 +
 gcc/config/s390/s390.c|  4 
 gcc/config/s390/s390.h| 20 +++-
 gcc/config/s390/s390.md   | 12 ++--
 gcc/config/s390/s390.opt  |  3 +++
 gcc/testsuite/lib/target-supports.exp | 16 
 9 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/gcc/common/config/s390/s390-common.c 
b/gcc/common/config/s390/s390-common.c
index d066cf7395b..b6bc8501742 100644
--- a/gcc/common/config/s390/s390-common.c
+++ b/gcc/common/config/s390/s390-common.c
@@ -48,8 +48,12 @@ EXPORTED_CONST int processor_flags_table[] =
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14,
 /* z15 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+| PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
+| PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15,
+/* arch14 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15
+| PF_NNPA | PF_ARCH14
   };
 
 /* Change optimizations to be performed, depending on the
diff --git a/gcc/config.gcc b/gcc/config.gcc
index c8853009e55..966cbc888cb 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5122,7 +5122,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
-   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | 
arch11 | arch12 | arch13 )
+   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | 
arch11 | arch12 | arch13 | arch14 )
# OK
;;
*)
diff --git a/gcc/config/s390/driver-native.c b/gcc/config/s390/driver-native.c
index 4a065a52c17..c0247154c0b 100644
--- a/gcc/config/s390/driver-native.c
+++ b/gcc/config/s390/driver-native.c
@@ -124,7 +124,7 @@ s390_host_detect_local_cpu (int argc, const char **argv)
  cpu = "z15";
  break;
default:
- cpu = "z15";
+ cpu = "arch14";
  break;
}
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index d5751809ba5..4141b4d36dd 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -38,6 +38,7 @@ enum processor_type
   PROCESSOR_2964_Z13,
   PROCESSOR_3906_Z14,
   PROCESSOR_8561_Z15,
+  PROCESSOR_ARCH14,
   PROCESSOR_NATIVE,
   PROCESSOR_max
 };
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9d2cee950d0..fcb26316632 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -337,6 +337,7 @@ const struct s390_processor processor_table[] =
   { "z13","z13",PROCESSOR_2964_Z13,&zEC12_cost,  11 },
   { "z14","arch12", PROCESSOR_3906_Z14,&zEC12_cost,  12 },
   { "z15","arch13", PROCESSOR_8561_Z15,&zEC12_cost,  13 },
+  { "arch14", "",   PROCESSOR_ARCH14,  &zEC12_cost,  14 },
   { "native", "",   PROCESSOR_NATIVE,  NULL, 0  }
 };
 
@@ -8409,6 +8410,7 @@ s390_issue_rate (void)
 case PROCESSOR_2827_ZEC12:
 case PROCESSOR_2964_Z13:
 case PROCESSOR_3906_Z14:
+case PROCESSOR_ARCH14:
 default:
   return 1;
 }
@@ -14768,6 +14770,7 @@ s390_get_sched_attrmask (rtx_insn *insn)
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
   break;
 c

[Committed 2/2] IBM Z: arch14: New intrinsics

2021-03-02 Thread Andreas Krebbel via Gcc-patches
This adds support for 5 new builtins.

gcc/ChangeLog:

* config/s390/s390-builtin-types.def (BT_FN_V4SF_V8HI_UINT): New
builtin signature.
(BT_FN_V8HI_V8HI_UINT): Likewise.
(BT_FN_V8HI_V4SF_V4SF_UINT): Likewise.
* config/s390/s390-builtins.def (B_NNPA): New macro definition.
(s390_vclfnhs, s390_vclfnls, s390_vcrnfs, s390_vcfn, s390_vcnf):
New builtin definitions.
* config/s390/s390-c.c (s390_cpu_cpp_builtins_internal): Bump
vector extension version.
* config/s390/s390.c (s390_expand_builtin): Check if builtins are
available with current -march level.
* config/s390/s390.md (UNSPEC_NNPA_VCLFNHS_V8HI)
(UNSPEC_NNPA_VCLFNLS_V8HI, UNSPEC_NNPA_VCRNFS_V8HI)
(UNSPEC_NNPA_VCFN_V8HI, UNSPEC_NNPA_VCNF_V8HI): New constants.
* config/s390/vecintrin.h (vec_extend_to_fp32_hi): New macro.
(vec_extend_to_fp32_lo): Likewise.
(vec_round_from_fp32): Likewise.
(vec_convert_to_fp16): Likewise.
(vec_convert_from_fp16): Likewise.
* config/s390/vx-builtins.md (vclfnhs_v8hi): New insn pattern.
(vclfnls_v8hi): Likewise.
(vcrnfs_v8hi): Likewise.
(vcfn_v8hi): Likewise.
(vcnf_v8hi): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/s390/zvector/vec-nnpa-fp16-convert.c: New test.
* gcc.target/s390/zvector/vec-nnpa-fp32-convert-1.c: New test.
* gcc.target/s390/zvector/vec_convert_from_fp16.c: New test.
* gcc.target/s390/zvector/vec_convert_to_fp16.c: New test.
* gcc.target/s390/zvector/vec_extend_to_fp32_hi.c: New test.
* gcc.target/s390/zvector/vec_extend_to_fp32_lo.c: New test.
* gcc.target/s390/zvector/vec_round_from_fp32.c: New test.
---
 gcc/config/s390/s390-builtin-types.def|  3 +
 gcc/config/s390/s390-builtins.def | 12 
 gcc/config/s390/s390-c.c  |  2 +-
 gcc/config/s390/s390.c|  6 ++
 gcc/config/s390/s390.md   |  7 +++
 gcc/config/s390/vecintrin.h   |  6 ++
 gcc/config/s390/vx-builtins.md| 55 +++
 .../s390/zvector/vec-nnpa-fp16-convert.c  | 34 
 .../s390/zvector/vec-nnpa-fp32-convert-1.c| 27 +
 .../s390/zvector/vec_convert_from_fp16.c  | 12 
 .../s390/zvector/vec_convert_to_fp16.c| 12 
 .../s390/zvector/vec_extend_to_fp32_hi.c  | 12 
 .../s390/zvector/vec_extend_to_fp32_lo.c  | 12 
 .../s390/zvector/vec_round_from_fp32.c| 12 
 14 files changed, 211 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/s390/zvector/vec-nnpa-fp16-convert.c
 create mode 100644 
gcc/testsuite/gcc.target/s390/zvector/vec-nnpa-fp32-convert-1.c
 create mode 100644 
gcc/testsuite/gcc.target/s390/zvector/vec_convert_from_fp16.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_convert_to_fp16.c
 create mode 100644 
gcc/testsuite/gcc.target/s390/zvector/vec_extend_to_fp32_hi.c
 create mode 100644 
gcc/testsuite/gcc.target/s390/zvector/vec_extend_to_fp32_lo.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_round_from_fp32.c

diff --git a/gcc/config/s390/s390-builtin-types.def 
b/gcc/config/s390/s390-builtin-types.def
index a2b7d4a9a32..52ef5728539 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -267,6 +267,7 @@ DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, 
BT_V4SI)
 DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
 DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
+DEF_FN_TYPE_2 (BT_FN_V4SF_V8HI_UINT, BT_V4SF, BT_V8HI, BT_UINT)
 DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
 DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
 DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
@@ -278,6 +279,7 @@ DEF_FN_TYPE_2 (BT_FN_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, 
BT_V8HI)
 DEF_FN_TYPE_2 (BT_FN_V8HI_UV8HI_UV8HI, BT_V8HI, BT_UV8HI, BT_UV8HI)
 DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI)
 DEF_FN_TYPE_2 (BT_FN_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI)
+DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_UINT, BT_V8HI, BT_V8HI, BT_UINT)
 DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
 DEF_FN_TYPE_2 (BT_FN_VOID_UINT64PTR_UINT64, BT_VOID, BT_UINT64PTR, BT_UINT64)
 DEF_FN_TYPE_2 (BT_FN_VOID_V2DF_FLTPTR, BT_VOID, BT_V2DF, BT_FLTPTR)
@@ -345,6 +347,7 @@ DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, 
BT_V4SI, BT_V4SI)
 DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
 DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, BT_V8HI, BT_UV8HI, BT_UV8HI, 
BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, 
BT_V8HI)
+DEF_FN_TYPE_3 (BT_FN_V8HI_V4SF_V4SF_UINT, BT_V8HI, BT_V4SF, BT_V4SF, BT

Re: [PATCH] IBM Z: Run mul-signed-overflow-*.c only on z14+

2021-03-02 Thread Andreas Krebbel via Gcc-patches
On 3/2/21 11:59 PM, Ilya Leoshkevich wrote:
> mul-signed-overflow-*.c execution tests fail on z13, because they
> contain z14-specific instructions.  Fix by requiring s390_z14_hw
> target.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/mul-signed-overflow-1.c: Run only on z14+.
>   * gcc.target/s390/mul-signed-overflow-2.c: Likewise.

I did that change yesterday already.

Andreas

> ---
>  gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c | 3 ++-
>  gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c 
> b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
> index fdf56d6e695..e8b1938dab7 100644
> --- a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
> +++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c
> @@ -1,4 +1,5 @@
> -/* { dg-do run } */
> +/* { dg-do compile } */
> +/* { dg-do run { target { s390_z14_hw } } } */
>  /* z14 only because we need msrkc, msc, msgrkc, msgc  */
>  /* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
>  
> diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c 
> b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
> index d0088188aa2..01328e1d286 100644
> --- a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
> +++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c
> @@ -1,4 +1,5 @@
> -/* { dg-do run } */
> +/* { dg-do compile } */
> +/* { dg-do run { target { s390_z14_hw } } } */
>  /* z14 only because we need msrkc, msc, msgrkc, msgc  */
>  /* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
>  
> 



Re: [PATCH] IBM Z: Run mul-signed-overflow-*.c only on z14+

2021-03-03 Thread Andreas Krebbel via Gcc-patches
On 3/3/21 11:50 AM, Ilya Leoshkevich wrote:
> On Wed, 2021-03-03 at 07:50 +0100, Andreas Krebbel wrote:
>> On 3/2/21 11:59 PM, Ilya Leoshkevich wrote:
>>> mul-signed-overflow-*.c execution tests fail on z13, because they
>>> contain z14-specific instructions.  Fix by requiring s390_z14_hw
>>> target.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/s390/mul-signed-overflow-1.c: Run only on
>>> z14+.
>>> * gcc.target/s390/mul-signed-overflow-2.c: Likewise.
>>
>> I did that change yesterday already.
> 
> Ah, I haven't noticed.  One difference between our patches is, though,
> that I also have `dg-do compile` - this way, compile tests still run on
> z13.

Ok, that's a bit better indeed. Feel free to commit that change on top.

Andreas


Re: [PATCH v3] IBM Z: Fix usage of "f" constraint with long doubles

2021-03-07 Thread Andreas Krebbel via Gcc-patches
On 3/4/21 3:08 PM, Ilya Leoshkevich wrote:
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563799.html
> v1 -> v2:
> - Handle constraint modifiers, use AR constraint instead of R, add
>   testcases for & and %.
> 
> v2: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/564380.html
> v2 -> v3:
> - The main prereq is now committed:
>   https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566237.html
> - Dropped long-double-asm-abi.c test, because its prereq is not
>   approved (yet):
>   https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566218.html
> - Removed superfluous constraint pointer increment.
> 
> 
> 
> After switching the s390 backend to store long doubles in vector
> registers, "f" constraint broke when used with the former: long doubles
> correspond to TFmode, which in combination with "f" corresponds to
> hard regs %v0-%v15, however, asm users expect a %f0-%f15 pair.
> 
> Fix by using TARGET_MD_ASM_ADJUST hook to convert TFmode values to
> FPRX2mode and back.
> 
> gcc/ChangeLog:
> 
> 2020-12-14  Ilya Leoshkevich  
> 
>   * config/s390/s390.c (f_constraint_p): New function.
>   (s390_md_asm_adjust): Implement TARGET_MD_ASM_ADJUST.
>   (TARGET_MD_ASM_ADJUST): Likewise.
>   * config/s390/vector.md (fprx2_to_tf): Rename from *fprx2_to_tf,
>   add memory alternative.
>   (tf_to_fprx2): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-12-14  Ilya Leoshkevich  
> 
>   * gcc.target/s390/vector/long-double-asm-commutative.c: New
>   test.
>   * gcc.target/s390/vector/long-double-asm-earlyclobber.c: New
>   test.
>   * gcc.target/s390/vector/long-double-asm-in-out.c: New test.
>   * gcc.target/s390/vector/long-double-asm-inout.c: New test.
>   * gcc.target/s390/vector/long-double-asm-matching.c: New test.
>   * gcc.target/s390/vector/long-double-asm-regmem.c: New test.
>   * gcc.target/s390/vector/long-double-volatile-from-i64.c: New
>   test.

Ok. Thanks!

Andreas


[Committed] IBM Z: Fix vcond-shift.c testcase.

2021-03-08 Thread Andreas Krebbel via Gcc-patches
Due to a common code change the comparison in the testcase is emitted
via vec_cmp instead of vcond.  The testcase checks for an optimization
currently only available via vcond.

Fixed by implementing the same optimization also in
s390_expand_vec_compare.

Bootstrapped and regression tested on s390x with -march=z15

This fixes the following testsuite fails:

< FAIL: gcc.target/s390/vector/vcond-shift.c scan-assembler-not vzero\\t*
< FAIL: gcc.target/s390/vector/vcond-shift.c scan-assembler-times 
vesrab\\t%v.?,%v.?,7 6
< FAIL: gcc.target/s390/vector/vcond-shift.c scan-assembler-times 
vesraf\\t%v.?,%v.?,31 6
< FAIL: gcc.target/s390/vector/vcond-shift.c scan-assembler-times 
vesrah\\t%v.?,%v.?,15 6

gcc/ChangeLog:

* config/s390/s390.c (s390_expand_vec_compare): Implement <0
comparison with arithmetic right shift.
(s390_expand_vcond): No need for a force_reg anymore.
s390_expand_vec_compare will do it.
* config/s390/vector.md ("vec_cmp"): Accept also
immediate operands.
---
 gcc/config/s390/s390.c| 20 +++-
 gcc/config/s390/vector.md |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index f3d0d1ba596..c9aea21fe40 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6569,6 +6569,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
 
   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
 {
+  cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
   switch (cond)
{
  /* NE a != b -> !(a == b) */
@@ -6607,6 +6608,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
 }
   else
 {
+  /* Turn x < 0 into x >> (bits per element - 1)  */
+  if (cond == LT && cmp_op2 == CONST0_RTX (mode))
+   {
+ int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
+GEN_INT (shift), target,
+0, OPTAB_DIRECT);
+ if (res != target)
+   emit_move_insn (target, res);
+ return;
+   }
+  cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
+
   switch (cond)
{
  /* NE: a != b -> !(a == b) */
@@ -6824,11 +6838,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
   if (!REG_P (cmp_op1))
 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
 
-  if (!REG_P (cmp_op2))
-cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
-
-  s390_expand_vec_compare (result_target, cond,
-  cmp_op1, cmp_op2);
+  s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
 
   /* If the results are supposed to be either -1 or 0 we are done
  since this is what our compare instructions generate anyway.  */
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index bc52211c55e..c80d582a300 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1589,7 +1589,7 @@ (define_expand "vec_cmp"
   [(set (match_operand:  0 "register_operand" "")
(match_operator: 1 "vcond_comparison_operator"
  [(match_operand:V_HW 2 "register_operand" "")
-  (match_operand:V_HW 3 "register_operand" "")]))]
+  (match_operand:V_HW 3 "nonmemory_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
operands[3]);
-- 
2.29.2



Re: [PATCH] IBM Z: Fix *vec_tf_to_v1tf constraints

2020-09-16 Thread Andreas Krebbel via Gcc-patches
On 03.09.20 08:39, Ilya Leoshkevich wrote:
> Bootstrapped (with BOOT_CFLAGS='-g -O2 -Wno-error=maybe-uninitialized')
> and regtested on s390x-redhat-linux. Ok for master?
> 
> 
> 
> Certain alternatives of *vec_tf_to_v1tf use "v" constraint for its
> TFmode source operand. Therefore it is assigned to VEC_REGS class, and
> when it is reloaded using *movtf_64, whose relevant alternatives need
> FP_REGS, LRA loops and ICE happens. The reason is that register class
> mismatch causes LRA to emit another reload, which triggers this issue
> again.
> 
> Fix by using "f" constraint, which is more appropriate for FP register
> pairs anyway.
> 
> gcc/ChangeLog:
> 
> 2020-09-02  Ilya Leoshkevich  
> 
>   * config/s390/vector.md(*vec_tf_to_v1tf): Use "f" instead of "v"
> for the source operand.

Ok. Thanks!

Andreas

> ---
>  gcc/config/s390/vector.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 131bbda09bc..2573b7d980a 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -567,7 +567,7 @@ (define_insn "*vec_splats_bswap_elem"
>  ; single vector register.
>  (define_insn "*vec_tf_to_v1tf"
>[(set (match_operand:V1TF   0 "nonimmediate_operand" 
> "=v,v,R,v,v")
> - (vec_duplicate:V1TF (match_operand:TF 1 "general_operand"   
> "v,R,v,G,d")))]
> + (vec_duplicate:V1TF (match_operand:TF 1 "general_operand"   
> "f,R,f,G,d")))]
>"TARGET_VX"
>"@
> vmrhg\t%v0,%1,%N1
> 



Re: [PATCH] IBM Z: Try to make use of load-and-test instructions

2020-09-21 Thread Andreas Krebbel via Gcc-patches
On 18.09.20 13:10, Stefan Schulze Frielinghaus wrote:
> This patch enables a peephole2 optimization which transforms a load of
> constant zero into a temporary register, followed by a comparison of that
> temporary against a floating-point register of interest, into a single load
> and test instruction.  However, the optimization is only applied if both
> registers are dead afterwards and if we test for (in)equality only.
> This is relaxed in case of fast math.
> 
> This is a follow up to PR88856.
> 
> Bootstrapped and regtested on IBM Z.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md ("*cmp_ccs_0", "*cmp_ccz_0",
>   "*cmp_ccs_0_fastmath"): Basically change "*cmp_ccs_0" into
>   "*cmp_ccz_0" and for fast math add "*cmp_ccs_0_fastmath".
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/load-and-test-fp-1.c: Change test to include all
>   possible combinations of dead/live registers and comparisons (equality,
>   relational).
>   * gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c
>   but for fast math.
>   * gcc.target/s390/load-and-test-fp.h: New test included by
>   load-and-test-fp-{1,2}.c.

Ok for mainline. Please see below for some comments.

Thanks!

Andreas

> ---
>  gcc/config/s390/s390.md   | 54 +++
>  .../gcc.target/s390/load-and-test-fp-1.c  | 19 +++
>  .../gcc.target/s390/load-and-test-fp-2.c  | 17 ++
>  .../gcc.target/s390/load-and-test-fp.h| 12 +
>  4 files changed, 67 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 4c3e5400a2b..e591aa7c324 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -1391,23 +1391,55 @@
>  ; (TF|DF|SF|TD|DD|SD) instructions
>  
>  
> -; FIXME: load and test instructions turn SNaN into QNaN what is not
> -; acceptable if the target will be used afterwards.  On the other hand
> -; they are quite convenient for implementing comparisons with 0.0. So
> -; try to enable them via splitter/peephole if the value isn't needed anymore.
> -; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
> +; load and test instructions turn a signaling NaN into a quiet NaN.  Thus 
> they
> +; may only be used if the target register is dead afterwards or if fast math
> +; is enabled.  The former is done via a peephole optimization.  Note, load 
> and
> +; test instructions may only be used for (in)equality comparisons because
> +; relational comparisons must treat a quiet NaN like a signaling NaN which is
> +; not the case for load and test instructions.  For fast math insn
> +; "cmp_ccs_0_fastmath" applies.
> +; See testcases load-and-test-fp-{1,2}.c
> +
> +(define_peephole2
> +  [(set (match_operand:FP 0 "register_operand")
> + (match_operand:FP 1 "const0_operand"))
> +   (set (reg:CCZ CC_REGNUM)
> + (compare:CCZ (match_operand:FP 2 "register_operand")
> +  (match_operand:FP 3 "register_operand")))]
> +  "TARGET_HARD_FLOAT
> +   && FP_REG_P (operands[2])
> +   && REGNO (operands[0]) == REGNO (operands[3])
> +   && peep2_reg_dead_p (2, operands[0])
> +   && peep2_reg_dead_p (2, operands[2])"
> +  [(parallel
> +[(set (reg:CCZ CC_REGNUM)
> +   (match_op_dup 4 [(match_dup 2) (match_dup 1)]))
> + (clobber (match_dup 2))])]
> +  "operands[4] = gen_rtx_COMPARE (CCZmode, operands[2], operands[1]);")

Couldn't this be written as:

 [(parallel
[(set (reg:CCZ CC_REGNUM)
  (compare:CCZ (match_dup 2) (match_dup 1)))
 (clobber (match_dup 2))])])

>  
>  ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
> -(define_insn "*cmp_ccs_0"
> -  [(set (reg CC_REGNUM)
> - (compare (match_operand:FP 0 "register_operand"  "f")
> -  (match_operand:FP 1 "const0_operand""")))
> -   (clobber (match_operand:FP  2 "register_operand" "=0"))]
> -  "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
> +(define_insn "*cmp_ccz_0"
> +  [(set (reg:CCZ CC_REGNUM)
> + (compare:CCZ (match_operand:FP 0 "register_operand" "f")
> +  (match_operand:FP 1 "const0_operand")))
> +   (clobber (match_operand:FP 2 "register_operand" "=0"))]
> +  "TARGET_HARD_FLOAT"
>"ltr\t%0,%0"
> [(set_attr "op_type" "RRE")
>  (set_attr "type"  "fsimp")])
>  
> +(define_insn "*cmp_ccs_0_fastmath"
> +  [(set (reg CC_REGNUM)
> + (compare (match_operand:FP 0 "register_operand" "f")
> +  (match_operand:FP 1 "const0_operand")))]
> +  "s390_match_ccmode (insn, CCSmode)
> +   && TARGET_HARD_FLOAT
> +   && !flag_trapping_math
> +   && !flag_signaling_nans"
> +  "ltr\t%0,%0"
> +  [(set_attr "op_type" "RRE")
> +   (set_attr "type" "fsimp")])
> +
>  ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
>  ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
>  (define_insn "*cmp_ccs"
> diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c 
> b/gcc/testsuite/gcc.target/

Re: [PATCH] S/390: Do not turn maybe-uninitialized warnings into errors

2020-09-22 Thread Andreas Krebbel via Gcc-patches
On 15.09.20 17:02, Stefan Schulze Frielinghaus wrote:
> Over the last couple of months quite a few warnings about uninitialized
> variables were raised while building GCC.  A reason why these warnings
> show up on S/390 only is due to the aggressive inlining settings here.
> Some of these warnings (2c832ffedf0, b776bdca932, 2786c0221b6,
> 1657178f59b) could be fixed or in case of a false positive silenced by
> initializing the corresponding variable.  Since the latter keep recurring,
> and such warnings are turned into errors while bootstrapping, bootstrapping
> fails on S/390 consistently.  Therefore, for the moment do not turn
> those warnings into errors.
> 
> config/ChangeLog:
> 
>   * warnings.m4: Do not turn maybe-uninitialized warnings into errors
>   on S/390.
> 
> fixincludes/ChangeLog:
> 
>   * configure: Regenerate.
> 
> gcc/ChangeLog:
> 
>   * configure: Regenerate.
> 
> libcc1/ChangeLog:
> 
>   * configure: Regenerate.
> 
> libcpp/ChangeLog:
> 
>   * configure: Regenerate.
> 
> libdecnumber/ChangeLog:
> 
>   * configure: Regenerate.

That change looks good to me. Could a global reviewer please comment?

Andreas

> ---
>  config/warnings.m4 | 20 ++--
>  fixincludes/configure  |  8 +++-
>  gcc/configure  | 12 +---
>  libcc1/configure   |  8 +++-
>  libcpp/configure   |  8 +++-
>  libdecnumber/configure |  8 +++-
>  6 files changed, 51 insertions(+), 13 deletions(-)
> 
> diff --git a/config/warnings.m4 b/config/warnings.m4
> index ce007f9b73e..d977bfb20af 100644
> --- a/config/warnings.m4
> +++ b/config/warnings.m4
> @@ -101,8 +101,10 @@ AC_ARG_ENABLE(werror-always,
>  AS_HELP_STRING([--enable-werror-always],
>  [enable -Werror despite compiler version]),
>  [], [enable_werror_always=no])
> -AS_IF([test $enable_werror_always = yes],
> -  [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
> +AS_IF([test $enable_werror_always = yes], [dnl
> +  acx_Var="$acx_Var${acx_Var:+ }-Werror"
> +  AS_CASE([$host], [s390*-*-*],
> +  [acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
>   m4_if($1, [manual],,
>   [AS_VAR_PUSHDEF([acx_GCCvers], [acx_cv_prog_cc_gcc_$1_or_newer])dnl
>AC_CACHE_CHECK([whether $CC is GCC >=$1], acx_GCCvers,
> @@ -116,7 +118,9 @@ AS_IF([test $enable_werror_always = yes],
> [AS_VAR_SET(acx_GCCvers, yes)],
> [AS_VAR_SET(acx_GCCvers, no)])])
>   AS_IF([test AS_VAR_GET(acx_GCCvers) = yes],
> -   [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
> +   [acx_Var="$acx_Var${acx_Var:+ }-Werror"
> +AS_CASE([$host], [s390*-*-*],
> +[acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
>AS_VAR_POPDEF([acx_GCCvers])])
>  m4_popdef([acx_Var])dnl
>  AC_LANG_POP(C)
> @@ -205,8 +209,10 @@ AC_ARG_ENABLE(werror-always,
>  AS_HELP_STRING([--enable-werror-always],
>  [enable -Werror despite compiler version]),
>  [], [enable_werror_always=no])
> -AS_IF([test $enable_werror_always = yes],
> -  [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
> +AS_IF([test $enable_werror_always = yes], [dnl
> +  acx_Var="$acx_Var${acx_Var:+ }-Werror"
> +  AS_CASE([$host], [s390*-*-*],
> +  [strict_warn="$strict_warn -Wno-error=maybe-uninitialized"])])
>   m4_if($1, [manual],,
>   [AS_VAR_PUSHDEF([acx_GXXvers], [acx_cv_prog_cxx_gxx_$1_or_newer])dnl
>AC_CACHE_CHECK([whether $CXX is G++ >=$1], acx_GXXvers,
> @@ -220,7 +226,9 @@ AS_IF([test $enable_werror_always = yes],
> [AS_VAR_SET(acx_GXXvers, yes)],
> [AS_VAR_SET(acx_GXXvers, no)])])
>   AS_IF([test AS_VAR_GET(acx_GXXvers) = yes],
> -   [acx_Var="$acx_Var${acx_Var:+ }-Werror"])
> +   [acx_Var="$acx_Var${acx_Var:+ }-Werror"
> +AS_CASE([$host], [s390*-*-*],
> +[acx_Var="$acx_Var -Wno-error=maybe-uninitialized"])])
>AS_VAR_POPDEF([acx_GXXvers])])
>  m4_popdef([acx_Var])dnl
>  AC_LANG_POP(C++)
> diff --git a/fixincludes/configure b/fixincludes/configure
> index 6e2d67b655b..e0d679cc18e 100755
> --- a/fixincludes/configure
> +++ b/fixincludes/configure
> @@ -4753,7 +4753,13 @@ else
>  fi
>  
>  if test $enable_werror_always = yes; then :
> -  WERROR="$WERROR${WERROR:+ }-Werror"
> +WERROR="$WERROR${WERROR:+ }-Werror"
> +  case $host in #(
> +  s390*-*-*) :
> +WERROR="$WERROR -Wno-error=maybe-uninitialized" ;; #(
> +  *) :
> + ;;
> +esac
>  fi
>  
>  ac_ext=c
> diff --git a/gcc/configure b/gcc/configure
> index 0a09777dd42..ea03581537a 100755
> --- a/gcc/configure
> +++ b/gcc/configure
> @@ -7064,7 +7064,13 @@ else
>  fi
>  
>  if test $enable_werror_always = yes; then :
> -  strict_warn="$strict_warn${strict_warn:+ }-Werror"
> +strict_warn="$strict_warn${strict_warn:+ }-Werror"
> +  case $host in #(
> +  s390*-*-*) :
> +strict_warn="$strict_warn -Wno-error=maybe-uninitialized" ;; #(
> +  *) :
> + ;;
> +esac
>  fi
>  
>  ac_ext=cpp
> @@ -19013,7 +19019,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dl

Re: [committed] s390: Fix up s390_atomic_assign_expand_fenv

2020-10-01 Thread Andreas Krebbel via Gcc-patches
On 01.10.20 11:13, Jakub Jelinek wrote:
> Hi!
> 
> The following patch fixes
> -FAIL: gcc.dg/pr94780.c (internal compiler error)
> -FAIL: gcc.dg/pr94780.c (test for excess errors)
> -FAIL: gcc.dg/pr94842.c (internal compiler error)
> -FAIL: gcc.dg/pr94842.c (test for excess errors)
> on s390x-linux.  The fix is essentially the same as has been applied to many
> other targets (i386, aarch64, arm, rs6000, alpha, riscv).
> 
> Bootstrapped/regtested on s390x-linux, committed to trunk and release
> branches as obvious.
> 
> 2020-10-01  Jakub Jelinek  
> 
>   * config/s390/s390.c (s390_atomic_assign_expand_fenv): Use
>   TARGET_EXPR instead of MODIFY_EXPR for the first assignments to
>   fenv_var and old_fpc.  Formatting fixes.

Thanks!

Andreas

> 
> --- gcc/config/s390/s390.c.jj 2020-09-14 09:04:36.086851054 +0200
> +++ gcc/config/s390/s390.c2020-09-30 10:22:50.579603271 +0200
> @@ -16082,12 +16082,13 @@ s390_atomic_assign_expand_fenv (tree *ho
>  
>   fenv_var = __builtin_s390_efpc ();
>   __builtin_s390_sfpc (fenv_var & mask) */
> -  tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, 
> call_efpc);
> -  tree new_fpc =
> -build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
> - build_int_cst (unsigned_type_node,
> -~(FPC_DXC_MASK | FPC_FLAGS_MASK |
> -  FPC_EXCEPTION_MASK)));
> +  tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, 
> call_efpc,
> +  NULL_TREE, NULL_TREE);
> +  tree new_fpc
> += build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
> +   build_int_cst (unsigned_type_node,
> +  ~(FPC_DXC_MASK | FPC_FLAGS_MASK
> +| FPC_EXCEPTION_MASK)));
>tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
>*hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
>  
> @@ -16106,8 +16107,8 @@ s390_atomic_assign_expand_fenv (tree *ho
>__atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
>  
>old_fpc = create_tmp_var_raw (unsigned_type_node);
> -  tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
> -old_fpc, call_efpc);
> +  tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, 
> call_efpc,
> +NULL_TREE, NULL_TREE);
>  
>set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
>  
> 
> 
>   Jakub
> 



[Committed] IBM Z: Doc: Add z15/arch13 to the list of -march/-mtune options

2020-10-05 Thread Andreas Krebbel via Gcc-patches
gcc/ChangeLog:

* doc/invoke.texi: Add z15/arch13 to the list of documented
-march/-mtune options.
---
 gcc/doc/invoke.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f623467b763..7c81d7f41bd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -27698,7 +27698,7 @@ system representing a certain processor type.  Possible 
values for
 @var{cpu-type} are @samp{z900}/@samp{arch5}, @samp{z990}/@samp{arch6},
 @samp{z9-109}, @samp{z9-ec}/@samp{arch7}, @samp{z10}/@samp{arch8},
 @samp{z196}/@samp{arch9}, @samp{zEC12}, @samp{z13}/@samp{arch11},
-@samp{z14}/@samp{arch12}, and @samp{native}.
+@samp{z14}/@samp{arch12}, @samp{z15}/@samp{arch13}, and @samp{native}.
 
 The default is @option{-march=z900}.
 
-- 
2.25.1



Re: [PATCH] IBM Z: Change vector copysign to use bitwise operations

2020-10-09 Thread Andreas Krebbel via Gcc-patches
On 08.10.20 11:38, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  OK for master?
> 
> The vector copysign pattern incorrectly assumes that vector
> if_then_else operates on bits, not on elements.  This can theoretically
> mislead the optimizers.  Fix by changing it to use bitwise operations,
> like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
> for vec_sel builtin.
> 
> gcc/ChangeLog:
> 
> 2020-10-07  Ilya Leoshkevich  
> 
>   * config/s390/s390-protos.h (s390_build_signbit_mask): New
>   function.
>   * config/s390/s390.c (s390_tointvec): New function.
>   (s390_contiguous_bitmask_vector_p): Bitcast the argument to
>   an integral mode.
>   (s390_expand_vec_init): Do not call
>   s390_contiguous_bitmask_vector_p with a scalar argument.
>   (s390_build_signbit_mask): New function.
>   * config/s390/vector.md (copysign3): Use bitwise
>   operations.

Couldn't s390_tointvec be implemented/replaced with related_int_vector_mode?

Ok, Thanks!

Andreas

> ---
>  gcc/config/s390/s390-protos.h |  1 +
>  gcc/config/s390/s390.c| 92 ---
>  gcc/config/s390/vector.md | 31 
>  3 files changed, 95 insertions(+), 29 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 6f1bc07db17..029f7289fac 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum 
> rtx_code, rtx, rtx, bool);
>  extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
>  extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
>  extern void s390_expand_vec_init (rtx, rtx);
> +extern rtx s390_build_signbit_mask (machine_mode);
>  extern rtx s390_return_addr_rtx (int, rtx);
>  extern rtx s390_back_chain_rtx (void);
>  extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 93894307d62..554c1adf40a 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -2450,6 +2450,54 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, 
> bool wrap_p,
>return b;
>  }
>  
> +/* Return the associated integral mode of VEC_MODE.  Must be in sync with
> +   tointvec mode_attr.  */
> +static machine_mode
> +s390_tointvec (machine_mode vec_mode)
> +{
> +  switch (vec_mode)
> +{
> +case V1QImode:
> +  return V1QImode;
> +case V2QImode:
> +  return V2QImode;
> +case V4QImode:
> +  return V4QImode;
> +case V8QImode:
> +  return V8QImode;
> +case V16QImode:
> +  return V16QImode;
> +case V1HImode:
> +  return V1HImode;
> +case V2HImode:
> +  return V2HImode;
> +case V4HImode:
> +  return V4HImode;
> +case V8HImode:
> +  return V8HImode;
> +case V1SImode:
> +case V1SFmode:
> +  return V1SImode;
> +case V2SImode:
> +case V2SFmode:
> +  return V2SImode;
> +case V4SImode:
> +case V4SFmode:
> +  return V4SImode;
> +case V1DImode:
> +case V1DFmode:
> +  return V1DImode;
> +case V2DImode:
> +case V2DFmode:
> +  return V2DImode;
> +case V1TImode:
> +case V1TFmode:
> +  return V1TImode;
> +default:
> +  gcc_unreachable ();
> +}
> +}
> +
>  /* Return true if OP contains the same contiguous bitfield in *all*
> its elements.  START and END can be used to obtain the start and
> end position of the bitfield.
> @@ -2467,6 +2515,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, 
> int *end)
>rtx elt;
>bool b;
>  
> +  /* Handle floats by bitcasting them to ints.  */
> +  op = gen_lowpart (s390_tointvec (GET_MODE (op)), op);
> +
>gcc_assert (!!start == !!end);
>if (!const_vec_duplicate_p (op, &elt)
>|| !CONST_INT_P (elt))
> @@ -6863,15 +6914,16 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  
>/* Use vector gen mask or vector gen byte mask if possible.  */
> -  if (all_same && all_const_int
> -  && (XVECEXP (vals, 0, 0) == const0_rtx
> -   || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
> -NULL, NULL)
> -   || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
> +  if (all_same && all_const_int)
>  {
> -  emit_insn (gen_rtx_SET (target,
> -   gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0;
> -  return;
> +  rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +  if (XVECEXP (vals, 0, 0) == const0_rtx
> +   || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
> +   || s390_bytemask_vector_p (vec, NULL))
> + {
> +   emit_insn (gen_rtx_SET (target, vec));
> +   return;
> + }
>  }
>  
>/* Use vector replicate instructions.  vlrep/vrepi/vrep  */
> @@ -6949,6 +7001,30 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  }
>

Re: [PATCH v2] IBM Z: Change vector copysign to use bitwise operations

2020-10-12 Thread Andreas Krebbel via Gcc-patches
On 09.10.20 17:49, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  OK for master?
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555782.html
> v1 -> v2: Use related_int_vector_mode.
> 
> 
> 
> The vector copysign pattern incorrectly assumes that vector
> if_then_else operates on bits, not on elements.  This can theoretically
> mislead the optimizers.  Fix by changing it to use bitwise operations,
> like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
> for vec_sel builtin.
> 
> gcc/ChangeLog:
> 
> 2020-10-07  Ilya Leoshkevich  
> 
>   * config/s390/s390-protos.h (s390_build_signbit_mask): New
>   function.
>   * config/s390/s390.c (s390_contiguous_bitmask_vector_p):
>   Bitcast the argument to an integral mode.
>   (s390_expand_vec_init): Do not call
>   s390_contiguous_bitmask_vector_p with a scalar argument.
>   (s390_build_signbit_mask): New function.
>   * config/s390/vector.md (copysign3): Use bitwise
>   operations.

Ok. Thanks!

Andreas

> ---
>  gcc/config/s390/s390-protos.h |  1 +
>  gcc/config/s390/s390.c| 44 ---
>  gcc/config/s390/vector.md | 28 +++---
>  3 files changed, 45 insertions(+), 28 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 6f1bc07db17..029f7289fac 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum 
> rtx_code, rtx, rtx, bool);
>  extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
>  extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
>  extern void s390_expand_vec_init (rtx, rtx);
> +extern rtx s390_build_signbit_mask (machine_mode);
>  extern rtx s390_return_addr_rtx (int, rtx);
>  extern rtx s390_back_chain_rtx (void);
>  extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 93894307d62..dbb541bbea7 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -2467,6 +2467,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, 
> int *end)
>rtx elt;
>bool b;
>  
> +  /* Handle floats by bitcasting them to ints.  */
> +  op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
> +
>gcc_assert (!!start == !!end);
>if (!const_vec_duplicate_p (op, &elt)
>|| !CONST_INT_P (elt))
> @@ -6863,15 +6866,16 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  
>/* Use vector gen mask or vector gen byte mask if possible.  */
> -  if (all_same && all_const_int
> -  && (XVECEXP (vals, 0, 0) == const0_rtx
> -   || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
> -NULL, NULL)
> -   || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
> +  if (all_same && all_const_int)
>  {
> -  emit_insn (gen_rtx_SET (target,
> -   gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0;
> -  return;
> +  rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +  if (XVECEXP (vals, 0, 0) == const0_rtx
> +   || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
> +   || s390_bytemask_vector_p (vec, NULL))
> + {
> +   emit_insn (gen_rtx_SET (target, vec));
> +   return;
> + }
>  }
>  
>/* Use vector replicate instructions.  vlrep/vrepi/vrep  */
> @@ -6949,6 +6953,30 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  }
>  
> +/* Emit a vector constant that contains 1s in each element's sign bit 
> position
> +   and 0s in other positions.  MODE is the desired constant's mode.  */
> +extern rtx
> +s390_build_signbit_mask (machine_mode mode)
> +{
> +  /* Generate the integral element mask value.  */
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
> +  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
> +
> +  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the 
> integral
> + value to the desired mode.  */
> +  machine_mode int_mode = related_int_vector_mode (mode).require ();
> +  rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
> +  mask = gen_lowpart (inner_mode, mask);
> +
> +  /* Emit the vector mask rtx by mode the element mask rtx.  */
> +  int nunits = GET_MODE_NUNITS (mode);
> +  rtvec v = rtvec_alloc (nunits);
> +  for (int i = 0; i < nunits; i++)
> +RTVEC_ELT (v, i) = mask;
> +  return gen_rtx_CONST_VECTOR (mode, v);
> +}
> +
>  /* Structure to hold the initial parameters for a compare_and_swap operation
> in HImode and QImode.  */
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 2573b7d980a..e9332bad0fd 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -1425,28 +1425,16 @@

Re: [PATCH] s390: Fix up *cmp_and_trap_unsigned_int constraints [PR104775]

2022-03-07 Thread Andreas Krebbel via Gcc-patches
On 3/5/22 09:33, Jakub Jelinek wrote:
> Hi!
> 
> The following testcase fails to assemble due to clgte %r6,0(%r1,%r10)
> insn not being accepted by assembler.
> My rough understanding is that in the RSY-b insn format the field that
> other formats use for the index register instead holds M3, which encodes
> the kind of comparison, so this patch follows what other similar
> instructions use for constraint (i.e. one without index register).
> 
> Bootstrapped on s390x-linux, regtest there still pending, ok for
> trunk if it passes it?
> 
> 2022-03-05  Jakub Jelinek  
> 
>   PR target/104775
>   * config/s390/s390.md (*cmp_and_trap_unsigned_int): Use
>   S constraint instead of T in the last alternative.
> 
>   * gcc.target/s390/pr104775.c: New test.

Ok. Thanks for the fix!

Bye,

Andreas


[PATCH] PR102024 - IBM Z: Add psabi diagnostics

2022-03-25 Thread Andreas Krebbel via Gcc-patches
For IBM Z in particular there is a problem with structs like:

struct A { float a; int :0; };

Our ABI document allows passing a struct in an FPR only if it has
exactly one member. On the other hand it says that structs of 1,2,4,8
bytes are passed in a GPR. So this struct is expected to be passed in
a GPR. Since we don't return structs in registers (regardless of the
number of members) it is always returned in memory.

Situation is as follows:

All compiler versions tested return it in memory - as expected.

gcc 11, gcc 12, g++ 12, and clang 13 pass it in a GPR - as expected.

g++ 11 as well as clang++ 13 pass in an FPR

For IBM Z we stick to the current GCC 12 behavior, i.e. zero-width
bitfields are NOT ignored.  A struct as above will be passed in a
GPR. The rationale behind this is that not affecting the C ABI is more
important here.

A patch for clang is in progress: https://reviews.llvm.org/D122388

In addition to the usual regression test I ran the compat and
struct-layout-1 testsuites comparing the compiler before and after the
patch.

gcc/ChangeLog:
PR target/102024
* config/s390/s390-protos.h (s390_function_arg_vector): Remove
prototype.
* config/s390/s390.cc (s390_single_field_struct_p): New function.
(s390_function_arg_vector): Invoke s390_single_field_struct_p.
(s390_function_arg_float): Likewise.

gcc/testsuite/ChangeLog:
PR target/102024
* g++.target/s390/pr102024-1.C: New test.
* g++.target/s390/pr102024-2.C: New test.
* g++.target/s390/pr102024-3.C: New test.
* g++.target/s390/pr102024-4.C: New test.
* g++.target/s390/pr102024-5.C: New test.
* g++.target/s390/pr102024-6.C: New test.
---
 gcc/config/s390/s390-protos.h  |   1 -
 gcc/config/s390/s390.cc| 212 +++--
 gcc/testsuite/g++.target/s390/pr102024-1.C |  12 ++
 gcc/testsuite/g++.target/s390/pr102024-2.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-3.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-4.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-5.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-6.C |  12 ++
 8 files changed, 195 insertions(+), 100 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-1.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-2.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-3.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-4.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-5.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-6.C

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e6251595870..fd4acaae44a 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -49,7 +49,6 @@ extern void s390_function_profiler (FILE *, int);
 extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
-extern bool s390_function_arg_vector (machine_mode, const_tree);
 extern bool s390_return_addr_from_memory(void);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..6cfa586b9cd 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -12148,29 +12148,29 @@ s390_function_arg_size (machine_mode mode, const_tree 
type)
   gcc_unreachable ();
 }
 
-/* Return true if a function argument of type TYPE and mode MODE
-   is to be passed in a vector register, if available.  */
-
-bool
-s390_function_arg_vector (machine_mode mode, const_tree type)
+/* Return true if a variable of TYPE should be passed as single value
+   with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
+   record type and the field type must match.
+
+   The ABI says that record types with a single member are treated
+   just like that member would be.  This function is a helper to
+   detect such cases.  The function also produces the proper
+   diagnostics for cases where the outcome might be different
+   depending on the GCC version.  */
+static bool
+s390_single_field_struct_p (enum tree_code code, const_tree type,
+   bool strict_size_check_p)
 {
-  if (!TARGET_VX_ABI)
-return false;
-
-  if (s390_function_arg_size (mode, type) > 16)
-return false;
-
-  /* No type info available for some library calls ...  */
-  if (!type)
-return VECTOR_MODE_P (mode);
-
-  /* The ABI says that record types with a single member are treated
- just like that member would be.  */
   int empty_base_seen = 0;
+  bool zero_width_bf_seen_p = false;
   const_tree orig_type = type;
+  bool single_p = true;
+
   while (TREE_CODE (type) == RECORD_TYPE)
 {
-  tree field, single = NULL_TREE;
+  tree field, single_type = NULL_TREE;
+  int num_zero_width_bf_seen = 0;
+  int num_fields_seen = 0;
 

Re: [PATCH] testsuite: Add -fno-tree-loop-distribute-patterns for s390.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:51, Robin Dapp wrote:
> Hi,
> 
> in gcc.dg/Wuse-after-free-2.c we try to detect a use-after-free.  On
> s390 the test's while loop is converted into a rawmemchr builtin making
> it impossible to determine that the pointers *p and *q are related.
> 
> Therefore, disable the tree loop distribute patterns pass on s390 for
> this test.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/Wuse-after-free-2.c:
>   Add -fno-tree-loop-distribute-patterns for s390*.

Ok. Thanks!

Andreas


Re: [PATCH] testsuite/s390: Change nle -> h in ifcvt tests.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:51, Robin Dapp wrote:
> Hi,
> 
> we have been emitting the "higher" variants instead of the "not less or
> equal" ones for a while.  Change the test expectations accordingly.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/ifcvt-two-insns-bool.c: Change nle to h.
>   * gcc.target/s390/ifcvt-two-insns-int.c: Dito.
>   * gcc.target/s390/ifcvt-two-insns-long.c: Dito.

Ok. Thanks!

Andreas


Re: [PATCH] testsuite/s390: Adapt test expections.

2022-04-04 Thread Andreas Krebbel via Gcc-patches
On 4/4/22 13:52, Robin Dapp wrote:
> Hi,
> 
> some tests expect a convert instruction but nowadays the conversion is
> already done at compile time.  This results in a literal-pool load.
> Change the tests accordingly.
> 
> OK for trunk?
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/zvector/vec-double-compile.c: Expect vl
> instead of vc*.
>   * gcc.target/s390/zvector/vec-float-compile.c: Dito.
>   * gcc.target/s390/zvector/vec-signed-compile.c: Dito.
>   * gcc.target/s390/zvector/vec-unsigned-compile.c: Dito.

I've seen Mike's comment but I'm not opposed to checking it in that way. These
kinds of comments have probably saved me a few hours of bisecting already. Next
time you might consider moving it to the commit message instead.

Ok. Thanks!

Bye,

Andreas


Re: [PATCH] rs6000/testsuite: Skip pr105140.c

2022-04-06 Thread Andreas Krebbel via Gcc-patches
On 4/6/22 17:32, Segher Boessenkool wrote:
> This test fails with error "AltiVec argument passed to unprototyped
> function", but the code (in rs6000.c:invalid_arg_for_unprototyped_fn,
> from 2005) actually tests for any vector type argument.  It also does
> not fail on Darwin, not reflected here though.
> 
> Andreas, s390 has this same hook code, you may need to do the same?

Yes, thanks for the pointer. I've just committed the following:

IBM zSystems/testsuite: PR105147: Skip pr105140.c

pr105140.c fails on IBM zSystems with "vector argument passed to
unprototyped function".  s390_invalid_arg_for_unprototyped_fn in
s390.cc is triggered by that.

gcc/testsuite/ChangeLog:

PR target/105147
* gcc.dg/pr105140.c: Skip for s390*-*-*.
---
 gcc/testsuite/gcc.dg/pr105140.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr105140.c b/gcc/testsuite/gcc.dg/pr105140.c
index da34e7ad656..7d30985e850 100644
--- a/gcc/testsuite/gcc.dg/pr105140.c
+++ b/gcc/testsuite/gcc.dg/pr105140.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-Os -w -Wno-psabi" } */
-/* { dg-skip-if "PR105147" { powerpc*-*-* } } */
+/* { dg-skip-if "PR105147" { powerpc*-*-* s390*-*-* } } */

 typedef char __attribute__((__vector_size__ (16 * sizeof (char)))) U;
 typedef int __attribute__((__vector_size__ (16 * sizeof (int)))) V;


[PATCH] v2 PR102024 - IBM Z: Add psabi diagnostics

2022-04-11 Thread Andreas Krebbel via Gcc-patches
v2:

- Remove redundant num_zero_width_bf_seen and num_fields_seen
  tracking. (Thanks Stefan Schulze-Frielinghaus)

Re-tested with testsuite and ABI tests.



For IBM Z in particular there is a problem with structs like:

struct A { float a; int :0; };

Our ABI document allows passing a struct in an FPR only if it has
exactly one member. On the other hand it says that structs of 1,2,4,8
bytes are passed in a GPR. So this struct is expected to be passed in
a GPR. Since we don't return structs in registers (regardless of the
number of members) it is always returned in memory.

Situation is as follows:

All compiler versions tested return it in memory - as expected.

gcc 11, gcc 12, g++ 12, and clang 13 pass it in a GPR - as expected.

g++ 11 as well as clang++ 13 pass in an FPR

For IBM Z we stick to the current GCC 12 behavior, i.e. zero-width
bitfields are NOT ignored.  A struct as above will be passed in a
GPR. The rationale behind this is that not affecting the C ABI is more
important here.

A patch for clang is in progress: https://reviews.llvm.org/D122388

In addition to the usual regression test I ran the compat and
struct-layout-1 testsuites comparing the compiler before and after the
patch.

gcc/ChangeLog:
PR target/102024
* config/s390/s390-protos.h (s390_function_arg_vector): Remove
prototype.
* config/s390/s390.cc (s390_single_field_struct_p): New function.
(s390_function_arg_vector): Invoke s390_single_field_struct_p.
(s390_function_arg_float): Likewise.

gcc/testsuite/ChangeLog:
PR target/102024
* g++.target/s390/pr102024-1.C: New test.
* g++.target/s390/pr102024-2.C: New test.
* g++.target/s390/pr102024-3.C: New test.
* g++.target/s390/pr102024-4.C: New test.
* g++.target/s390/pr102024-5.C: New test.
* g++.target/s390/pr102024-6.C: New test.
---
 gcc/config/s390/s390-protos.h  |   1 -
 gcc/config/s390/s390.cc| 208 +++--
 gcc/testsuite/g++.target/s390/pr102024-1.C |  12 ++
 gcc/testsuite/g++.target/s390/pr102024-2.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-3.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-4.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-5.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-6.C |  12 ++
 8 files changed, 187 insertions(+), 104 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-1.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-2.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-3.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-4.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-5.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-6.C

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e6251595870..fd4acaae44a 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -49,7 +49,6 @@ extern void s390_function_profiler (FILE *, int);
 extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
-extern bool s390_function_arg_vector (machine_mode, const_tree);
 extern bool s390_return_addr_from_memory(void);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..c091d2a692a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -12148,29 +12148,26 @@ s390_function_arg_size (machine_mode mode, const_tree 
type)
   gcc_unreachable ();
 }
 
-/* Return true if a function argument of type TYPE and mode MODE
-   is to be passed in a vector register, if available.  */
-
-bool
-s390_function_arg_vector (machine_mode mode, const_tree type)
+/* Return true if a variable of TYPE should be passed as single value
+   with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
+   record type and the field type must match.
+
+   The ABI says that record types with a single member are treated
+   just like that member would be.  This function is a helper to
+   detect such cases.  The function also produces the proper
+   diagnostics for cases where the outcome might be different
+   depending on the GCC version.  */
+static bool
+s390_single_field_struct_p (enum tree_code code, const_tree type,
+   bool strict_size_check_p)
 {
-  if (!TARGET_VX_ABI)
-return false;
-
-  if (s390_function_arg_size (mode, type) > 16)
-return false;
-
-  /* No type info available for some library calls ...  */
-  if (!type)
-return VECTOR_MODE_P (mode);
-
-  /* The ABI says that record types with a single member are treated
- just like that member would be.  */
   int empty_base_seen = 0;
+  bool zero_width_bf_skipped_p = false;
   const_tree orig_type = type;
+
   while (TREE_CODE (type) == RECORD_TYPE)
 {
-  tree field, sin

[Committed] IBM zSystems: Add support for z16 as CPU name.

2022-04-11 Thread Andreas Krebbel via Gcc-patches
So far z16 was identified as arch14. After the machine has been
announced we can now add the real name.

gcc/ChangeLog:

* common/config/s390/s390-common.cc: Rename PF_ARCH14 to PF_Z16.
* config.gcc: Add z16 as march/mtune switch.
* config/s390/driver-native.cc (s390_host_detect_local_cpu):
Recognize z16 with -march=native.
* config/s390/s390-opts.h (enum processor_type): Rename
PROCESSOR_ARCH14 to PROCESSOR_3931_Z16.
* config/s390/s390.cc (PROCESSOR_ARCH14): Rename to ...
(PROCESSOR_3931_Z16): ... throughout the file.
(s390_processor processor_table): Add z16 as cpu string.
* config/s390/s390.h (enum processor_flags): Rename PF_ARCH14 to
PF_Z16.
(TARGET_CPU_ARCH14): Rename to ...
(TARGET_CPU_Z16): ... this.
(TARGET_CPU_ARCH14_P): Rename to ...
(TARGET_CPU_Z16_P): ... this.
(TARGET_ARCH14): Rename to ...
(TARGET_Z16): ... this.
(TARGET_ARCH14_P): Rename to ...
(TARGET_Z16_P): ... this.
* config/s390/s390.md (cpu_facility): Rename arch14 to z16 and
check TARGET_Z16 instead of TARGET_ARCH14.
* config/s390/s390.opt: Add z16 to processor_type.
* doc/invoke.texi: Document z16 and arch14.
---
 gcc/common/config/s390/s390-common.cc |  4 ++--
 gcc/config.gcc|  2 +-
 gcc/config/s390/driver-native.cc  |  6 +-
 gcc/config/s390/s390-opts.h   |  2 +-
 gcc/config/s390/s390.cc   | 14 --
 gcc/config/s390/s390.h| 16 
 gcc/config/s390/s390.md   |  6 +++---
 gcc/config/s390/s390.opt  |  5 -
 gcc/doc/invoke.texi   |  3 ++-
 9 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/common/config/s390/s390-common.cc 
b/gcc/common/config/s390/s390-common.cc
index caec2f14c6c..72a5ef47eaa 100644
--- a/gcc/common/config/s390/s390-common.cc
+++ b/gcc/common/config/s390/s390-common.cc
@@ -50,10 +50,10 @@ EXPORTED_CONST int processor_flags_table[] =
 /* z15 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15,
-/* arch14 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+/* z16 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15
-| PF_NNPA | PF_ARCH14
+| PF_NNPA | PF_Z16
   };
 
 /* Change optimizations to be performed, depending on the
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 48a5bbcf787..c5064dd3766 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5532,7 +5532,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
-   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | 
arch11 | arch12 | arch13 | arch14 )
+   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | z16 | arch5 | arch6 | arch7 | arch8 | arch9 | 
arch10 | arch11 | arch12 | arch13 | arch14 )
# OK
;;
*)
diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 48524c49251..b5eb222872d 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -123,8 +123,12 @@ s390_host_detect_local_cpu (int argc, const char **argv)
case 0x8562:
  cpu = "z15";
  break;
+   case 0x3931:
+   case 0x3932:
+ cpu = "z16";
+ break;
default:
- cpu = "arch14";
+ cpu = "z16";
  break;
}
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index 1ec84631a5f..4ef82ac5d34 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -38,7 +38,7 @@ enum processor_type
   PROCESSOR_2964_Z13,
   PROCESSOR_3906_Z14,
   PROCESSOR_8561_Z15,
-  PROCESSOR_ARCH14,
+  PROCESSOR_3931_Z16,
   PROCESSOR_NATIVE,
   PROCESSOR_max
 };
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..1342a2e7db0 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -337,7 +337,7 @@ const struct s390_processor processor_table[] =
   { "z13","z13",PROCESSOR_2964_Z13,&zEC12_cost,  11 },
   { "z14","arch12", PROCESSOR_3906_Z14,&zEC12_cost,  12 },
   { "z15","arch13", PROCESSOR_8561_Z15,&zEC12_cost,  13 },
-  { "arch14", "arch14", PROCESSOR_ARCH14,  &zEC12_cost,  14 },
+  { "z16","arch14", PROCESSOR_3931_Z16,

Re: [PATCH] testsuite: Skip pr105250.c for powerpc and s390 [PR105266]

2022-04-13 Thread Andreas Krebbel via Gcc-patches
On 4/14/22 05:10, Kewen.Lin wrote:
> Hi,
> 
> The test case pr105250.c is like its related pr105140.c, which
> suffers the error with message like "{AltiVec,vector} argument
> passed to unprototyped" on powerpc and s390.  So like commits
> r12-8025 and r12-8039, this fix is to add the dg-skip-if for
> powerpc*-*-* and s390*-*-*.
> 
> Tested on powerpc64le-linux-gnu P9 and it should work on s390
> as its similar PR105147.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/105266
>   * gcc.dg/pr105250.c: Skip for powerpc*-*-* and s390*-*-*.

Ok for s390. Thanks!

Andreas


Re: [PATCH] testsuite/s390: Silence warning in pr80725.c

2022-04-13 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 09:35, Robin Dapp wrote:
> Hi,
> 
> this test case checks that we do not ICE but FAILs because of
> -Wint-to-pointer-cast.  Silence this warning.
> 
> Is it OK?

Ok. Thanks!

Andreas



Re: [PATCH] s390: Add scheduler description for z16

2022-04-13 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 12:23, Robin Dapp wrote:
> Hi,
> 
> this patch adds the scheduler description for z16.  Bootstrapped and
> regtested with --with-arch=z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_get_sched_attrmask): Add z16.
>   (s390_get_unit_mask): Likewise.
>   (s390_is_fpd): Likewise.
>   (s390_is_fxd): Likewise.
>   * config/s390/s390.md 
> (z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15):
>   Add z16.
>   (z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16):
>   Likewise.
>   * config/s390/3931.md: New file.

Ok. Thanks!

Andreas




Re: GCC 11.2.1 Status Report (2022-04-13), branch frozen for release

2022-04-14 Thread Andreas Krebbel via Gcc-patches
On 4/13/22 09:30, Richard Biener via Gcc wrote:
> 
> Status
> ==
> 
> The gcc-11 branch is now frozen in preparation for a GCC 11.3 release
> candidate and the GCC 11.3 release next week.  All changes now require
> release manager approval.

Hi,

I would like to push:

https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593103.html

to GCC 11 branch before 11.3 release. Ok?

Bye,

Andreas


Re: [PATCH v2] Disable -fsplit-stack support on non-glibc targets

2022-01-20 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 23:52, Richard Sandiford wrote:
> cc:ing the x86 and s390 maintainers
> 
> soeren--- via Gcc-patches  writes:
>> From: Sören Tempel 
>>
>> The -fsplit-stack option requires the pthread_t TCB definition in the
>> libc to provide certain struct fields at specific hardcoded offsets. As
>> far as I know, only glibc provides these fields at the required offsets.
>> Most notably, musl libc does not have these fields. However, since gcc
>> accesses the fields using a fixed offset, this does not cause a
>> compile-time error, but instead results in a silent memory corruption at
>> run-time with musl libc. For example, on s390x libgcc's
>> __stack_split_initialize CTOR will overwrite the cancel field in the
>> pthread_t TCB on musl.
>>
>> The -fsplit-stack option is used within the gcc code base itself by
>> gcc-go (if available). On musl-based systems with split-stack support
>> (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
>> misbehave at run-time.
>>
>> This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
>> since it is not supported on non-glibc targets anyhow. This is achieved
>> by checking if gcc targets a glibc-based system. This check has been
>> added for x86 and s390x, the rs6000 config already checks for
>> TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
>> support. With this patch applied, the gcc-go configure script will
>> detect that -fsplit-stack support is not available and will not use it.
>>
>> See https://www.openwall.com/lists/musl/2012/10/16/12
>>
>> This patch was written under the assumption that glibc is the only libc
>> implementation which supports the required fields at the required
>> offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
>> Edge on the s390x and x86 architectures by bootstrapping Google's Go
>> implementation with gcc-go.
>>
>> Signed-off-by: Sören Tempel 
>>
>> gcc/ChangeLog:
>>
>>  * common/config/s390/s390-common.c (s390_supports_split_stack):
>>  Only support split-stack on glibc targets.
>>  * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
>>  * config/i386/gnu.h (defined): Ditto.

s390 parts are ok.

Thanks!

Andreas

>> ---
>> This version of the patch addresses feedback by Andrew Pinski and uses
>> OPTION_GLIBC as well as opts->x_linux_libc == LIBC_GLIBC to detect glibc
>> targets (instead of relying on TARGET_GLIBC_MAJOR).
>>
>>  gcc/common/config/s390/s390-common.c | 11 +--
>>  gcc/config/i386/gnu-user-common.h|  5 +++--
>>  gcc/config/i386/gnu.h|  6 +-
>>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> Sorry for the slow review.  The patch LGTM bar some minor formatting
> nits below, but target maintainers should have the final say.
> 
>> diff --git a/gcc/common/config/s390/s390-common.c 
>> b/gcc/common/config/s390/s390-common.c
>> index b6bc8501742..fc86e0bc5e7 100644
>> --- a/gcc/common/config/s390/s390-common.c
>> +++ b/gcc/common/config/s390/s390-common.c
>> @@ -116,13 +116,20 @@ s390_handle_option (struct gcc_options *opts 
>> ATTRIBUTE_UNUSED,
>>  
>>  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
>> We don't verify it, since earlier versions just have padding at
>> -   its place, which works just as well.  */
>> +   its place, which works just as well. For other libc implementations
> 
> GCC style is to use 2 spaces after a full stop.  Same for the x86 part.
> 
>> +   we disable the feature entirely to avoid corrupting the TCB.  */
>>  
>>  static bool
>>  s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
>> struct gcc_options *opts ATTRIBUTE_UNUSED)
> 
> These parameters are no longer unused after the patch, so it'd be good
> to remove the attributes.
> 
>>  {
>> -  return true;
>> +  if (opts->x_linux_libc == LIBC_GLIBC) {
>> +return true;
>> +  } else {
>> +if (report)
>> +  error("%<-fsplit-stack%> currently only supported on GNU/Linux");
>> +return false;
>> +  }
> 
> Normal GCC formatting would be something like:
> 
>   if (opts->x_linux_libc == LIBC_GLIBC)
> return true;
> 
>   if (report)
> error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
>   return false;
> 
> Sorry for the fussy rules.
> 
> Thanks,
> Richard
> 
>>  }
>>  
>>  #undef TARGET_DEFAULT_TARGET_FLAGS
>> diff --git a/gcc/config/i386/gnu-user-common.h 
>> b/gcc/config/i386/gnu-user-common.h
>> index 00226f5a455..6e13315b5a3 100644
>> --- a/gcc/config/i386/gnu-user-common.h
>> +++ b/gcc/config/i386/gnu-user-common.h
>> @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
>>  #define STACK_CHECK_STATIC_BUILTIN 1
>>  
>>  /* We only build the -fsplit-stack support in libgcc if the
>> -   assembler has full support for the CFI directives.  */
>> -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
>> +   assembler has full support for the CFI directives and
>> +   targets glibc.  */
>> +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE 

Re: [PATCH] s390: Split CCSmode into CCSINT and CCSFP

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 17:13, Robin Dapp wrote:
> Hi,
> 
> this patch splits the CCSmode into an integer and a floating point
> variant.  This allows ifcvt to consider floating point compares which
> would be rejected before because they could not be reversed.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md: Add CCSINTmode and CCSFPmode.
>   * config/s390/s390-modes.def (UNORDERED): Likewise.
>   (CC_MODE): Likewise.
>   * config/s390/s390.cc (s390_cc_modes_compatible): Likewise.
>   (s390_match_ccmode_set): Likewise.
>   (s390_select_ccmode): Likewise.
>   (s390_branch_condition_mask): Likewise.
>   (s390_reverse_condition): Likewise.
>   * config/s390/s390.h (REVERSIBLE_CC_MODE): Likewise.
>   * config/s390/s390.md: Likewise.
>   * config/s390/subst.md: Likewise.

> diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
> index 33194d3f3d6..ec47416cc1b 100644
> --- a/gcc/config/s390/predicates.md
> +++ b/gcc/config/s390/predicates.md
> @@ -325,7 +325,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == LTU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == UNGT;

Can we get an UNGT for CCSINTmode here? Shouldn't this be just GT?

>
>  case E_CCSRmode:
> @@ -370,7 +371,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == GEU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == LE;
>
>  case E_CCSRmode:
> diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
> index b419907960e..eafe1e12938 100644
> --- a/gcc/config/s390/s390-modes.def
> +++ b/gcc/config/s390/s390-modes.def
> @@ -48,12 +48,12 @@ CCUR: EQ  GTU  LTU NE 
> (CLGF/R)
>
>  Signed compares
>
> -CCS:  EQ  LT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,
> -   LTDBR, LTDR, LTEBR, 
> LTER,
> +CCSINT: EQLT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,

CC3 for signed integer compares should not occur. So perhaps '-' instead of UNORDERED?

> CG/R, C/R/Y, CGHI, 
> CHI,
> -   CDB/R, CD/R, CEB/R, 
> CE/R,
> -   ADB/R, AEB/R, SDB/R, 
> SEB/R,
> SRAG, SRA, SRDA)
> +CCSFP:  EQLT   GT  UNORDERED  (CDB/R, CD/R, CEB/R, 
> CE/R,
> +   LTDBR, LTDR, LTEBR, 
> LTER,
> +   ADB/R, AEB/R, SDB/R, 
> SEB/R)
>  CCSR: EQ  GT   LT  UNORDERED  (CGF/R, CH/Y)
>  CCSFPS: EQLT   GT  UNORDERED  (KEB/R, KDB/R, KXBR, 
> KDTR,
>  KXTR, WFK)
...
> @@ -2139,7 +2148,8 @@ s390_branch_condition_mask (rtx code)
>   }
>break;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>  case E_CCSFPSmode:
>switch (GET_CODE (code))
>   {

We will need a new switch statement for CCSINT without all the FP-only
comparison operators.

Andreas


Re: [PATCH] s390: Change costs for load on condition.

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 11:10, Robin Dapp wrote:
> Hi,
> 
> this patch is a follow-up patch to the recent ifcvt changes. It
> increased costs for a load on condition to 6.  This ensures that we
> if-convert sequences of three regular instructions (of cost 4) e.g. a
> compare and two SETs into two loads on condition (of cost 6).  With a
> cost of 5, four-insn sequences (three SETs) would also be if-converted.
> 
> The adjustment to the mov[qi/si]cc expander makes sure we if-convert a
> QImode/bool.  Before, combine would create a paradoxical subreg itself
> but need an additional insn.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_rtx_costs): Increase costs for load
>   on condition.
>   * config/s390/s390.md: Change mov[qi/si]cc expander.

Could you please add two tests for the sequences which are improved here? Just
to make sure we become aware once it breaks again.

Patch is ok. Thanks!

Andreas


[PATCH] PR101260 regcprop: Add mode change check for copy reg

2022-01-21 Thread Andreas Krebbel via Gcc-patches
When propagating a multi-word register into an access with a smaller
mode the can_change_mode backend hook is already consulted for the
original register.  This however is also required for the intermediate
copy in copy_regno which might use a different register class.

Bootstrapped on x86_64 and s390x. No testsuite regressions.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/101260
* regcprop.cc (maybe_mode_change): Invoke mode_change_ok also for
copy_regno.
---
 gcc/regcprop.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/regcprop.cc b/gcc/regcprop.cc
index 1a9bcf0a1ad..8e966f2b5ac 100644
--- a/gcc/regcprop.cc
+++ b/gcc/regcprop.cc
@@ -426,7 +426,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,
 
   if (orig_mode == new_mode)
 return gen_raw_REG (new_mode, regno);
-  else if (mode_change_ok (orig_mode, new_mode, regno))
+  else if (mode_change_ok (orig_mode, new_mode, regno)
+  && mode_change_ok (copy_mode, new_mode, copy_regno))
 {
   int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
   int use_nregs = hard_regno_nregs (copy_regno, new_mode);
-- 
2.34.1



Re: [PATCH] IBM Z: fix `section type conflict` with -mindirect-branch-table

2022-02-01 Thread Andreas Krebbel via Gcc-patches
On 2/1/22 21:49, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> s390_code_end () puts indirect branch tables into separate sections and
> tries to switch back to wherever it was in the beginning by calling
> switch_to_section (current_function_section ()).
> 
> First of all, this is unnecessary - the other backends don't do it.
> 
> Furthermore, at this time there is no current function, but if the
> last processed function was cold, in_cold_section_p remains set.  This
> causes targetm.asm_out.function_section () to call
> targetm.section_type_flags (), which in absence of current function
> decl classifies the section as SECTION_WRITE.  This causes a section
> type conflict with the existing SECTION_CODE.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_code_end): Do not switch back to
>   code section.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/nobp-section-type-conflict.c: New test.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390.cc   |  1 -
>  .../s390/nobp-section-type-conflict.c | 22 +++
>  2 files changed, 22 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 43c5c72554a..2db12d4ba4b 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -16809,7 +16809,6 @@ s390_code_end (void)
> assemble_name_raw (asm_out_file, label_start);
> fputs ("-.\n", asm_out_file);
>   }
> -   switch_to_section (current_function_section ());
>   }
>  }
>  }
> diff --git a/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c 
> b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> new file mode 100644
> index 000..5d78bc99bb5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> @@ -0,0 +1,22 @@
> +/* Checks that we don't get error: section type conflict with ‘put_page’.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-mindirect-branch=thunk-extern 
> -mfunction-return=thunk-extern -mindirect-branch-table -O2" } */
> +
> +int a;
> +int b (void);
> +void c (int);
> +
> +static void
> +put_page (void)
> +{
> +  if (b ())
> +c (a);
> +}
> +
> +__attribute__ ((__section__ (".init.text"), __cold__)) void
> +d (void)
> +{
> +  put_page ();
> +  put_page ();
> +}



Re: [PATCH][GCC11] IBM Z: fix `section type conflict` with -mindirect-branch-table

2022-02-02 Thread Andreas Krebbel via Gcc-patches
On 2/2/22 12:57, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for
> releases/gcc-11?
> 
> 
> 
> s390_code_end () puts indirect branch tables into separate sections and
> tries to switch back to wherever it was in the beginning by calling
> switch_to_section (current_function_section ()).
> 
> First of all, this is unnecessary - the other backends don't do it.
> 
> Furthermore, at this time there is no current function, but if the
> last processed function was cold, in_cold_section_p remains set.  This
> causes targetm.asm_out.function_section () to call
> targetm.section_type_flags (), which in absence of current function
> decl classifies the section as SECTION_WRITE.  This causes a section
> type conflict with the existing SECTION_CODE.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.c (s390_code_end): Do not switch back to
>   code section.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/nobp-section-type-conflict.c: New test.

Ok. Thanks!

Andreas

> 
> (cherry picked from commit 8753b13a31c777cdab0265dae0b68534247908f7)
> ---
>  gcc/config/s390/s390.c|  1 -
>  .../s390/nobp-section-type-conflict.c | 22 +++
>  2 files changed, 22 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> 
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 8895dd7cc76..2d2e6522eb4 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -16700,7 +16700,6 @@ s390_code_end (void)
> assemble_name_raw (asm_out_file, label_start);
> fputs ("-.\n", asm_out_file);
>   }
> -   switch_to_section (current_function_section ());
>   }
>  }
>  }
> diff --git a/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c 
> b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> new file mode 100644
> index 000..5d78bc99bb5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
> @@ -0,0 +1,22 @@
> +/* Checks that we don't get error: section type conflict with ‘put_page’.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-mindirect-branch=thunk-extern 
> -mfunction-return=thunk-extern -mindirect-branch-table -O2" } */
> +
> +int a;
> +int b (void);
> +void c (int);
> +
> +static void
> +put_page (void)
> +{
> +  if (b ())
> +c (a);
> +}
> +
> +__attribute__ ((__section__ (".init.text"), __cold__)) void
> +d (void)
> +{
> +  put_page ();
> +  put_page ();
> +}



[PATCH] Check always_inline flag in s390_can_inline_p [PR104327]

2022-02-06 Thread Andreas Krebbel via Gcc-patches
MASK_MVCLE is set for -Os but not for other optimization levels. In
general it should not make much sense to inline across calls where the
flag is different but we have to allow it for always_inline.

The patch also rearranges the hook implementation a bit based on the
recommendations from Jakub and Martin in the PR.

Bootstrapped and regression tested on s390x with various arch flags.
Will commit after giving a few days for comments.

gcc/ChangeLog:

PR target/104327
* config/s390/s390.cc (s390_can_inline_p): Accept a few more flags
if always_inline is set. Don't inline when tune differs without
always_inline.

gcc/testsuite/ChangeLog:

PR target/104327
* gcc.c-torture/compile/pr104327.c: New test.
---
 gcc/config/s390/s390.cc   | 66 ++-
 .../gcc.c-torture/compile/pr104327.c  | 15 +
 2 files changed, 64 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr104327.c

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5c2a830f9f0..bbf2dd8dfb4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16091,6 +16091,25 @@ s390_valid_target_attribute_p (tree fndecl,
 static bool
 s390_can_inline_p (tree caller, tree callee)
 {
+  unsigned HOST_WIDE_INT all_masks =
+(MASK_64BIT | MASK_BACKCHAIN | MASK_DEBUG_ARG | MASK_ZARCH
+ | MASK_HARD_DFP | MASK_SOFT_FLOAT
+ | MASK_OPT_HTM | MASK_LONG_DOUBLE_128 | MASK_MVCLE | MASK_PACKED_STACK
+ | MASK_SMALL_EXEC | MASK_OPT_VX | MASK_ZVECTOR);
+
+  /* Flags which if present in the callee are required in the caller as well.  
*/
+  unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM;
+
+  /* Flags which affect the ABI and in general prevent inlining.  */
+  unsigned HOST_WIDE_INT must_match_masks =
+(MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT
+ | MASK_LONG_DOUBLE_128 | MASK_OPT_VX);
+
+  /* Flags which we in general want to prevent inlining but accept for
+ always_inline.  */
+  unsigned HOST_WIDE_INT always_inline_safe_masks =
+MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC;
+
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 
@@ -16103,16 +16122,18 @@ s390_can_inline_p (tree caller, tree callee)
 
   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
-  bool ret = true;
 
-  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
-  != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
-ret = false;
+  /* If one of these triggers make sure to add proper handling of your
+ new flag to this hook.  */
+  gcc_assert (!(caller_opts->x_target_flags & ~all_masks));
+  gcc_assert (!(callee_opts->x_target_flags & ~all_masks));
 
-  /* Don't inline functions to be compiled for a more recent arch into a
- function for an older arch.  */
-  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
-ret = false;
+  bool always_inline
+= (DECL_DISREGARD_INLINE_LIMITS (callee)
+   && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));
+
+  if (!always_inline)
+must_match_masks |= always_inline_safe_masks;
 
   /* Inlining a hard float function into a soft float function is only
  allowed if the hard float function doesn't actually make use of
@@ -16120,16 +16141,27 @@ s390_can_inline_p (tree caller, tree callee)
 
  We are called from FEs for multi-versioning call optimization, so
  beware of ipa_fn_summaries not available.  */
-  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
-&& !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
-   || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
-   && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
-  && (! ipa_fn_summaries
-  || ipa_fn_summaries->get
-  (cgraph_node::get (callee))->fp_expressions))
-ret = false;
+  if (always_inline && ipa_fn_summaries
+  && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions)
+must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT);
 
-  return ret;
+  if ((caller_opts->x_target_flags & must_match_masks)
+  != (callee_opts->x_target_flags & must_match_masks))
+return false;
+
+  if (~(caller_opts->x_target_flags & caller_required_masks)
+  & (callee_opts->x_target_flags & caller_required_masks))
+return false;
+
+  /* Don't inline functions to be compiled for a more recent arch into a
+ function for an older arch.  */
+  if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
+return false;
+
+  if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune)
+return false;
+
+  return true;
 }
 #endif
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr104327.c 
b/gcc/testsuite/gcc.c-torture/co

Re: [PATCH] Check always_inline flag in s390_can_inline_p [PR104327]

2022-02-07 Thread Andreas Krebbel via Gcc-patches
On 2/7/22 09:11, Jakub Jelinek wrote:
...
> 1) formatting, = should be at the start of next line rather than end of the
>line
> 2) all_masks, always_inline_safe_masks and caller_required_masks aren't
>ever modified, perhaps make them const?
> 3) I wonder if there is any advantage to have all_masks with all the masks
>enumerated, compared to
>const HOST_WIDE_INT all_masks
>  = (caller_required_masks | must_match_masks | always_inline_safe_masks
>   | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR);
>i.e. when you add a new mask, instead of listing it in all_masks
>and one or more of the other vars you'd just stick it either in one
>or more of those vars or in all_masks.

I've just committed the patch with these changes. Thanks Jakub!

Andreas


diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5c2a830f9f0..c6cfe41ad7b 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16091,6 +16091,23 @@ s390_valid_target_attribute_p (tree fndecl,
 static bool
 s390_can_inline_p (tree caller, tree callee)
 {
+  /* Flags which if present in the callee are required in the caller as well.  
*/
+  const unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM;
+
+  /* Flags which affect the ABI and in general prevent inlining.  */
+  unsigned HOST_WIDE_INT must_match_masks
+= (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT
+   | MASK_LONG_DOUBLE_128 | MASK_OPT_VX);
+
+  /* Flags which we in general want to prevent inlining but accept for
+ always_inline.  */
+  const unsigned HOST_WIDE_INT always_inline_safe_masks
+= MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC;
+
+  const HOST_WIDE_INT all_masks
+ = (caller_required_masks | must_match_masks | always_inline_safe_masks
+   | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR);
+
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

@@ -16103,16 +16120,18 @@ s390_can_inline_p (tree caller, tree callee)

   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
-  bool ret = true;

-  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
-  != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
-ret = false;
+  /* If one of these triggers make sure to add proper handling of your
+ new flag to this hook.  */
+  gcc_assert (!(caller_opts->x_target_flags & ~all_masks));
+  gcc_assert (!(callee_opts->x_target_flags & ~all_masks));

-  /* Don't inline functions to be compiled for a more recent arch into a
- function for an older arch.  */
-  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
-ret = false;
+  bool always_inline
+= (DECL_DISREGARD_INLINE_LIMITS (callee)
+   && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));
+
+  if (!always_inline)
+must_match_masks |= always_inline_safe_masks;

   /* Inlining a hard float function into a soft float function is only
  allowed if the hard float function doesn't actually make use of
@@ -16120,16 +16139,27 @@ s390_can_inline_p (tree caller, tree callee)

  We are called from FEs for multi-versioning call optimization, so
  beware of ipa_fn_summaries not available.  */
-  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
-&& !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
-   || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
-   && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
-  && (! ipa_fn_summaries
-  || ipa_fn_summaries->get
-  (cgraph_node::get (callee))->fp_expressions))
-ret = false;
+  if (always_inline && ipa_fn_summaries
+  && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions)
+must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT);

-  return ret;
+  if ((caller_opts->x_target_flags & must_match_masks)
+  != (callee_opts->x_target_flags & must_match_masks))
+return false;
+
+  if (~(caller_opts->x_target_flags & caller_required_masks)
+  & (callee_opts->x_target_flags & caller_required_masks))
+return false;
+
+  /* Don't inline functions to be compiled for a more recent arch into a
+ function for an older arch.  */
+  if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
+return false;
+
+  if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune)
+return false;
+
+  return true;
 }
 #endif

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr104327.c
b/gcc/testsuite/gcc.c-torture/compile/pr104327.c
new file mode 100644
index 000..d54e5d58cc4
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr104327.c
@@ -0,0 +1,15 @@
+/* PR target/104327 */
+
+void foo (int *);
+
+static inline __attribute__((always_inline)) void
+bar (int *x)
+{
+  foo (x);
+}
+
+__attribute__((cold, optimize

Re: [PATCH] s390: Change SET rtx_cost handling.

2022-02-25 Thread Andreas Krebbel via Gcc-patches
On 2/25/22 12:38, Robin Dapp wrote:
> Hi,
> 
> the IF_THEN_ELSE detection currently prevents us from properly costing
> register-register moves which causes the lower-subreg pass to assume
> that a VR-VR move is as expensive as two GPR-GPR moves.
> 
> This patch adds handling for SETs containing REGs as well as MEMs and is
> inspired by the aarch64 implementation.
> 
> Bootstrapped and regtested on z900 up to z15. Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_address_cost): Declare.
>   (s390_hard_regno_nregs): Declare.
>   (s390_rtx_costs): Add handling for REG and MEM in SET.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
> test.

Ok. Thanks

Andreas


Re: [PATCH] IBM Z: Fix load-and-test peephole2 condition

2021-11-19 Thread Andreas Krebbel via Gcc-patches
On 11/19/21 10:45, Stefan Schulze Frielinghaus wrote:
...
> diff --git a/gcc/testsuite/gcc.target/s390/2029.c 
> b/gcc/testsuite/gcc.target/s390/2029.c
> new file mode 100644
> index 000..1a6df4f4b89
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/2029.c
> @@ -0,0 +1,12 @@
> +/* { dg-do run } */
> +/* { dg-options "-Os -march=z10" } */

Although z10 is pretty old we will need an effective target check here. Ok with 
that change.

Thanks!

Andreas


Re: [PATCH] s390: Fix bootstrap error with checking and -m31

2022-10-18 Thread Andreas Krebbel via Gcc-patches
On 10/19/22 08:22, Robin Dapp wrote:
> Hi,
> 
> since r13-2746 we hit an ICE when bootstrapping with -m31 and
> --enable-checking=all.
> 
> ../../../../libgfortran/ieee/ieee_helper.c: In function
> 'ieee_class_helper_16':
> ../../../../libgfortran/ieee/ieee_helper.c:77:3: internal compiler
> error: RTL check: expected code 'reg', have 'subreg' in rhs_regno, at
> rtl.h:1932
>77 |   }
>   |   ^
> ../../../../libgfortran/ieee/ieee_helper.c:87:1: note: in expansion of
> macro 'CLASSMACRO'
>87 | CLASSMACRO(16)
>   | ^~
> 
> This patch fixes the problem by first checking for reload_completed
> and also ensuring that REGNO is only called on reg operands rather
> than subregs.
> 
> Bootstrapped and regtested --with-arch=arch14 and --enable-checking=all.
> 
> Is it OK?
Ok. Thanks!

Andreas



Re: [PATCH] IBM zSystems: Fix function_ok_for_sibcall [PR106355]

2022-10-18 Thread Andreas Krebbel via Gcc-patches
On 8/17/22 13:50, Stefan Schulze Frielinghaus wrote:
> For a parameter with BLKmode we cannot use REG_NREGS in order to
> determine the number of consecutive registers.  Streamlined this with
> the implementation of s390_function_arg.
> 
> Fix some indentation whitespace, too.
> 
> Assuming bootstrap and regtest are ok for mainline and gcc-{10,11,12},
> ok to install for all of those?
> 
> PR target/106355
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_call_saved_register_used): For a
>   parameter with BLKmode fix determining number of consecutive
>   registers.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/pr106355.h: Common code for new tests.
>   * gcc.target/s390/pr106355-1.c: New test.
>   * gcc.target/s390/pr106355-2.c: New test.
>   * gcc.target/s390/pr106355-3.c: New test.

Ok for all those branches. Please check if the branches are currently open 
before committing. GCC 11
and 12 appear to be but I'm not sure if GCC 10 has been re-opened again. There 
should be a final
10.5 release some day though.

Thanks!

Andreas


Re: [PATCH] s390: Add -munroll-only-small-loops.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:00, Robin Dapp wrote:
> Hi,
> 
> inspired by Power we also introduce -munroll-only-small-loops.  This
> implies activating -funroll-loops and -munroll-only-small-loops at -O2
> and above.
> 
> Bootstrapped and regtested.
> 
> This introduces one regression in gcc.dg/sms-compare-debug-1.c but
> currently dumps for sms are broken as well.  The difference is in the
> location of some INSN_DELETED notes so I would consider this a minor issue.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * common/config/s390/s390-common.cc: Enable -funroll-loops and
>   -munroll-only-small-loops for OPT_LEVELS_2_PLUS_SPEED_ONLY.
>   * config/s390/s390.cc (s390_loop_unroll_adjust): Do not unroll
>   loops larger than 12 instructions.
>   (s390_override_options_after_change): Set unroll options.
>   (s390_option_override_internal): Likewise.
>   * config/s390/s390.opt: Document munroll-only-small-loops.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-copysign.c: Do not unroll.
>   * gcc.target/s390/zvector/autovec-double-quiet-uneq.c: Ditto.
>   * gcc.target/s390/zvector/autovec-double-signaling-ltgt.c: Ditto.
>   * gcc.target/s390/zvector/autovec-float-quiet-uneq.c: Ditto.
>   * gcc.target/s390/zvector/autovec-float-signaling-ltgt.c: Ditto.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Add z15 to s390_issue_rate.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:02, Robin Dapp wrote:
> Hi,
> 
> this patch tries to be more explicit by mentioning z15 in s390_issue_rate.
> 
> No changes in testsuite, bootstrap or SPEC obviously.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_issue_rate): Add z15.
> ---
>  gcc/config/s390/s390.cc | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index ef38fbe68c84..528cd8c7f0f6 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -8582,6 +8582,7 @@ s390_issue_rate (void)
>  case PROCESSOR_2827_ZEC12:
>  case PROCESSOR_2964_Z13:
>  case PROCESSOR_3906_Z14:
> +case PROCESSOR_8561_Z15:
>  case PROCESSOR_3931_Z16:
>  default:
>return 1;

Ok. Thanks!

Andreas



Re: [PATCH] s390: Use vpdi and verllg in vec_reve.

2022-08-15 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 12:13, Robin Dapp wrote:
> Hi,
> 
> swapping the two elements of a V2DImode or V2DFmode vector can be done
> with vpdi instead of using the generic way of loading a permutation mask
> from the literal pool and vperm.
> 
> Analogous to the V2DI/V2DF case reversing the elements of a four-element
> vector can be done by first swapping the elements of the first
> doubleword as well the ones of the second one and subsequently rotate
> the doublewords by 32 bits.
> 
> Bootstrapped and regtested, no regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   PR target/100869
>   * config/s390/vector.md (@vpdi4_2): New pattern.
>   (rotl3_di): New pattern.
>   * config/s390/vx-builtins.md: Use vpdi and verll for reversing
>   elements.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/zvector/vec-reve-int-long.c: New test.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Implement vec_extract via vec_select.

2022-08-16 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 16:19, Robin Dapp wrote:
> Hi,
> 
> vec_select can handle dynamic/runtime masks nowadays.  Therefore we can
> get rid of the UNSPEC_VEC_EXTRACT that was preventing further
> optimizations like combining instructions with vec_extract patterns.
> 
> Bootstrapped and regtested. No regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Remove UNSPEC_VEC_EXTRACT.
>   * config/s390/vector.md: Rewrite patterns to use vec_select.
>   * config/s390/vx-builtins.md (vec_scatter_element_SI):
>   Likewise.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Implement vec_set with vec_merge and, vec_duplicate.

2022-08-16 Thread Andreas Krebbel via Gcc-patches
On 8/12/22 16:48, Robin Dapp wrote:
> Hi,
> 
> similar to other backends this patch implements vec_set via
> vec_merge and vec_duplicate instead of an unspec.  This opens up
> more possibilities to combine instructions.
> 
> Bootstrapped and regtested. No regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Implement vec_set with vec_merge and
>   vec_duplicate.
>   * config/s390/vector.md: Likewise.
>   * config/s390/vx-builtins.md: Likewise.
>   * config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
>   (print_operand_address): New output modifier.
>   (print_operand): New output modifier.

The way you handle the element selector doesn't look right to me. It appears to
be an index if it is a CONST_INT and a bitmask otherwise. I don't think it is
legal to change operand semantics like this depending on the operand type. This
would break, e.g., if LRA decided to load the immediate index into a register.

Couldn't you make the shift part of the RTX instead and have the parameter 
always as an index?

Bye,

Andreas

> ---
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index c86b26933d7a..ff89fb83360a 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -7073,11 +7073,10 @@ s390_expand_vec_init (rtx target, rtx vals)
>if (!general_operand (elem, GET_MODE (elem)))
>   elem = force_reg (inner_mode, elem);
> 
> -  emit_insn (gen_rtx_SET (target,
> -   gen_rtx_UNSPEC (mode,
> -   gen_rtvec (3, elem,
> -  GEN_INT (i), target),
> -   UNSPEC_VEC_SET)));
> +  emit_insn
> + (gen_rtx_SET
> +  (target, gen_rtx_VEC_MERGE
> +   (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << 
> i))));
>  }
>  }
> 
> @@ -8057,6 +8056,8 @@ print_operand_address (FILE *file, rtx addr)
>  'S': print S-type memory reference (base+displacement).
>  'Y': print address style operand without index (e.g. shift count or
> setmem
>operand).
> +'P': print address-style operand without index but with the offset as
> +  if it were specified by a 'p' format flag.
> 
>  'b': print integer X as if it's an unsigned byte.
>  'c': print integer X as if it's an signed byte.
> @@ -8068,6 +8069,7 @@ print_operand_address (FILE *file, rtx addr)
>  'k': print the first nonzero SImode part of X.
>  'm': print the first SImode part unequal to -1 of X.
>  'o': print integer X as if it's an unsigned 32bit word.
> +'p': print N such that 2^N == X (X must be a power of 2 and const int).
>  's': "start" of contiguous bitmask X in either DImode or vector
> inner mode.
>  't': CONST_INT: "start" of contiguous bitmask X in SImode.
>CONST_VECTOR: Generate a bitmask for vgbm instruction.
> @@ -8237,6 +8239,16 @@ print_operand (FILE *file, rtx x, int code)
>print_shift_count_operand (file, x);
>return;
> 
> +case 'P':
> +  if (CONST_INT_P (x))
> + {
> +   ival = exact_log2 (INTVAL (x));
> +   fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
> + }
> +  else
> + print_shift_count_operand (file, x);
> +  return;
> +
>  case 'K':
>/* Append @PLT to both local and non-local symbols in order to
> support
>Linux Kernel livepatching: patches contain individual functions and
> @@ -8321,6 +8333,9 @@ print_operand (FILE *file, rtx x, int code)
>   case 'o':
> ival &= 0xffffffff;
> break;
> + case 'p':
> +   ival = exact_log2 (INTVAL (x));
> +   break;
>   case 'e': case 'f':
>   case 's': case 't':
> {
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index f37d8fd33a15..a82db4c624fa 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -183,7 +183,6 @@ (define_c_enum "unspec" [
> UNSPEC_VEC_GFMSUM_128
> UNSPEC_VEC_GFMSUM_ACCUM
> UNSPEC_VEC_GFMSUM_ACCUM_128
> -   UNSPEC_VEC_SET
> 
> UNSPEC_VEC_VSUMG
> UNSPEC_VEC_VSUMQ
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index c50451a8326c..bde3a39db3d4 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -467,12 +467,17 @@ (define_insn "mov"
>  ; vec_set is supposed to *modify* an existing vector so operand 0 is
>  ; duplicated as input operand.
>  (define_expand "vec_set"
> -  [(set (match_operand:V0 "register_operand"  "")
> - (unspec:V [(match_operand: 1 "general_operand"   "")
> -(match_operand:SI2 "nonmemory_operand" "")
> -(match_dup 0)]
> -UNSPEC_VEC_SET))]
> -  "TARGET_VX")
> +  [(set (match_operand:V  0 "register_operand" "")
> + (vec_merge:V
> +   (vec_duplicate:V
> + (match_operand: 1 "general_operand" ""))
>

Re: [PATCH] s390: Recognize reverse/element swap permute patterns.

2022-08-22 Thread Andreas Krebbel via Gcc-patches
On 8/22/22 17:10, Robin Dapp wrote:
> Hi,
> 
> after discussing off-list, here is v2 of the patch.  We now recognize if
> the permutation mask only refers to the first or the second operand and
> use this later when emitting vpdi.
> 
> Regtested and bootstrapped, no regressions.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> From 1f11a6b89c9b0ad64b480229cd4db06e887a Mon Sep 17 00:00:00 2001
> From: Robin Dapp 
> Date: Fri, 24 Jun 2022 15:17:08 +0200
> Subject: [PATCH v2] s390: Recognize reverse/element swap permute patterns.
> 
> This adds functions to recognize reverse/element swap permute patterns
> for vler, vster as well as vpdi and rotate.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (expand_perm_with_vpdi): Recognize swap pattern.
>   (is_reverse_perm_mask): New function.
>   (expand_perm_with_rot): Recognize reverse pattern.
>   (expand_perm_with_vstbrq): New function.
>   (expand_perm_with_vster): Use vler/vster for element reversal on z15.
>   (vectorize_vec_perm_const_1): Use.
>   (s390_vectorize_vec_perm_const): Add expand functions.
>   * config/s390/vx-builtins.md: Prefer vster over vler.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vperm-rev-z14.c: New test.
>   * gcc.target/s390/vector/vperm-rev-z15.c: New test.
>   * gcc.target/s390/zvector/vec-reve-store-byte.c: Adjust test
>   expectation.

Ok, thanks!

Andreas


Re: [PATCH 1/4] IBM Z: Remove unused RRe and RXe mode_attrs

2020-11-04 Thread Andreas Krebbel via Gcc-patches
On 03.11.20 22:36, Ilya Leoshkevich wrote:
> gcc/ChangeLog:
> 
> 2020-11-03  Ilya Leoshkevich  
> 
>   * config/s390/s390.md (RRe): Remove.
>   (RXe): Remove.

Ok. Thanks!

Andreas


Re: [PATCH 2/4] IBM Z: Unhardcode NR_C_MODES

2020-11-04 Thread Andreas Krebbel via Gcc-patches
On 03.11.20 22:45, Ilya Leoshkevich wrote:
> gcc/ChangeLog:
> 
> 2020-11-03  Ilya Leoshkevich  
> 
>   * config/s390/s390.c (NR_C_MODES): Unhardcode.
>   (s390_alloc_pool): Use size_t for iterating from 0 to
>   NR_C_MODES.
>   (s390_add_constant): Likewise.
>   (s390_find_constant): Likewise.
>   (s390_dump_pool): Likewise.
>   (s390_free_pool): Likewise.

Ok. Thanks!

Andreas



Re: [PATCH 3/4] IBM Z: Store long doubles in vector registers when possible

2020-11-04 Thread Andreas Krebbel via Gcc-patches
On 03.11.20 22:45, Ilya Leoshkevich wrote:
> On z14+, there are instructions for working with 128-bit floats (long
> doubles) in vector registers.  It's beneficial to use them instead of
> instructions that operate on floating point register pairs, because it
> allows to store 4 times more data in registers at a time, relieving
> register pressure.  The performance of new instructions is almost the
> same.
> 
> Implement by storing TFmode values in vector registers on z14+.  Since
> not all operations are available with the new instructions, keep the old
> ones using the new FPRX2 mode, and convert between it and TFmode when
> necessary (this is called "forwarder" expanders below).  Change the
> existing TFmode expanders to call either new- or old-style ones
> depending on whether we are on z14+ or older machines ("dispatcher"
> expanders).
> 
> gcc/ChangeLog:
> 
> 2020-11-03  Ilya Leoshkevich  
> 
>   * config/s390/s390-modes.def (FPRX2): New mode.
>   * config/s390/s390-protos.h (s390_fma_allowed_p): New function.
>   * config/s390/s390.c (s390_fma_allowed_p): Likewise.
>   (s390_build_signbit_mask): Support 128-bit masks.
>   (print_operand): Support printing the second word of a TFmode
>   operand as vector register.
>   (constant_modes): Add FPRX2mode.
>   (s390_class_max_nregs): Return 1 for TFmode on z14+.
>   (s390_is_fpr128): New function.
>   (s390_is_vr128): Likewise.
>   (s390_can_change_mode_class): Use s390_is_fpr128 and
>   s390_is_vr128 in order to determine whether mode refers to a FPR
>   pair or to a VR.
>   * config/s390/s390.h (EXPAND_MOVTF): New macro.
>   (EXPAND_TF): Likewise.
>   * config/s390/s390.md (PFPO_OP_TYPE_FPRX2): PFPO_OP_TYPE_TF
>   alias.
>   (ALL): Add FPRX2.
>   (FP_ALL): Add FPRX2 for z14+, restrict TFmode to z13-.
>   (FP): Likewise.
>   (FP_ANYTF): New mode iterator.
>   (BFP): Add FPRX2 for z14+, restrict TFmode to z13-.
>   (TD_TF): Likewise.
>   (xde): Add FPRX2.
>   (nBFP): Likewise.
>   (nDFP): Likewise.
>   (DSF): Likewise.
>   (DFDI): Likewise.
>   (SFSI): Likewise.
>   (DF): Likewise.
>   (SF): Likewise.
>   (fT0): Likewise.
>   (bt): Likewise.
>   (_d): Likewise.
>   (HALF_TMODE): Likewise.
>   (tf_fpr): New mode_attr.
>   (type): New mode_attr.
>   (*cmp_ccz_0): Use type instead of mode with fsimp.
>   (*cmp_ccs_0_fastmath): Likewise.
>   (*cmptf_ccs): New pattern for wfcxb.
>   (*cmptf_ccsfps): New pattern for wfkxb.
>   (mov): Rename to mov.
>   (signbit2): Rename to signbit2.
>   (isinf2): Renamed to isinf2.
>   (*TDC_insn_): Use type instead of mode with fsimp.
>   (fixuns_trunc2): Rename to
>   fixuns_trunc2.
>   (fix_trunctf2): Rename to fix_trunctf2_fpr.
>   (floatdi2): Rename to floatdi2, use type
>   instead of mode with itof.
>   (floatsi2): Rename to floatsi2, use type
>   instead of mode with itof.
>   (*floatuns2): Use type instead of mode for
>   itof.
>   (floatuns2): Rename to
>   floatuns2.
>   (trunctf2): Rename to trunctf2_fpr, use type instead
>   of mode with fsimp.
>   (extend2): Rename to
>   extend2.
>   (2): Rename to
>   2, use type instead of
>   mode with fsimp.
>   (rint2): Rename to rint2, use
>   type instead of mode with fsimp.
>   (2): Use type instead of mode for
>   fsimp.
>   (rint2): Likewise.
>   (trunc2): Rename to
>   trunc2.
>   (trunc2): Rename to
>   trunc2.
>   (extend2): Rename to
>   extend2.
>   (extend2): Rename to
>   extend2.
>   (add3): Rename to add3, use type instead of
>   mode with fsimp.
>   (*add3_cc): Use type instead of mode with fsimp.
>   (*add3_cconly): Likewise.
>   (sub3): Rename to sub3, use type instead of
>   mode with fsimp.
>   (*sub3_cc): Use type instead of mode with fsimp.
>   (*sub3_cconly): Likewise.
>   (mul3): Rename to mul3, use type instead of
>   mode with fsimp.
>   (fma4): Restrict using s390_fma_allowed_p.
>   (fms4): Restrict using s390_fma_allowed_p.
>   (div3): Rename to div3, use type instead of
>   mode with fdiv.
>   (neg2): Rename to neg2.
>   (*neg2_cc): Use type instead of mode with fsimp.
>   (*neg2_cconly): Likewise.
>   (*neg2_nocc): Likewise.
>   (*neg2): Likewise.
>   (abs2): Rename to abs2, use type instead of
>   mode with fdiv.
>   (*abs2_cc): Use type instead of mode with fsimp.
>   (*abs2_cconly): Likewise.
>   (*abs2_nocc): Likewise.
>   (*abs2): Likewise.
>   (*negabs2_cc): Likewise.
>   (*negabs2_cconly): Likewise.
>   (*negabs2_nocc): Likewise.
>   (*negabs2): Likewise.
>   (sqrt2): Rename to sqrt2, use type instead
>   of mode with fsqrt.
>   (cbranch4): Use FP_ANYTF instead of FP.
>   (copysign3): Rename to copysign3, use 

Re: [PATCH 4/4] IBM Z: Test long doubles in vector registers

2020-11-04 Thread Andreas Krebbel via Gcc-patches
These tests all use the -mzvector option but do not appear to make use of the
z vector language extensions. I think that option could be removed. Then these
tests should be moved to the vector subdir.

You could do the asm scanning also in dg-do run tests.

Andreas


On 03.11.20 22:46, Ilya Leoshkevich wrote:
> gcc/testsuite/ChangeLog:
> 
> 2020-11-03  Ilya Leoshkevich  
> 
>   * gcc.target/s390/zvector/long-double-callee-abi-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-caller-abi-run.c: New test.
>   * gcc.target/s390/zvector/long-double-caller-abi-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-copysign-run.c: New test.
>   * gcc.target/s390/zvector/long-double-copysign-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-fprx2-constant.c: New test.
>   * gcc.target/s390/zvector/long-double-from-double-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-double-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-float-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-float-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i16-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i16-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i32-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i32-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i64-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i64-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i8-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-i8-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u16-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u16-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u32-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u32-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u64-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u64-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u8-run.c: New test.
>   * gcc.target/s390/zvector/long-double-from-u8-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-double-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-double-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-float-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-float-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i16-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i16-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i32-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i32-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i64-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i64-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i8-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-i8-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u16-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u16-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u32-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u32-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u64-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u64-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u8-run.c: New test.
>   * gcc.target/s390/zvector/long-double-to-u8-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-vec-duplicate.c: New test.
>   * gcc.target/s390/zvector/long-double-wf.h: New test.
>   * gcc.target/s390/zvector/long-double-wfaxb-run.c: New test.
>   * gcc.target/s390/zvector/long-double-wfaxb-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-wfaxb.c: New test.
>   * gcc.target/s390/zvector/long-double-wfcxb-0001.c: New test.
>   * gcc.target/s390/zvector/long-double-wfcxb-0111.c: New test.
>   * gcc.target/s390/zvector/long-double-wfcxb-1011.c: New test.
>   * gcc.target/s390/zvector/long-double-wfcxb-1101.c: New test.
>   * gcc.target/s390/zvector/long-double-wfdxb-run.c: New test.
>   * gcc.target/s390/zvector/long-double-wfdxb-scan.c: New test.
>   * gcc.target/s390/zvector/long-double-wfdxb.c: New test.
>   * gcc.target/s390/zvector/long-double-wfixb.c: New test.
>   * gcc.target/s390/zvector/long-double-wfkxb-0111.c: New test.
>   * gcc.target/s390/zvector/long-double-wfkxb-1011.c: New test.
>   * gcc.target/s390/zvector/long-double-wfkxb-1101.c: New test.
>   * gcc.target/s390/zvector/long-double-wflcxb.c: New test.
>   * gcc.target/s390/zvector/long-double-wflpxb.c: New test.
>   * gcc.target/s390/zvector/long-double-wfmaxb-2.c: New test.
> 

Re: [PATCH 3/4] IBM Z: Store long doubles in vector registers when possible

2020-11-04 Thread Andreas Krebbel via Gcc-patches
On 04.11.20 23:12, Ilya Leoshkevich wrote:
> On Wed, 2020-11-04 at 18:16 +0100, Andreas Krebbel wrote:
>> On 03.11.20 22:45, Ilya Leoshkevich wrote:
>>> On z14+, there are instructions for working with 128-bit floats
>>> (long
>>> doubles) in vector registers.  It's beneficial to use them instead
>>> of
>>> instructions that operate on floating point register pairs, because
>>> it
>>> allows storing 4 times more data in registers at a time,
>>> relieving
>>> register pressure.  The performance of new instructions is almost
>>> the
>>> same.
>>>
>>> Implement by storing TFmode values in vector registers on
>>> z14+.  Since
>>> not all operations are available with the new instructions, keep
>>> the old
>>> ones using the new FPRX2 mode, and convert between it and TFmode
>>> when
>>> necessary (this is called "forwarder" expanders below).  Change the
>>> existing TFmode expanders to call either new- or old-style ones
>>> depending on whether we are on z14+ or older machines ("dispatcher"
>>> expanders).
>>>
>>> gcc/ChangeLog:
>>>
>>> 2020-11-03  Ilya Leoshkevich  
>>>
>>> * config/s390/s390-modes.def (FPRX2): New mode.
>>> * config/s390/s390-protos.h (s390_fma_allowed_p): New function.
>>> * config/s390/s390.c (s390_fma_allowed_p): Likewise.
>>> (s390_build_signbit_mask): Support 128-bit masks.
>>> (print_operand): Support printing the second word of a TFmode
>>> operand as vector register.
>>> (constant_modes): Add FPRX2mode.
>>> (s390_class_max_nregs): Return 1 for TFmode on z14+.
>>> (s390_is_fpr128): New function.
>>> (s390_is_vr128): Likewise.
>>> (s390_can_change_mode_class): Use s390_is_fpr128 and
>>> s390_is_vr128 in order to determine whether mode refers to a
>>> FPR
>>> pair or to a VR.
>>> * config/s390/s390.h (EXPAND_MOVTF): New macro.
>>> (EXPAND_TF): Likewise.
>>> * config/s390/s390.md (PFPO_OP_TYPE_FPRX2): PFPO_OP_TYPE_TF
>>> alias.
>>> (ALL): Add FPRX2.
>>> (FP_ALL): Add FPRX2 for z14+, restrict TFmode to z13-.
>>> (FP): Likewise.
>>> (FP_ANYTF): New mode iterator.
>>> (BFP): Add FPRX2 for z14+, restrict TFmode to z13-.
>>> (TD_TF): Likewise.
>>> (xde): Add FPRX2.
>>> (nBFP): Likewise.
>>> (nDFP): Likewise.
>>> (DSF): Likewise.
>>> (DFDI): Likewise.
>>> (SFSI): Likewise.
>>> (DF): Likewise.
>>> (SF): Likewise.
>>> (fT0): Likewise.
>>> (bt): Likewise.
>>> (_d): Likewise.
>>> (HALF_TMODE): Likewise.
>>> (tf_fpr): New mode_attr.
>>> (type): New mode_attr.
>>> (*cmp_ccz_0): Use type instead of mode with fsimp.
>>> (*cmp_ccs_0_fastmath): Likewise.
>>> (*cmptf_ccs): New pattern for wfcxb.
>>> (*cmptf_ccsfps): New pattern for wfkxb.
>>> (mov): Rename to mov.
>>> (signbit2): Rename to signbit2.
>>> (isinf2): Renamed to isinf2.
>>> (*TDC_insn_): Use type instead of mode with fsimp.
>>> (fixuns_trunc2): Rename to
>>> fixuns_trunc2.
>>> (fix_trunctf2): Rename to fix_trunctf2_fpr.
>>> (floatdi2): Rename to floatdi2, use type
>>> instead of mode with itof.
>>> (floatsi2): Rename to floatsi2, use type
>>> instead of mode with itof.
>>> (*floatuns2): Use type instead of mode for
>>> itof.
>>> (floatuns2): Rename to
>>> floatuns2.
>>> (trunctf2): Rename to trunctf2_fpr, use type
>>> instead
>>> of mode with fsimp.
>>> (extend2): Rename to
>>> extend2.
>>> (2): Rename to
>>> 2, use type instead of
>>> mode with fsimp.
>>> (rint2): Rename to rint2, use
>>> type instead of mode with fsimp.
>>> (2): Use type instead of mode for
>>> fsimp.
>>> (rint2): Likewise.
>>> (trunc2): Rename to
>>> trunc2.
>>> (trunc2): Rename to
>>> trunc2.
>>> (extend2): Rename to
>>> extend2.
>>> (extend2): Rename to
>>> extend2.
>>> (add3): Rename to add3, use type instead of
>>> mode with fsimp.
>>> (*add3_cc): Use type instead of mode with fsimp.
>>> (*add3_cconly): Likewise.
>>> (sub3): Rename to sub3, use type instead of
>>> mode with fsimp.
>>> (*sub3_cc): Use type instead of mode with fsimp.
>>> (*sub3_cconly): Likewise.
>>> (mul3): Rename to mul3, use type instead of
>>> mode with fsimp.
>>> (fma4): Restrict using s390_fma_allowed_p.
>>> (fms4): Restrict using s390_fma_allowed_p.
>>> (div3): Rename to div3, use type instead of
>>> mode with fdiv.
>>> (neg2): Rename to neg2.
>>> (*neg2_cc): Use type instead of mode with fsimp.
>>> (*neg2_cconly): Likewise.
>>> (*neg2_nocc): Likewise.
>>> (*neg2): Likewise.
>>> (abs2): Rename to abs2, use type instead of
>>> mode with fdiv.
>>> (*abs2_cc): Use type instead of mode with fsimp.
>>> (*abs2_cconly): Likewise.
>>> (*abs2_nocc): Likewise.
>>> (*abs2): Likewise.
>>> (*negabs2_cc): Likewise.
>>> (*negabs2_cconly): Likewise.
>>> (*negabs2_nocc): Likewise.
>>> (*negabs2): Lik

Re: [PATCH 4/4] IBM Z: Test long doubles in vector registers

2020-11-04 Thread Andreas Krebbel via Gcc-patches
On 04.11.20 23:19, Ilya Leoshkevich wrote:
> On Wed, 2020-11-04 at 18:28 +0100, Andreas Krebbel wrote:
>> These tests all use the -mzvector option but do not appear to make
>> use of the z vector language
>> extensions. I think that option could be removed. Then these tests
>> should be moved to the vector subdir.
> 
> Will change, thanks!
> 
>> You could do the asm scanning also in dg-do run tests.
> 
> This doesn't seem to work.  For example, if I add 
> 
> /* { dg-final { scan-assembler-times {aaa} 999 } } */
> 
> to long-double-from-double-run.c, it won't fail.

You will have to add --save-temps to dg-options to make it work. Otherwise the
scan test will stay unresolved.

Andreas

> 
>>
>> Andreas
>>
>>
>> On 03.11.20 22:46, Ilya Leoshkevich wrote:
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2020-11-03  Ilya Leoshkevich  
>>>
>>> * gcc.target/s390/zvector/long-double-callee-abi-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-caller-abi-run.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-caller-abi-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-copysign-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-copysign-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-fprx2-constant.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-double-run.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-double-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-float-run.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-float-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-i16-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-i16-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-i32-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-i32-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-i64-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-i64-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-i8-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-i8-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-u16-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-u16-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-u32-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-u32-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-u64-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-u64-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-from-u8-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-from-u8-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-double-run.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-to-double-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-to-float-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-float-scan.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-to-i16-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i16-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i32-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i32-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i64-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i64-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i8-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-i8-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u16-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u16-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u32-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u32-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u64-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u64-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u8-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-to-u8-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-vec-duplicate.c: New
>>> test.
>>> * gcc.target/s390/zvector/long-double-wf.h: New test.
>>> * gcc.target/s390/zvector/long-double-wfaxb-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfaxb-scan.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfaxb.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfcxb-0001.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfcxb-0111.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfcxb-1011.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfcxb-1101.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfdxb-run.c: New test.
>>> * gcc.target/s390/zvector/long-double-wfdxb-scan.c: New test.
>>> * gcc

Re: [PING] [PATCH] S/390: Do not turn maybe-uninitialized warnings into errors

2020-11-06 Thread Andreas Krebbel via Gcc-patches
On 06.11.20 04:52, Jeff Law via Gcc-patches wrote:
> 
> On 10/30/20 7:01 AM, Richard Biener wrote:
>>
>> It's not that more / different inlining inherently exposes _more_
>> false positives in the middle-end warnings.  They simply expose
>> others and the GCC codebase is cleansed (by those who change
>> inliner heuristics / tunings) from those by either fixing the analysis
>> or modifying the code (like putting in initializers).
> 
> Right.  The change in heuristics inherently perturb the middle end
> warnings.  It has been and continues to be a source of significant
> headaches in Fedora.

Stefan did some measurements and, in fact, we see only a few benchmarks
improving with our aggressive settings. However, in these cases the performance
benefits are significant. We will continue looking into these cases. Perhaps
more selective ways can be found to achieve the same.

I've just committed a patch to switch back to the default values. With that 
patch bootstrapping on Z
works fine again even without --disable-werror.

Andreas

gcc/ChangeLog:

* config/s390/s390.c (s390_option_override_internal): Remove
override of inline params.
---
 gcc/config/s390/s390.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b8961a315aa..847cedde674 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -15469,13 +15469,6 @@ s390_option_override_internal (struct gcc_options 
*opts,
   SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
   SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);

-  /* Use aggressive inlining parameters.  */
-  if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
-{
-  SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
-  SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
-}
-
   /* Set the default alignment.  */
   s390_default_align (opts);


Re: [PATCH 1/2] IBM Z: Store long doubles in vector registers when possible

2020-11-10 Thread Andreas Krebbel via Gcc-patches
On 09.11.20 20:54, Ilya Leoshkevich wrote:
> On z14+, there are instructions for working with 128-bit floats (long
> doubles) in vector registers.  It's beneficial to use them instead of
> instructions that operate on floating point register pairs, because it
> allows to store 4 times more data in registers at a time, relieving
> register pressure.  The raw performance of the new instructions is
> almost the same as that of the old ones.
> 
> Implement by storing TFmode values in vector registers on z14+.  Since
> not all operations are available with the new instructions, keep the
> old ones available using the new FPRX2 mode, and convert between it and
> TFmode when necessary (this is called "forwarder" expanders below).
> Change the existing TFmode expanders to call either new- or old-style
> ones depending on whether we are on z14+ or older machines
> ("dispatcher" expanders).
> 
> gcc/ChangeLog:
> 
> 2020-11-03  Ilya Leoshkevich  
> 
>   * config/s390/s390-modes.def (FPRX2): New mode.
>   * config/s390/s390-protos.h (s390_fma_allowed_p): New function.
>   * config/s390/s390.c (s390_fma_allowed_p): Likewise.
>   (s390_build_signbit_mask): Support 128-bit masks.
>   (print_operand): Support printing the second word of a TFmode
>   operand as vector register.
>   (constant_modes): Add FPRX2mode.
>   (s390_class_max_nregs): Return 1 for TFmode on z14+.
>   (s390_is_fpr128): New function.
>   (s390_is_vr128): Likewise.
>   (s390_can_change_mode_class): Use s390_is_fpr128 and
>   s390_is_vr128 in order to determine whether mode refers to a FPR
>   pair or to a VR.
>   (s390_emit_compare): Force TFmode operands into registers on
>   z14+.
>   * config/s390/s390.h (HAVE_TF): New macro.
>   (EXPAND_MOVTF): New macro.
>   (EXPAND_TF): Likewise.
>   * config/s390/s390.md (PFPO_OP_TYPE_FPRX2): PFPO_OP_TYPE_TF
>   alias.
>   (ALL): Add FPRX2.
>   (FP_ALL): Add FPRX2 for z14+, restrict TFmode to z13-.
>   (FP): Likewise.
>   (FP_ANYTF): New mode iterator.
>   (BFP): Add FPRX2 for z14+, restrict TFmode to z13-.
>   (TD_TF): Likewise.
>   (xde): Add FPRX2.
>   (nBFP): Likewise.
>   (nDFP): Likewise.
>   (DSF): Likewise.
>   (DFDI): Likewise.
>   (SFSI): Likewise.
>   (DF): Likewise.
>   (SF): Likewise.
>   (fT0): Likewise.
>   (bt): Likewise.
>   (_d): Likewise.
>   (HALF_TMODE): Likewise.
>   (tf_fpr): New mode_attr.
>   (type): New mode_attr.
>   (*cmp_ccz_0): Use type instead of mode with fsimp.
>   (*cmp_ccs_0_fastmath): Likewise.
>   (*cmptf_ccs): New pattern for wfcxb.
>   (*cmptf_ccsfps): New pattern for wfkxb.
>   (mov): Rename to mov.
>   (signbit2): Rename to signbit2.
>   (isinf2): Renamed to isinf2.
>   (*TDC_insn_): Use type instead of mode with fsimp.
>   (fixuns_trunc2): Rename to
>   fixuns_trunc2.
>   (fix_trunctf2): Rename to fix_trunctf2_fpr.
>   (floatdi2): Rename to floatdi2, use type
>   instead of mode with itof.
>   (floatsi2): Rename to floatsi2, use type
>   instead of mode with itof.
>   (*floatuns2): Use type instead of mode for
>   itof.
>   (floatuns2): Rename to
>   floatuns2.
>   (trunctf2): Rename to trunctf2_fpr, use type instead
>   of mode with fsimp.
>   (extend2): Rename to
>   extend2.
>   (2): Rename to
>   2, use type instead of
>   mode with fsimp.
>   (rint2): Rename to rint2, use
>   type instead of mode with fsimp.
>   (2): Use type instead of mode for
>   fsimp.
>   (rint2): Likewise.
>   (trunc2): Rename to
>   trunc2.
>   (trunc2): Rename to
>   trunc2.
>   (extend2): Rename to
>   extend2.
>   (extend2): Rename to
>   extend2.
>   (add3): Rename to add3, use type instead of
>   mode with fsimp.
>   (*add3_cc): Use type instead of mode with fsimp.
>   (*add3_cconly): Likewise.
>   (sub3): Rename to sub3, use type instead of
>   mode with fsimp.
>   (*sub3_cc): Use type instead of mode with fsimp.
>   (*sub3_cconly): Likewise.
>   (mul3): Rename to mul3, use type instead of
>   mode with fsimp.
>   (fma4): Restrict using s390_fma_allowed_p.
>   (fms4): Restrict using s390_fma_allowed_p.
>   (div3): Rename to div3, use type instead of
>   mode with fdiv.
>   (neg2): Rename to neg2.
>   (*neg2_cc): Use type instead of mode with fsimp.
>   (*neg2_cconly): Likewise.
>   (*neg2_nocc): Likewise.
>   (*neg2): Likewise.
>   (abs2): Rename to abs2, use type instead of
>   mode with fdiv.
>   (*abs2_cc): Use type instead of mode with fsimp.
>   (*abs2_cconly): Likewise.
>   (*abs2_nocc): Likewise.
>   (*abs2): Likewise.
>   (*negabs2_cc): Likewise.
>   (*negabs2_cconly): Likewise.
>   (*negabs2_nocc): Likewise.
>   (*negabs2): Likewise.
>   (sqrt2): Ren

Re: [PATCH 2/2] IBM Z: Test long doubles in vector registers

2020-11-10 Thread Andreas Krebbel via Gcc-patches
On 09.11.20 20:54, Ilya Leoshkevich wrote:
> gcc/testsuite/ChangeLog:
> 
> 2020-11-05  Ilya Leoshkevich  
> 
>   * gcc.target/s390/vector/long-double-callee-abi-scan.c: New test.
>   * gcc.target/s390/vector/long-double-caller-abi-run.c: New test.
>   * gcc.target/s390/vector/long-double-caller-abi-scan.c: New test.
>   * gcc.target/s390/vector/long-double-copysign.c: New test.
>   * gcc.target/s390/vector/long-double-fprx2-constant.c: New test.
>   * gcc.target/s390/vector/long-double-from-double.c: New test.
>   * gcc.target/s390/vector/long-double-from-float.c: New test.
>   * gcc.target/s390/vector/long-double-from-i16.c: New test.
>   * gcc.target/s390/vector/long-double-from-i32.c: New test.
>   * gcc.target/s390/vector/long-double-from-i64.c: New test.
>   * gcc.target/s390/vector/long-double-from-i8.c: New test.
>   * gcc.target/s390/vector/long-double-from-u16.c: New test.
>   * gcc.target/s390/vector/long-double-from-u32.c: New test.
>   * gcc.target/s390/vector/long-double-from-u64.c: New test.
>   * gcc.target/s390/vector/long-double-from-u8.c: New test.
>   * gcc.target/s390/vector/long-double-to-double.c: New test.
>   * gcc.target/s390/vector/long-double-to-float.c: New test.
>   * gcc.target/s390/vector/long-double-to-i16.c: New test.
>   * gcc.target/s390/vector/long-double-to-i32.c: New test.
>   * gcc.target/s390/vector/long-double-to-i64.c: New test.
>   * gcc.target/s390/vector/long-double-to-i8.c: New test.
>   * gcc.target/s390/vector/long-double-to-u16.c: New test.
>   * gcc.target/s390/vector/long-double-to-u32.c: New test.
>   * gcc.target/s390/vector/long-double-to-u64.c: New test.
>   * gcc.target/s390/vector/long-double-to-u8.c: New test.
>   * gcc.target/s390/vector/long-double-vec-duplicate.c: New test.
>   * gcc.target/s390/vector/long-double-wf.h: New test.
>   * gcc.target/s390/vector/long-double-wfaxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfcxb-0001.c: New test.
>   * gcc.target/s390/vector/long-double-wfcxb-0111.c: New test.
>   * gcc.target/s390/vector/long-double-wfcxb-1011.c: New test.
>   * gcc.target/s390/vector/long-double-wfcxb-1101.c: New test.
>   * gcc.target/s390/vector/long-double-wfdxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfixb.c: New test.
>   * gcc.target/s390/vector/long-double-wfkxb-0111.c: New test.
>   * gcc.target/s390/vector/long-double-wfkxb-1011.c: New test.
>   * gcc.target/s390/vector/long-double-wfkxb-1101.c: New test.
>   * gcc.target/s390/vector/long-double-wflcxb.c: New test.
>   * gcc.target/s390/vector/long-double-wflpxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfmaxb-2.c: New test.
>   * gcc.target/s390/vector/long-double-wfmaxb-3.c: New test.
>   * gcc.target/s390/vector/long-double-wfmaxb-disabled.c: New test.
>   * gcc.target/s390/vector/long-double-wfmaxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfmsxb-disabled.c: New test.
>   * gcc.target/s390/vector/long-double-wfmsxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfmxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfnmaxb-disabled.c: New test.
>   * gcc.target/s390/vector/long-double-wfnmaxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfnmsxb-disabled.c: New test.
>   * gcc.target/s390/vector/long-double-wfnmsxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfsqxb.c: New test.
>   * gcc.target/s390/vector/long-double-wfsxb-1.c: New test.
>   * gcc.target/s390/vector/long-double-wfsxb.c: New test.
>   * gcc.target/s390/vector/long-double-wftcixb-1.c: New test.
>   * gcc.target/s390/vector/long-double-wftcixb.c: New test.

Ok. Thanks!

Andreas



Re: [PATCH] IBM Z: Fix bootstrap breakage due to HAVE_TF macro

2020-11-10 Thread Andreas Krebbel via Gcc-patches
On 10.11.20 18:43, Ilya Leoshkevich wrote:
> Bootstrap and regtest running on s390x-redhat-linux with --enable-shared
> --with-system-zlib --enable-threads=posix --enable-__cxa_atexit
> --enable-checking=yes,rtl --enable-gnu-indirect-function
> --disable-werror --enable-languages=c,c++,fortran,objc,obj-c++
> --with-arch=arch13.  Ok for master?
> 
> 
> 
> Commit e627cda56865 ("IBM Z: Store long doubles in vector registers
> when possible") introduced HAVE_TF macro which expands to a logical
> "or" of HAVE_ constants.  Not all of these constants are available in
> GENERATOR_FILE context, so a hack was used: simply expand to true in
> this case, because the actual value matters only during compiler
> runtime and not during generation.
> 
> However, one aspect of this value matters during generation after all:
> whether or not it's a constant, which in this case it appears to be.
> This results in incorrect values in insn-flags.h and broken bootstrap
> for some configurations.
> 
> Fix by using a dummy value that is not a constant.
> 
> gcc/ChangeLog:
> 
> 2020-11-10  Ilya Leoshkevich  
> 
>   * config/s390/s390.h (HAVE_TF): Use opaque value when
>   GENERATOR_FILE is defined.

Ok. Thanks!

Andreas


[Committed 1/2] IBM Z: Rename mode attr tointvec to TOINTVEC

2020-11-11 Thread Andreas Krebbel via Gcc-patches
Just a preparation to add a lower-case tointvec.

Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

* config/s390/vector.md: Rename tointvec to TOINTVEC.
* config/s390/vx-builtins.md: Likewise.
---
 gcc/config/s390/vector.md  | 142 -
 gcc/config/s390/vx-builtins.md |  50 ++--
 2 files changed, 96 insertions(+), 96 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 31d323930b2..58b8999f2db 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -137,7 +137,7 @@ (define_mode_attr w [(V1QI "")  (V2QI "")  (V4QI "")  (V8QI 
"") (V16QI "")
 
 ; Resulting mode of a vector comparison.  For floating point modes an
 ; integer vector mode with the same element size is picked.
-(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI 
"V8QI") (V16QI "V16QI")
+(define_mode_attr TOINTVEC [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI 
"V8QI") (V16QI "V16QI")
(V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI 
"V8HI")
(V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
(V1DI "V1DI") (V2DI "V2DI")
@@ -697,12 +697,12 @@ (define_expand "vcondu"
 (define_expand "vcond_mask_"
   [(set (match_operand:V 0 "register_operand" "")
(if_then_else:V
-(eq (match_operand: 3 "register_operand" "")
+(eq (match_operand: 3 "register_operand" "")
 (match_dup 4))
 (match_operand:V 2 "register_operand" "")
 (match_operand:V 1 "register_operand" "")))]
   "TARGET_VX"
-  "operands[4] = CONST0_RTX (mode);")
+  "operands[4] = CONST0_RTX (mode);")
 
 
 ; We only have HW support for byte vectors.  The middle-end is
@@ -1586,8 +1586,8 @@ (define_insn 
"*vec_cmp_nocc"
 
 ; vfcesb, vfcedb, wfcexb: non-signaling "==" comparison (a == b)
 (define_insn "*vec_cmpeq_quiet_nocc"
-  [(set (match_operand: 0 "register_operand" "=v")
-   (eq: (match_operand:VFT 1 "register_operand" "v")
+  [(set (match_operand: 0 "register_operand" "=v")
+   (eq: (match_operand:VFT 1 "register_operand" "v")
   (match_operand:VFT 2 "register_operand" "v")))]
   "TARGET_VX"
   "fceb\t%v0,%v1,%v2"
@@ -1595,45 +1595,45 @@ (define_insn "*vec_cmpeq_quiet_nocc"
 
 ; vfchsb, vfchdb, wfchxb: non-signaling > comparison (!(b u>= a))
 (define_insn "vec_cmpgt_quiet_nocc"
-  [(set (match_operand:0 "register_operand" "=v")
-   (not:
-(unge: (match_operand:VFT 2 "register_operand" "v")
+  [(set (match_operand:0 "register_operand" "=v")
+   (not:
+(unge: (match_operand:VFT 2 "register_operand" "v")
  (match_operand:VFT 1 "register_operand" "v"]
   "TARGET_VX"
   "fchb\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
 
 (define_expand "vec_cmplt_quiet_nocc"
-  [(set (match_operand:0 "register_operand" "=v")
-   (not:
-(unge: (match_operand:VFT 1 "register_operand" "v")
+  [(set (match_operand:0 "register_operand" "=v")
+   (not:
+(unge: (match_operand:VFT 1 "register_operand" "v")
  (match_operand:VFT 2 "register_operand" "v"]
   "TARGET_VX")
 
 ; vfchesb, vfchedb, wfchexb: non-signaling >= comparison (!(a u< b))
 (define_insn "vec_cmpge_quiet_nocc"
-  [(set (match_operand:0 "register_operand" "=v")
-   (not:
-(unlt: (match_operand:VFT 1 "register_operand" "v")
+  [(set (match_operand:0 "register_operand" "=v")
+   (not:
+(unlt: (match_operand:VFT 1 "register_operand" "v")
  (match_operand:VFT 2 "register_operand" "v"]
   "TARGET_VX"
   "fcheb\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
 
 (define_expand "vec_cmple_quiet_nocc"
-  [(set (match_operand:0 "register_operand" "=v")
-   (not:
-(unlt: (match_operand:VFT 2 "register_operand" "v")
+  [(set (match_operand:0 "register_operand" "=v")
+   (not:
+(unlt: (match_operand:VFT 2 "register_operand" "v")
  (match_operand:VFT 1 "register_operand" "v"]
   "TARGET_VX")
 
 ; vfkesb, vfkedb, wfkexb: signaling == comparison ((a >= b) & (b >= a))
 (define_insn "*vec_cmpeq_signaling_nocc"
-  [(set (match_operand:  0 "register_operand" "=v")
-   (and:
-(ge: (match_operand:VFT 1 "register_operand" "v")
+  [(set (match_operand:  0 "register_operand" "=v")
+   (and:
+(ge: (match_operand:VFT 1 "register_operand" "v")
(match_operand:VFT 2 "register_operand" "v"))
-(ge: (match_dup 2)
+(ge: (match_dup 2)
(match_dup 1]
   "TARGET_VXE"
   "fkeb\t%v0,%v1,%v2"
@@ -1641,16 +1641,16 @@ (define_insn "*vec_cmpeq_signaling_nocc"
 
 ; vfkhsb, vfkhdb, wfkhxb: signaling > comparison (a > b)
 (define_insn "*vec_cmpgt_signaling_nocc"
-  [(se

[Committed 2/2] IBM Z: Fix PR97326: Enable fp compares in vec_cmp

2020-11-11 Thread Andreas Krebbel via Gcc-patches
Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

PR target/97326
* config/s390/vector.md: Support vector floating point modes in
vec_cmp.
---
 gcc/config/s390/vector.md | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 58b8999f2db..fef68644625 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -145,6 +145,16 @@ (define_mode_attr TOINTVEC [(V1QI "V1QI") (V2QI "V2QI") 
(V4QI "V4QI") (V8QI "V8Q
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
(V1DF "V1DI") (V2DF "V2DI")
(V1TF "V1TI") (TF "V1TI")])
+
+(define_mode_attr tointvec [(V1QI "v1qi") (V2QI "v2qi") (V4QI "v4qi") (V8QI 
"v8qi") (V16QI "v16qi")
+   (V1HI "v1hi") (V2HI "v2hi") (V4HI "v4hi") (V8HI 
"v8hi")
+   (V1SI "v1si") (V2SI "v2si") (V4SI "v4si")
+   (V1DI "v1di") (V2DI "v2di")
+   (V1TI "v1ti")
+   (V1SF "v1si") (V2SF "v2si") (V4SF "v4si")
+   (V1DF "v1di") (V2DF "v2di")
+   (V1TF "v1ti") (TF   "v1ti")])
+
 (define_mode_attr vw [(SF "w") (V1SF "w") (V2SF "v") (V4SF "v")
  (DF "w") (V1DF "w") (V2DF "v")
  (TF "w") (V1TF "w")])
@@ -1546,14 +1556,14 @@ (define_expand "copysign3"
 })
 
 ;;
-;; Integer compares
+;; Compares
 ;;
 
-(define_expand "vec_cmp"
-  [(set (match_operand:VI_HW0 "register_operand" "")
-   (match_operator:VI_HW   1 ""
- [(match_operand:VI_HW 2 "register_operand" "")
-  (match_operand:VI_HW 3 "register_operand" "")]))]
+(define_expand "vec_cmp"
+  [(set (match_operand:  0 "register_operand" "")
+   (match_operator: 1 ""
+ [(match_operand:V_HW 2 "register_operand" "")
+  (match_operand:V_HW 3 "register_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
operands[3]);
-- 
2.25.1



Re: [PATCH] IBM Z: Fix output template for "*vfees"

2020-11-12 Thread Andreas Krebbel via Gcc-patches
On 12.11.20 13:25, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on IBM Z.  Ok for master?
> 
> gcc/ChangeLog:
> 
>   * config/s390/vx-builtins.md ("*vfees"): Fix output
> template.
> ---
>  gcc/config/s390/vx-builtins.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index 010db4d1115..0c2e7170223 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -1395,7 +1395,7 @@
>  
>if (flags == VSTRING_FLAG_ZS)
>  return "vfeezs\t%v0,%v1,%v2";
> -  return "vfees\t%v0,%v1,%v2,%b3";
> +  return "vfees\t%v0,%v1,%v2";
>  }
>[(set_attr "op_type" "VRR")])
>  
> 

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Define vec_vfees instruction pattern

2020-11-12 Thread Andreas Krebbel via Gcc-patches
On 12.11.20 13:21, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on IBM Z.  Ok for master?
> 
> gcc/ChangeLog:
> 
>   * config/s390/vector.md ("vec_vfees"): New insn pattern.
> ---
>  gcc/config/s390/vector.md | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 31d323930b2..4333a2191ae 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -1798,6 +1798,32 @@
>"vll\t%v0,%1,%2"
>[(set_attr "op_type" "VRS")])
>  
> +; vfeebs, vfeehs, vfeefs
> +; vfeezbs, vfeezhs, vfeezfs
> +(define_insn "vec_vfees"
> +  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
> + (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
> +(match_operand:VI_HW_QHS 2 "register_operand" "v")
> +(match_operand:QI 3 "const_mask_operand" "C")]
> +   UNSPEC_VEC_VFEE))
> +   (set (reg:CCRAW CC_REGNUM)
> + (unspec:CCRAW [(match_dup 1)
> +(match_dup 2)
> +(match_dup 3)]
> +   UNSPEC_VEC_VFEECC))]
> +  "TARGET_VX"
> +{
> +  unsigned HOST_WIDE_INT flags = UINTVAL (operands[3]);
> +
> +  gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
> +  flags &= ~VSTRING_FLAG_CS;
> +
> +  if (flags == VSTRING_FLAG_ZS)
> +return "vfeezs\t%v0,%v1,%v2";
> +  return "vfees\t%v0,%v1,%v2";
> +}
> +  [(set_attr "op_type" "VRR")])
> +
>  ; vfenebs, vfenehs, vfenefs
>  ; vfenezbs, vfenezhs, vfenezfs
>  (define_insn "vec_vfenes"
> 

Since this is mostly a copy of the pattern in vx-builtins.md I think we should 
remove the other
version then.

I also would prefer this to be committed together with the code making use of 
the expander. So far
this would be dead code - right?

Andreas


Re: [PATCH] IBM Z: Do not run long double tests on old machines

2020-11-16 Thread Andreas Krebbel via Gcc-patches
On 13.11.20 23:23, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on z13 s390x-redhat-linux.  Ok for master?
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-11-12  Ilya Leoshkevich  
> 
>   * gcc.target/s390/s390.exp (check_effective_target_s390_z14_hw):
>   New predicate.
>   * gcc.target/s390/vector/long-double-caller-abi-run.c: Use the
>   new predicate.
>   * gcc.target/s390/vector/long-double-copysign.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-double.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-float.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-i16.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-i32.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-i64.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-i8.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-u16.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-u32.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-u64.c: Likewise.
>   * gcc.target/s390/vector/long-double-from-u8.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-double.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-float.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-i16.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-i32.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-i64.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-i8.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-u16.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-u32.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-u64.c: Likewise.
>   * gcc.target/s390/vector/long-double-to-u8.c: Likewise.
>   * gcc.target/s390/vector/long-double-wfaxb.c: Likewise.
>   * gcc.target/s390/vector/long-double-wfdxb.c: Likewise.
>   * gcc.target/s390/vector/long-double-wfsxb-1.c: Likewise.

Ok. Thanks!

Andreas


[Committed] IBM Z: Fix PR102222

2021-09-22 Thread Andreas Krebbel via Gcc-patches
Avoid emitting a strict low part move if the insv target actually
affects the whole target reg.

Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

PR target/102222
* config/s390/s390.c (s390_expand_insv): Emit a normal move if it
is actually a full copy of the source operand into the target.
Don't emit a strict low part move if source and target mode match.

gcc/testsuite/ChangeLog:

* gcc.target/s390/pr102222.c: New test.
---
 gcc/config/s390/s390.c   | 10 ++
 gcc/testsuite/gcc.target/s390/pr10.c | 16 
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr10.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 54dd6332c3a..e04385451cf 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6414,6 +6414,15 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
 return false;
 
+  /* Just a move.  */
+  if (bitpos == 0
+  && bitsize == GET_MODE_BITSIZE (GET_MODE (src))
+  && mode == GET_MODE (src))
+{
+  emit_move_insn (dest, src);
+  return true;
+}
+
   /* Generate INSERT IMMEDIATE (IILL et al).  */
   /* (set (ze (reg)) (const_int)).  */
   if (TARGET_ZARCH
@@ -6510,6 +6519,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
   && MEM_P (src)
   && (mode == DImode || mode == SImode)
+  && mode != smode
   && register_operand (dest, mode))
 {
   /* Emit a strict_low_part pattern if possible.  */
diff --git a/gcc/testsuite/gcc.target/s390/pr10.c 
b/gcc/testsuite/gcc.target/s390/pr10.c
new file mode 100644
index 000..47d075e47fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m31 -mesa" } */
+
+struct squashfs_reg_inode_header_1 read_inode_inode;
+
+int read_inode_val;
+
+struct squashfs_reg_inode_header_1
+{
+  int file_size:32;
+} __attribute__((packed)) read_inode ();
+
+void foo (void)
+{
+  read_inode_inode.file_size = read_inode_val;
+}
-- 
2.31.1



[Committed] IBM Z: TPF: Add cc clobber to profiling expanders

2021-09-22 Thread Andreas Krebbel via Gcc-patches
The code sequence emitted uses CC internally.

gcc/ChangeLog:

* config/s390/tpf.md (prologue_tpf, epilogue_tpf): Add cc clobber.
---
 gcc/config/s390/tpf.md | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/tpf.md b/gcc/config/s390/tpf.md
index 297e9d1f755..35b37190705 100644
--- a/gcc/config/s390/tpf.md
+++ b/gcc/config/s390/tpf.md
@@ -21,7 +21,8 @@ (define_insn "prologue_tpf"
   [(unspec_volatile [(match_operand 0 "const_int_operand" "J")
 (match_operand 1 "const_int_operand" "J")]
UNSPECV_TPF_PROLOGUE)
-   (clobber (reg:DI 1))]
+   (clobber (reg:DI 1))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_TPF_PROFILING"
   "larl\t%%r1,.+14\;tm\t%0,255\;bnz\t%1"
   [(set_attr "length"   "14")])
@@ -31,7 +32,8 @@ (define_insn "epilogue_tpf"
   [(unspec_volatile [(match_operand 0 "const_int_operand" "J")
 (match_operand 1 "const_int_operand" "J")]
UNSPECV_TPF_EPILOGUE)
-   (clobber (reg:DI 1))]
+   (clobber (reg:DI 1))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_TPF_PROFILING"
   "larl\t%%r1,.+14\;tm\t%0,255\;bnz\t%1"
   [(set_attr "length"   "14")])
-- 
2.31.1



Re: [PATCH gcc-11 0/2] Backport kpatch changes

2021-09-30 Thread Andreas Krebbel via Gcc-patches
On 9/30/21 10:50, Ilya Leoshkevich wrote:
> Hi,
> 
> This series contains a backport of kpatch changes needed to support
> https://github.com/dynup/kpatch/pull/1203 so that it could be used in
> RHEL 9.  The patches have been in master for 4 months now without
> issues.
> 
> Bootstrapped and regtested on s390x-redhat-linux.
> 
> Ok for gcc-11?

Ok for both. Thanks!

Andreas


Re: [PATCH] IBM Z: Fix address of operands will never be NULL warnings

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 10/30/21 12:43, Stefan Schulze Frielinghaus wrote:
> Since a recent enhancement of -Waddress a couple of warnings are emitted
> and turned into errors during bootstrap:
> 
> gcc/config/s390/s390.md:12087:25: error: the address of 'operands' will never 
> be NULL [-Werror=address]
> 12087 |   "TARGET_HTM && operands != NULL
> build/gencondmd.c:59:12: note: 'operands' declared here
>59 | extern rtx operands[];
>   |^~~~
> 
> Fixed by removing those non-null checks.
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md ("*cc_to_int", "tabort", "*tabort_1",
>   "*tabort_1_plus"): Remove operands non-null check.

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: ldist-{rawmemchr,strlen} tests require vector extensions

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 11/2/21 15:54, Stefan Schulze Frielinghaus wrote:
> The tests require vector extensions which are only available for z13 and
> later while using the z/Architecture.
> 
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/tree-ssa/ldist-rawmemchr-1.c: For IBM Z set arch to z13
>   and use z/Architecture since the tests require vector extensions.
>   * gcc.dg/tree-ssa/ldist-rawmemchr-2.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-1.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-3.c: Likewise.

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Free bbs in s390_loop_unroll_adjust

2021-11-03 Thread Andreas Krebbel via Gcc-patches
On 11/2/21 18:31, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.c (s390_loop_unroll_adjust): In case of early
>   exit free bbs.

Ok. Thanks!

Andreas



[Committed] IBM Z: Define STACK_CHECK_MOVING_SP

2021-11-04 Thread Andreas Krebbel via Gcc-patches
With -fstack-check the stack probes emitted access memory below the
stack pointer.

Bootstrapped and regression tested on s390x.

Committed to mainline

gcc/ChangeLog:

* config/s390/s390.h (STACK_CHECK_MOVING_SP): New macro
definition.
---
 gcc/config/s390/s390.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index fb16a455a03..186c5c6200b 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -332,6 +332,11 @@ extern const char *s390_host_detect_local_cpu (int argc, 
const char **argv);
 
 #define STACK_SIZE_MODE (Pmode)
 
+/* Make the stack pointer to be moved downwards while issuing stack probes with
+   -fstack-check.  We need this to prevent memory below the stack pointer from
+   being accessed.  */
+#define STACK_CHECK_MOVING_SP 1
+
 #ifndef IN_LIBGCC2
 
 /* Width of a word, in units (bytes).  */
-- 
2.31.1



[PATCH] Fix PR103028

2021-11-05 Thread Andreas Krebbel via Gcc-patches
This prevents find_cond_trap from being invoked after reload.  It may
generate compares which would require reloading.

Bootstrapped and regression tested on s390x.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/103028
* ifcvt.c (find_if_header): Invoke find_cond_trap only before
reload.

gcc/testsuite/ChangeLog:

PR rtl-optimization/103028
* gcc.dg/pr103028.c: New test.
---
 gcc/ifcvt.c |  3 ++-
 gcc/testsuite/gcc.dg/pr103028.c | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103028.c

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 017944f4f79..1f5b9476ac2 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -4341,7 +4341,8 @@ find_if_header (basic_block test_bb, int pass)
   && cond_exec_find_if_block (&ce_info))
 goto success;
 
-  if (targetm.have_trap ()
+  if (!reload_completed
+  && targetm.have_trap ()
   && optab_handler (ctrap_optab, word_mode) != CODE_FOR_nothing
   && find_cond_trap (test_bb, then_edge, else_edge))
 goto success;
diff --git a/gcc/testsuite/gcc.dg/pr103028.c b/gcc/testsuite/gcc.dg/pr103028.c
new file mode 100644
index 000..e299ac5d5b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103028.c
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/103028 */
+/* { dg-do compile } */
+/* { dg-options "-Og -fif-conversion2 -fharden-conditional-branches" } */
+
+/* This used to fail on s390x only with -march=z9-109 and -march=z9-ec */
+/* { dg-additional-options "-march=z9-ec" { target s390*-*-* } } */
+
+unsigned char x;
+int foo(void)
+{
+  unsigned long long i = x;
+  i = i + 0x8000;
+  if (i > 0x)
+return x;
+  return 0;
+}
-- 
2.31.1



Re: [PATCH] Fix PR103028

2021-11-05 Thread Andreas Krebbel via Gcc-patches
On 11/5/21 20:34, Jeff Law wrote:
> 
> 
> On 11/5/2021 4:19 AM, Andreas Krebbel via Gcc-patches wrote:
>> This prevents find_cond_trap from being invoked after reload.  It may
>> generate compares which would require reloading.
>>
>> Bootstrapped and regression tested on s390x.
>>
>> Ok for mainline?
>>
>> gcc/ChangeLog:
>>
>>  PR rtl-optimization/103028
>>  * ifcvt.c (find_if_header): Invoke find_cond_trap only before
>>  reload.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR rtl-optimization/103028
>>  * gcc.dg/pr103028.c: New test.
> Shouldn't this be handled by the target by rejecting creating the trap 
> after reload has completed since the target seems to need new pseudos to 
> generate a conditional trap?  Otherwise we're penalizing targets which 
> don't need new pseudos to generate conditional traps.

In this case we do not explicitly create a new pseudo. It is rather that we 
emit a pattern which
would need to be handled by reload. I think passes which run after reload are 
not allowed to emit
patterns which would require reloading and it cannot be up to the backend to 
prevent this.

Instead of disabling this path after reload we could also try to check all the 
to be emitted insns
with constrain_operands to make sure at least one of the alternatives is an 
immediate match. This
should only reject cases which are really broken. I didn't try this because I 
haven't seen anything
like this in ifcvt.c while I have seen several places where we just bail out 
once reload_completed
is true.

Andreas


Re: [PATCH] IBM Z: Remove match_scratch workaround

2021-06-01 Thread Andreas Krebbel via Gcc-patches
On 6/2/21 4:21 AM, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> 
> Since commit dd1ef00c45ba ("Fix bug in the define_subst handling that
> made match_scratch unusable for multi-alternative patterns.") the
> workaround for that bug in *ashrdi3_31 is not only no
> longer necessary, but actually breaks the build.
> 
> Get rid of it by using only one alternative in (match_scratch).  It
> will be replicated as many times as needed in order to match the
> pattern with which (define_subst) is used.
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md(*ashrdi3_31): Use a single
>   constraint.
>   * config/s390/subst.md(cconly_subst): Use a single constraint
>   in (match_scratch).
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/ashr.c: New test.

Ok. Thanks!

Andreas


Re: [PATCH] s390: Add more vcond_mask patterns.

2021-06-09 Thread Andreas Krebbel via Gcc-patches
On 6/9/21 2:47 PM, Robin Dapp wrote:
>> I think the real problem is the expander name. That's why it could not be 
>> found by optab. The second
>> mode needs to be the int vector mode of op3. With that change the testcases 
>> work as expected:
>>
>> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
>> index c80d582a300d..ab605b3d2cf3 100644
>> --- a/gcc/config/s390/vector.md
>> +++ b/gcc/config/s390/vector.md
>> @@ -715,7 +715,7 @@
>> DONE;
>>   })
>>
>> -(define_expand "vcond_mask_"
>> +(define_expand "vcond_mask_"
>> [(set (match_operand:V 0 "register_operand" "")
>>  (if_then_else:V
>>   (eq (match_operand: 3 "register_operand" "")
> 
> Ah, yes, it's indeed much simpler that way.  Attached the revised 
> version with the small change and the new tests as a single patch now.
> 
> Regtest and bootstrap was successful.

Ok. Thanks!

Andreas


Re: [PATCH] IBM Z: Provide rawmemchr{qi,hi,si} expander

2021-10-07 Thread Andreas Krebbel via Gcc-patches
On 9/20/21 11:24, Stefan Schulze Frielinghaus wrote:
> This patch implements the rawmemchr expander as introduced in
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579649.html
> 
> Bootstrapped and regtested in conjunction with the patch from above on
> IBM Z.  Ok for mainline?
> 

> From 551362cda54048dc1a51588112f11c070ed52020 Mon Sep 17 00:00:00 2001
> From: Stefan Schulze Frielinghaus 
> Date: Mon, 8 Feb 2021 10:35:39 +0100
> Subject: [PATCH 2/2] IBM Z: Provide rawmemchr{qi,hi,si} expander
>
> gcc/ChangeLog:
>
>   * config/s390/s390-protos.h (s390_rawmemchrqi): Add prototype.
>   (s390_rawmemchrhi): Add prototype.
>   (s390_rawmemchrsi): Add prototype.
>   * config/s390/s390.c (s390_rawmemchr): New function.
>   (s390_rawmemchrqi): New function.
>   (s390_rawmemchrhi): New function.
>   (s390_rawmemchrsi): New function.
>   * config/s390/s390.md (rawmemchr): New expander.
>   (rawmemchr): New expander.
>   * config/s390/vector.md (vec_vfees): Basically a copy of
>   the pattern vfees from vx-builtins.md.
>   * config/s390/vx-builtins.md (*vfees): Remove.

Thanks! Would it make sense to also extend the strlen and movstr expanders
we have to support the additional character modes?

A few style comments below.

>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/s390/rawmemchr-1.c: New test.
> ---
>  gcc/config/s390/s390-protos.h   |  4 +
>  gcc/config/s390/s390.c  | 89 ++
>  gcc/config/s390/s390.md | 20 +
>  gcc/config/s390/vector.md   | 26 ++
>  gcc/config/s390/vx-builtins.md  | 26 --
>  gcc/testsuite/gcc.target/s390/rawmemchr-1.c | 99 +
>  6 files changed, 238 insertions(+), 26 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/rawmemchr-1.c
>
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 4b03c6e99f5..0d9619e8254 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -66,6 +66,10 @@ s390_asm_declare_function_size (FILE *asm_out_file,
>   const char *fnname ATTRIBUTE_UNUSED, tree decl);
>  #endif
>
> +extern void s390_rawmemchrqi(rtx dst, rtx src, rtx pat);
> +extern void s390_rawmemchrhi(rtx dst, rtx src, rtx pat);
> +extern void s390_rawmemchrsi(rtx dst, rtx src, rtx pat);
> +
>  #ifdef RTX_CODE
>  extern int s390_extra_constraint_str (rtx, int, const char *);
>  extern int s390_const_ok_for_constraint_p (HOST_WIDE_INT, int, const char *);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 54dd6332c3a..1435ce156e2 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -16559,6 +16559,95 @@ s390_excess_precision (enum excess_precision_type 
> type)
>  }
>  #endif
>
> +template  +   machine_mode elt_mode,
> +   rtx (*gen_vec_vfees) (rtx, rtx, rtx, rtx)>
> +static void
> +s390_rawmemchr(rtx dst, rtx src, rtx pat) {

I think it would be a bit easier to turn the vec_vfees expander into a
'parameterized name' and add the mode as parameter.  I'll attach a patch
to illustrate what this might look like.

> +  rtx lens = gen_reg_rtx (V16QImode);
> +  rtx pattern = gen_reg_rtx (vec_mode);
> +  rtx loop_start = gen_label_rtx ();
> +  rtx loop_end = gen_label_rtx ();
> +  rtx addr = gen_reg_rtx (Pmode);
> +  rtx offset = gen_reg_rtx (Pmode);
> +  rtx tmp = gen_reg_rtx (Pmode);
> +  rtx loadlen = gen_reg_rtx (SImode);
> +  rtx matchlen = gen_reg_rtx (SImode);
> +  rtx mem;
> +
> +  pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
> +  emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));
> +
> +  emit_move_insn (addr, XEXP (src, 0));
> +
> +  // alignment
> +  emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
> +  emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
> +  lens = convert_to_mode (vec_mode, lens, 1);
> +  emit_insn (gen_vec_vfees (lens, lens, pattern, GEN_INT (0)));
> +  lens = convert_to_mode (V4SImode, lens, 1);
> +  emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
> +  lens = convert_to_mode (vec_mode, lens, 1);

That back and forth NOP conversion stuff is ugly but I couldn't find a
more elegant way to write this without generating worse code.  Of
course we want to benefit here from the fact that the result operand
of vfees is already zero-extended.  Perhaps factor this out into a
utility function or an extra expander because we appear to need this
frequently?! Not a requirement for this patch though.

> +  emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1, 
> loop_end);
> +  force_expand_binop (Pmode, and_optab, addr, GEN_INT (15), tmp, 1, 
> OPTAB_DIRECT);
> +  force_expand_binop (Pmode, sub_optab, GEN_INT (16), tmp, tmp, 1, 
> OPTAB_DIRECT);
> +  force_expand_binop (Pmode, add_optab, addr, tmp, addr, 1, OPTAB_DIRECT);

Couldn't we just do this as '(addr + 16) & ~0xf' her

Re: [PATCH] IBM Z: Provide rawmemchr{qi,hi,si} expander

2021-10-08 Thread Andreas Krebbel via Gcc-patches
On 10/8/21 16:23, Stefan Schulze Frielinghaus wrote:
> On Thu, Oct 07, 2021 at 11:16:24AM +0200, Andreas Krebbel wrote:
>> On 9/20/21 11:24, Stefan Schulze Frielinghaus wrote:
>>> This patch implements the rawmemchr expander as introduced in
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579649.html
>>>
>>> Bootstrapped and regtested in conjunction with the patch from above on
>>> IBM Z.  Ok for mainline?
>>>
>>
>>> From 551362cda54048dc1a51588112f11c070ed52020 Mon Sep 17 00:00:00 2001
>>> From: Stefan Schulze Frielinghaus 
>>> Date: Mon, 8 Feb 2021 10:35:39 +0100
>>> Subject: [PATCH 2/2] IBM Z: Provide rawmemchr{qi,hi,si} expander
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/s390/s390-protos.h (s390_rawmemchrqi): Add prototype.
>>> (s390_rawmemchrhi): Add prototype.
>>> (s390_rawmemchrsi): Add prototype.
>>> * config/s390/s390.c (s390_rawmemchr): New function.
>>> (s390_rawmemchrqi): New function.
>>> (s390_rawmemchrhi): New function.
>>> (s390_rawmemchrsi): New function.
>>> * config/s390/s390.md (rawmemchr): New expander.
>>> (rawmemchr): New expander.
>>> * config/s390/vector.md (vec_vfees): Basically a copy of
>>> the pattern vfees from vx-builtins.md.
>>> * config/s390/vx-builtins.md (*vfees): Remove.
>>
>> Thanks! Would it make sense to also extend the strlen and movstr expanders
>> we have to support the additional character modes?
> 
> For strlen-like loops over non-character arrays the current
> implementation in the loop distribution pass uses rawmemchr and
> computes pointer difference in order to compute the length.  Thus we get
> strlen for free and don't need to reimplement it.

Good to know. Thanks!

...
> Please find a new version attached.  I did another bootstrap+regtest on
> IBM Z.  Ok for mainline?
> 
> Thanks for your detailed review!

Ok for mainline. Thanks!

Andreas


Re: [PATCH v3] IBM Z: Use @PLT symbols for local functions in 64-bit mode

2021-07-16 Thread Andreas Krebbel via Gcc-patches
On 7/12/21 9:23 PM, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573614.html
> v1 -> v2: Do not use UNSPEC_PLT in 64-bit code and rename it to
>   UNSPEC_PLT31 (Ulrich, Andreas).  Do not append @PLT only to
>   weak symbols in non-PIC code (Ulrich).  Add TLS tests.
> 
> v2: https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574646.html
> v2 -> v3: Use %K in function_profiler() and s390_output_mi_thunk(),
>   add tests for these cases.
> 
> 
> 
> This helps with generating code for kernel hotpatches, which contain
> individual functions and are loaded more than 2G away from vmlinux.
> This should not create performance regressions for the normal use
> cases, because for local functions ld replaces @PLT calls with direct
> calls.
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (bras_sym_operand): Accept all
>   functions in 64-bit mode, use UNSPEC_PLT31.
>   (larl_operand): Use UNSPEC_PLT31.
>   * config/s390/s390.c (s390_loadrelative_operand_p): Likewise.
>   (legitimize_pic_address): Likewise.
>   (s390_emit_tls_call_insn): Mark __tls_get_offset as function,
>   use UNSPEC_PLT31.
>   (s390_delegitimize_address): Use UNSPEC_PLT31.
>   (s390_output_addr_const_extra): Likewise.
>   (print_operand): Add @PLT to TLS calls, handle %K.
>   (s390_function_profiler): Mark __fentry__/_mcount as function,
>   use %K, use UNSPEC_PLT31.
>   (s390_output_mi_thunk): Use only UNSPEC_GOT, use %K.
>   (s390_emit_call): Use UNSPEC_PLT31.
>   (s390_emit_tpf_eh_return): Mark __tpf_eh_return as function.
>   * config/s390/s390.md (UNSPEC_PLT31): Rename from UNSPEC_PLT.
>   (*movdi_64): Use %K.
>   (reload_base_64): Likewise.
>   (*sibcall_brc): Likewise.
>   (*sibcall_brcl): Likewise.
>   (*sibcall_value_brc): Likewise.
>   (*sibcall_value_brcl): Likewise.
>   (*bras): Likewise.
>   (*brasl): Likewise.
>   (*bras_r): Likewise.
>   (*brasl_r): Likewise.
>   (*bras_tls): Likewise.
>   (*brasl_tls): Likewise.
>   (main_base_64): Likewise.
>   (reload_base_64): Likewise.
>   (@split_stack_call): Likewise.

Ok. Thanks!

Andreas


[PATCH] Adjust docu of TARGET_VECTORIZE_VEC_PERM_CONST

2021-07-27 Thread Andreas Krebbel via Gcc-patches
There are also memory operands passed for in0 and in1.

Ok for mainline?

gcc/ChangeLog:

* target.def: Describe in0 and in1 as being either register or
memory operands.
* doc/tm.texi: Regenerate.
---
 gcc/doc/tm.texi | 7 ---
 gcc/target.def  | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c8f4abe3e41..31f188daf00 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6124,9 +6124,10 @@ This hook is used to test whether the target can permute 
up to two
 vectors of mode @var{mode} using the permutation vector @code{sel}, and
 also to emit such a permutation.  In the former case @var{in0}, @var{in1}
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are
-the source vectors and @var{out} is the destination vector; all three are
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if
-@var{sel} describes a permutation on one vector instead of two.
+the source vectors and @var{out} is the destination vector.  The destination
+vector is a register of mode @var{mode} while the source vectors can be either
+register or memory operands of mode @var{mode}.  @var{in1} is the same as
+@var{in0} if @var{sel} describes a permutation on one vector instead of two.
 
 Return true if the operation is possible, emitting instructions for it
 if rtxes are provided.
diff --git a/gcc/target.def b/gcc/target.def
index 2e40448e6c5..b368d81be63 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1860,9 +1860,10 @@ DEFHOOK
 vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\
 also to emit such a permutation.  In the former case @var{in0}, @var{in1}\n\
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are\n\
-the source vectors and @var{out} is the destination vector; all three are\n\
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
-@var{sel} describes a permutation on one vector instead of two.\n\
+the source vectors and @var{out} is the destination vector.  The destination\n\
+vector is a register of mode @var{mode} while the source vectors can be 
either\n\
+register or memory operands of mode @var{mode}.  @var{in1} is the same as\n\
+@var{in0} if @var{sel} describes a permutation on one vector instead of two.\n\
 \n\
 Return true if the operation is possible, emitting instructions for it\n\
 if rtxes are provided.\n\
-- 
2.31.1



Re: [PATCH] IBM Z: Enable LSan and TSan

2021-07-27 Thread Andreas Krebbel via Gcc-patches
On 7/27/21 10:04 PM, Ilya Leoshkevich via Gcc-patches wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> libsanitizer/ChangeLog:
> 
>   * configure.tgt (s390*-*-linux*): Enable LSan and TSan for
>   s390x.

Ok. Thanks!

Andreas


Re: [PATCH] Adjust docu of TARGET_VECTORIZE_VEC_PERM_CONST

2021-07-28 Thread Andreas Krebbel via Gcc-patches
On 7/28/21 9:43 AM, Richard Biener wrote:
> On Wed, Jul 28, 2021 at 8:44 AM Andreas Krebbel via Gcc-patches
>  wrote:
>>
>> There are also memory operands passed for in0 and in1.
>>
>> Ok for mainline?
> 
> They can also be constant vectors, I'd just not specify the operand
> kind - usually
> expanders are not limited as to what they feed down.

Right, I'll just replace "registers" with "operands" then. Ok?

 also to emit such a permutation.  In the former case @var{in0}, @var{in1}\n\
 and @var{out} are all null.  In the latter case @var{in0} and @var{in1} are\n\
 the source vectors and @var{out} is the destination vector; all three are\n\
-registers of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
+operands of mode @var{mode}.  @var{in1} is the same as @var{in0} if\n\
 @var{sel} describes a permutation on one vector instead of two.\n\
 \n\
 Return true if the operation is possible, emitting instructions for it\n\

Andreas


Re: [PATCH] IBM Z: Fix 5 tests in 31-bit mode

2021-07-28 Thread Andreas Krebbel via Gcc-patches
On 7/23/21 2:47 PM, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> 
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/global-array-element-pic2.c: Add -mzarch, add
>   an expectation for 31-bit mode.
>   * gcc.target/s390/load-imm64-1.c: Use unsigned long long.
>   * gcc.target/s390/load-imm64-2.c: Likewise.
>   * gcc.target/s390/vector/long-double-vx-macro-off-on.c: Use
>   -mzarch.
>   * gcc.target/s390/vector/long-double-vx-macro-on-off.c:
>   Likewise.

Ok. Thanks!

Andreas


[PATCH 4/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vector merge

2021-07-29 Thread Andreas Krebbel via Gcc-patches
This patch implements the TARGET_VECTORIZE_VEC_PERM_CONST target hook in
the IBM Z backend. The initial implementation only exploits the vector
merge instructions but there is more to come.

gcc/ChangeLog:

* config/s390/s390.c (MAX_VECT_LEN): Define macro.
(struct expand_vec_perm_d): Define struct.
(expand_perm_with_merge): New function.
(vectorize_vec_perm_const_1): New function.
(s390_vectorize_vec_perm_const): New function.
(TARGET_VECTORIZE_VEC_PERM_CONST): Define target macro.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/perm-merge.c: New test.
* gcc.target/s390/vector/vec-types.h: New test.
---
 gcc/config/s390/s390.c| 108 ++
 .../gcc.target/s390/vector/perm-merge.c   | 104 +
 .../gcc.target/s390/vector/vec-types.h|  35 ++
 3 files changed, 247 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-merge.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-types.h

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b1a9ca9d8aa..684241b00b8 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16928,6 +16928,110 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
   return after_md_seq;
 }
 
+#define MAX_VECT_LEN   16
+
+struct expand_vec_perm_d
+{
+  rtx target, op0, op1;
+  unsigned char perm[MAX_VECT_LEN];
+  machine_mode vmode;
+  unsigned char nelt;
+  bool testing_p;
+};
+
+/* Try to expand the vector permute operation described by D using the
+   vector merge instructions vmrl and vmrh.  Return true if vector merge
+   could be used.  */
+static bool
+expand_perm_with_merge (const struct expand_vec_perm_d &d)
+{
+  bool merge_lo_p = true;
+  bool merge_hi_p = true;
+
+  if (d.nelt % 2)
+return false;
+
+  // For V4SI this checks for: { 0, 4, 1, 5 }
+  for (int telt = 0; telt < d.nelt; telt++)
+if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
+  {
+   merge_hi_p = false;
+   break;
+  }
+
+  if (!merge_hi_p)
+{
+  // For V4SI this checks for: { 2, 6, 3, 7 }
+  for (int telt = 0; telt < d.nelt; telt++)
+   if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
+ {
+   merge_lo_p = false;
+   break;
+ }
+}
+  else
+merge_lo_p = false;
+
+  if (d.testing_p)
+return merge_lo_p || merge_hi_p;
+
+  if (merge_lo_p || merge_hi_p)
+s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+
+  return merge_lo_p || merge_hi_p;
+}
+
+/* Try to find the best sequence for the vector permute operation
+   described by D.  Return true if the operation could be
+   expanded.  */
+static bool
+vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
+{
+  if (expand_perm_with_merge (d))
+return true;
+
+  return false;
+}
+
+/* Return true if we can emit instructions for the constant
+   permutation vector in SEL.  If TARGET, OP0, OP1 are non-null the
+   hook is supposed to emit the required INSNs.  */
+
+bool
+s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+  const vec_perm_indices &sel)
+{
+  struct expand_vec_perm_d d;
+  unsigned char perm[MAX_VECT_LEN];
+  unsigned int i, nelt;
+
+  if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
+return false;
+
+  d.target = target;
+  d.op0 = op0;
+  d.op1 = op1;
+
+  d.vmode = vmode;
+  gcc_assert (VECTOR_MODE_P (d.vmode));
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = target == NULL_RTX;
+
+  gcc_assert (target == NULL_RTX || REG_P (target));
+  gcc_assert (sel.length () == nelt);
+  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
+
+  for (i = 0; i < nelt; i++)
+{
+  unsigned char e = sel[i];
+  gcc_assert (e < 2 * nelt);
+  d.perm[i] = e;
+  perm[i] = e;
+}
+
+  return vectorize_vec_perm_const_1 (d);
+}
+
 /* Initialize GCC target structure.  */
 
 #undef  TARGET_ASM_ALIGNED_HI_OP
@@ -17238,6 +17342,10 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
 #undef TARGET_MD_ASM_ADJUST
 #define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
 
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"
diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-merge.c b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
new file mode 100644
index 000..51b23ddd886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* { dg-final { scan-assembler-times "\tvmrhb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlh\t" 2 } 

  1   2   >