Bootstrapped and regtested on s390x-redhat-linux. Also tested under valgrind (PR 100278 is now fixed). OK for master?
v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568771.html v1 -> v2: Use the UNSPEC pattern, which is less efficient, but is more on the "obviously correct" side than gen_raw_SUBREG(). gen_fprx2_to_tf() and gen_tf_to_fprx2() cannot handle hard registers, since the subregs they create do not pass validation. Change s390_md_asm_adjust() to manually copy between hard VRs and FPRs instead of using these two functions. gcc/ChangeLog: PR target/100217 * config/s390/s390.c (s390_hard_fp_reg_p): New function. (s390_md_asm_adjust): Handle hard registers. gcc/testsuite/ChangeLog: PR target/100217 * gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c: New test. * gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c: New test. --- gcc/config/s390/s390.c | 52 +++++++++++++++++-- .../long-double-asm-in-out-hard-fp-reg.c | 33 ++++++++++++ .../long-double-asm-inout-hard-fp-reg.c | 31 +++++++++++ 3 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index a9c945c5ee9..88361f98c7e 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16754,6 +16754,23 @@ f_constraint_p (const char *constraint) return seen_f_p && !seen_v_p; } +/* Return TRUE iff X is a hard floating-point (and not a vector) register. */ + +static bool +s390_hard_fp_reg_p (rtx x) +{ + if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x))) + return false; + + tree decl = REG_EXPR (x); + if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl))) + return false; + + const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + + return name[0] == '*' && name[1] == 'f'; +} + /* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f" constraints when long doubles are stored in vector registers. 
*/ @@ -16787,9 +16804,24 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, gcc_assert (allows_reg); gcc_assert (!is_inout); /* Copy output value from a FPR pair into a vector register. */ - rtx fprx2 = gen_reg_rtx (FPRX2mode); + rtx fprx2; push_to_sequence2 (after_md_seq, after_md_end); - emit_insn (gen_fprx2_to_tf (outputs[i], fprx2)); + if (s390_hard_fp_reg_p (outputs[i])) + { + fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i])); + /* The first half is already at the correct location, copy only the + * second one. Use the UNSPEC pattern instead of the SUBREG one, + * since s390_can_change_mode_class() rejects + * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */ + rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i])); + rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1); + emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx)); + } + else + { + fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_fprx2_to_tf (outputs[i], fprx2)); + } after_md_seq = get_insns (); after_md_end = get_last_insn (); end_sequence (); @@ -16813,8 +16845,20 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, continue; gcc_assert (allows_reg); /* Copy input value from a vector register into a FPR pair. */ - rtx fprx2 = gen_reg_rtx (FPRX2mode); - emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i])); + rtx fprx2; + if (s390_hard_fp_reg_p (inputs[i])) + { + fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i])); + /* Copy only the second half. 
*/ + rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1); + rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i])); + emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3))); + } + else + { + fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i])); + } inputs[i] = fprx2; input_modes[i] = FPRX2mode; } diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c new file mode 100644 index 00000000000..2dcaf08f00b --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double +sqxbr (long double x) +{ + register long double in asm("f0") = x; + register long double out asm("f1"); + + asm("sqxbr\t%0,%1" : "=f"(out) : "f"(in)); + asm("# %0" : "+f"(out)); + + return out; +} + +/* Ideally `vpdi %v3,%v1,%v3,5` should be optimized away, but the compiler + * can't do it, because the UNSPEC pattern operates on the whole register. + * Using the SUBREG pattern solves this problem, but it's fragile. 
+ */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v2,%v0,%v2,5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v1,%v1,%v3,0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v3,%v1,%v3,5\n} 1 } } */ + +int +main (void) +{ + long double x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + assert (sqxbr (x) == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c new file mode 100644 index 00000000000..6c5f88d8652 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double +sqxbr (long double x) +{ + register long double inout asm("f4") = x; + + asm("sqxbr\t%0,%0" : "+f"(inout)); + asm("# %0" : "+f"(inout)); + + return inout; +} + +/* Ideally there should be just one `vpdi %v6,%v4,%v6,5`, but the compiler + * can't optimize it away, because the UNSPEC pattern operates on the whole + * register. Using the SUBREG pattern solves this problem, but it's fragile. + */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v6,%v4,%v6,5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v4,%v4,%v6,0\n} 2 } } */ + +int +main (void) +{ + long double x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + assert (sqxbr (x) == exp); +} -- 2.29.2