https://gcc.gnu.org/g:0f0b91ef70218e2cb4ab795ef04725a68ea04b15
commit r15-7049-g0f0b91ef70218e2cb4ab795ef04725a68ea04b15
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Mon Jan 20 10:01:09 2025 +0100

    s390: arch15: Count leading/trailing zeros

    Add vector single element 128-bit integer support utilizing new
    instructions vclzq and vctzq. Furthermore, add scalar 64-bit integer
    support utilizing new instructions clzg and ctzg. For ctzg, also define
    the resulting value if the input operand equals zero.

    gcc/ChangeLog:

            * config/s390/s390-builtins.def (s390_vec_cntlz): Add 128-bit
            integer overloads.
            (s390_vclzq): Add.
            (s390_vec_cnttz): Add 128-bit integer overloads.
            (s390_vctzq): Add.
            * config/s390/s390-builtin-types.def: Update accordingly.
            * config/s390/s390.h (CTZ_DEFINED_VALUE_AT_ZERO): Define.
            * config/s390/s390.md (*clzg): New insn.
            (clztidi2): Exploit new insn for target arch15.
            (ctzdi2): New insn.
            * config/s390/vector.md (clz<mode>2): Extend modes including
            128-bit integer.
            (ctz<mode>2): Likewise.

Diff:
---
 gcc/config/s390/s390-builtin-types.def |  1 +
 gcc/config/s390/s390-builtins.def      | 10 +++++++--
 gcc/config/s390/s390.h                 |  3 +++
 gcc/config/s390/s390.md                | 40 ++++++++++++++++++++++++++--------
 gcc/config/s390/vector.md              | 15 ++++++++-----
 5 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def
index f05618393092..6f903deb7450 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -610,6 +610,7 @@ DEF_OV_TYPE (BT_OV_UV1TI_UV2DI, BT_UV1TI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI, BT_UV1TI, BT_UV2DI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI_UV1TI, BT_UV1TI, BT_UV2DI, BT_UV2DI, BT_UV1TI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV4SI_UV4SI, BT_UV1TI, BT_UV4SI, BT_UV4SI)
+DEF_OV_TYPE (BT_OV_UV1TI_V1TI, BT_UV1TI, BT_V1TI)
 DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGCONSTPTR)
 DEF_OV_TYPE
(BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 2cf443f6cdba..8eb07e6c79d3 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -1639,7 +1639,7 @@ B_DEF (vec_cmpltuv2di, vec_cmpltuv2di, 0,
 B_DEF (vec_cmpltv4sf, vec_cmpltv4sf_quiet_nocc,0, B_INT | B_VXE, 0, BT_FN_V4SI_V4SF_V4SF)
 B_DEF (vec_cmpltv2df, vec_cmpltv2df_quiet_nocc,0, B_INT | B_VX, 0, BT_FN_V2DI_V2DF_V2DF)
 
-OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8, s390_vec_cntlz_u64, B_VX, BT_FN_OV4SI_OV4SI)
+OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8, s390_vec_cntlz_u128,B_VX, BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_cntlz_s8, s390_vclzb, 0, 0, BT_OV_UV16QI_V16QI)
 OB_DEF_VAR (s390_vec_cntlz_u8, s390_vclzb, 0, 0, BT_OV_UV16QI_UV16QI)
 OB_DEF_VAR (s390_vec_cntlz_s16, s390_vclzh, 0, 0, BT_OV_UV8HI_V8HI)
@@ -1648,13 +1648,16 @@ OB_DEF_VAR (s390_vec_cntlz_s32, s390_vclzf, 0,
 OB_DEF_VAR (s390_vec_cntlz_u32, s390_vclzf, 0, 0, BT_OV_UV4SI_UV4SI)
 OB_DEF_VAR (s390_vec_cntlz_s64, s390_vclzg, 0, 0, BT_OV_UV2DI_V2DI)
 OB_DEF_VAR (s390_vec_cntlz_u64, s390_vclzg, 0, 0, BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cntlz_s128, s390_vclzq, B_VXE3, 0, BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cntlz_u128, s390_vclzq, B_VXE3, 0, BT_OV_UV1TI_UV1TI)
 
 B_DEF (s390_vclzb, clzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
 B_DEF (s390_vclzh, clzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI)
 B_DEF (s390_vclzf, clzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI)
 B_DEF (s390_vclzg, clzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI)
+B_DEF (s390_vclzq, clzti2, 0, B_VXE3, 0, BT_FN_UINT128_UINT128)
 
-OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8, s390_vec_cnttz_u64, B_VX, BT_FN_OV4SI_OV4SI)
+OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8, s390_vec_cnttz_u128,B_VX, BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_cnttz_s8, s390_vctzb, 0, 0, BT_OV_UV16QI_V16QI)
 OB_DEF_VAR (s390_vec_cnttz_u8, s390_vctzb, 0, 0, BT_OV_UV16QI_UV16QI)
 OB_DEF_VAR (s390_vec_cnttz_s16, s390_vctzh, 0, 0, BT_OV_UV8HI_V8HI)
@@ -1663,11 +1666,14 @@ OB_DEF_VAR (s390_vec_cnttz_s32, s390_vctzf, 0,
 OB_DEF_VAR (s390_vec_cnttz_u32, s390_vctzf, 0, 0, BT_OV_UV4SI_UV4SI)
 OB_DEF_VAR (s390_vec_cnttz_s64, s390_vctzg, 0, 0, BT_OV_UV2DI_V2DI)
 OB_DEF_VAR (s390_vec_cnttz_u64, s390_vctzg, 0, 0, BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cnttz_s128, s390_vctzq, B_VXE3, 0, BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cnttz_u128, s390_vctzq, B_VXE3, 0, BT_OV_UV1TI_UV1TI)
 
 B_DEF (s390_vctzb, ctzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
 B_DEF (s390_vctzh, ctzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI)
 B_DEF (s390_vctzf, ctzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI)
 B_DEF (s390_vctzg, ctzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI)
+B_DEF (s390_vctzq, ctzti2, 0, B_VXE3, 0, BT_FN_UINT128_UINT128)
 
 OB_DEF (s390_vec_xor, s390_vec_xor_b8, s390_vec_xor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_xor_b8, s390_vx, 0, 0, BT_OV_BV16QI_BV16QI_BV16QI)
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 2f5b95fa2b84..957877b6a389 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -1034,6 +1034,9 @@ do { \
 /* Specify the value which is used when clz operand is zero. */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
 
+/* Specify the value which is used when ctz operand is zero. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+
 /* Machine-specific symbol_ref flags.
*/
 #define SYMBOL_FLAG_ALIGN_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT
 #define SYMBOL_FLAG_ALIGN_MASK \
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7b5b9709f56e..03bd85e1398d 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9479,21 +9479,31 @@
         (clz:DI (match_operand:DI 1 "register_operand" "d")))]
   "TARGET_EXTIMM && TARGET_ZARCH"
 {
-  rtx_insn *insn;
-  rtx clz_equal;
-  rtx wide_reg = gen_reg_rtx (TImode);
-  rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
+  if (!(TARGET_ARCH15 && TARGET_64BIT))
+    {
+      rtx_insn *insn;
+      rtx clz_equal;
+      rtx wide_reg = gen_reg_rtx (TImode);
+      rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
 
-  clz_equal = gen_rtx_CLZ (DImode, operands[1]);
+      clz_equal = gen_rtx_CLZ (DImode, operands[1]);
 
-  emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
+      emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
 
-  insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
-  set_unique_reg_note (insn, REG_EQUAL, clz_equal);
+      insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
+      set_unique_reg_note (insn, REG_EQUAL, clz_equal);
 
-  DONE;
+      DONE;
+    }
 })
 
+(define_insn "*clzg"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+        (clz:DI (match_operand:DI 1 "register_operand" "d")))]
+  "TARGET_ARCH15 && TARGET_64BIT"
+  "clzg\t%0,%1"
+  [(set_attr "op_type" "RRE")])
+
 ; CLZ result is in hard reg op0 - this is the high part of the target operand
 ; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
 (define_insn "clztidi2"
@@ -9512,6 +9522,18 @@
   [(set_attr "op_type" "RRE")])
 
 
+;;
+;; Count Trailing Zeros.
+;;
+
+(define_insn "ctzdi2"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+        (ctz:DI (match_operand:DI 1 "register_operand" "d")))]
+  "TARGET_ARCH15 && TARGET_64BIT"
+  "ctzg\t%0,%1"
+  [(set_attr "op_type" "RRE")])
+
+
 ;;
 ;;- Rotate instructions.
 ;;
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 06641bfcc7bf..2e7419c45c38 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -25,6 +25,9 @@
 (define_mode_iterator VT
   [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
    V2SF V4SF V1DF V2DF V1TF V1TI TI])
+(define_mode_iterator VT_VXE3
+  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
+   V2SF V4SF V1DF V2DF V1TF (V1TI "TARGET_VXE3") (TI "TARGET_VXE3")])
 
 ; All modes directly supported by the hardware having full vector reg size
 (define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V1TI TI V2DF
@@ -1369,19 +1372,19 @@
 })
 
 ; Count leading zeros
-; vclzb, vclzh, vclzf, vclzg
+; vclzb, vclzh, vclzf, vclzg, vclzq
 (define_insn "clz<mode>2"
-  [(set (match_operand:V 0 "register_operand" "=v")
-        (clz:V (match_operand:V 1 "register_operand" "v")))]
+  [(set (match_operand:VT_VXE3 0 "register_operand" "=v")
+        (clz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand" "v")))]
   "TARGET_VX"
   "vclz<bhfgq>\t%v0,%v1"
   [(set_attr "op_type" "VRR")])
 
 ; Count trailing zeros
-; vctzb, vctzh, vctzf, vctzg
+; vctzb, vctzh, vctzf, vctzg, vctzq
 (define_insn "ctz<mode>2"
-  [(set (match_operand:V 0 "register_operand" "=v")
-        (ctz:V (match_operand:V 1 "register_operand" "v")))]
+  [(set (match_operand:VT_VXE3 0 "register_operand" "=v")
+        (ctz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand" "v")))]
   "TARGET_VX"
   "vctz<bhfgq>\t%v0,%v1"
   [(set_attr "op_type" "VRR")])