https://gcc.gnu.org/g:0f0b91ef70218e2cb4ab795ef04725a68ea04b15

commit r15-7049-g0f0b91ef70218e2cb4ab795ef04725a68ea04b15
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Mon Jan 20 10:01:09 2025 +0100

    s390: arch15: Count leading/trailing zeros
    
    Add vector single element 128-bit integer support utilizing new
    instructions vclzq and vctzq.  Furthermore, add scalar 64-bit integer
    support utilizing new instructions clzg and ctzg.  For ctzg, also define
    the resulting value if the input operand equals zero.
    
    gcc/ChangeLog:
    
            * config/s390/s390-builtins.def (s390_vec_cntlz): Add 128-bit
            integer overloads.
            (s390_vclzq): Add.
            (s390_vec_cnttz): Add 128-bit integer overloads.
            (s390_vctzq): Add.
            * config/s390/s390-builtin-types.def: Update accordingly.
            * config/s390/s390.h (CTZ_DEFINED_VALUE_AT_ZERO): Define.
            * config/s390/s390.md (*clzg): New insn.
            (clztidi2): Exploit new insn for target arch15.
            (ctzdi2): New insn.
            * config/s390/vector.md (clz<mode>2): Extend modes including
            128-bit integer.
            (ctz<mode>2): Likewise.

Diff:
---
 gcc/config/s390/s390-builtin-types.def |  1 +
 gcc/config/s390/s390-builtins.def      | 10 +++++++--
 gcc/config/s390/s390.h                 |  3 +++
 gcc/config/s390/s390.md                | 40 ++++++++++++++++++++++++++--------
 gcc/config/s390/vector.md              | 15 ++++++++-----
 5 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def
index f05618393092..6f903deb7450 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -610,6 +610,7 @@ DEF_OV_TYPE (BT_OV_UV1TI_UV2DI, BT_UV1TI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI, BT_UV1TI, BT_UV2DI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI_UV1TI, BT_UV1TI, BT_UV2DI, BT_UV2DI, BT_UV1TI)
 DEF_OV_TYPE (BT_OV_UV1TI_UV4SI_UV4SI, BT_UV1TI, BT_UV4SI, BT_UV4SI)
+DEF_OV_TYPE (BT_OV_UV1TI_V1TI, BT_UV1TI, BT_V1TI)
 DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
 DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGCONSTPTR)
 DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 2cf443f6cdba..8eb07e6c79d3 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -1639,7 +1639,7 @@ B_DEF      (vec_cmpltuv2di,             vec_cmpltuv2di,   
  0,
 B_DEF      (vec_cmpltv4sf,              vec_cmpltv4sf_quiet_nocc,0,            
 B_INT | B_VXE,      0,                  BT_FN_V4SI_V4SF_V4SF)
 B_DEF      (vec_cmpltv2df,              vec_cmpltv2df_quiet_nocc,0,            
 B_INT | B_VX,       0,                  BT_FN_V2DI_V2DF_V2DF)
 
-OB_DEF     (s390_vec_cntlz,             s390_vec_cntlz_s8,  
s390_vec_cntlz_u64, B_VX,               BT_FN_OV4SI_OV4SI)
+OB_DEF     (s390_vec_cntlz,             s390_vec_cntlz_s8,  
s390_vec_cntlz_u128,B_VX,               BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_cntlz_s8,          s390_vclzb,         0,                 
 0,                  BT_OV_UV16QI_V16QI)
 OB_DEF_VAR (s390_vec_cntlz_u8,          s390_vclzb,         0,                 
 0,                  BT_OV_UV16QI_UV16QI)
 OB_DEF_VAR (s390_vec_cntlz_s16,         s390_vclzh,         0,                 
 0,                  BT_OV_UV8HI_V8HI)
@@ -1648,13 +1648,16 @@ OB_DEF_VAR (s390_vec_cntlz_s32,         s390_vclzf,     
    0,
 OB_DEF_VAR (s390_vec_cntlz_u32,         s390_vclzf,         0,                 
 0,                  BT_OV_UV4SI_UV4SI)
 OB_DEF_VAR (s390_vec_cntlz_s64,         s390_vclzg,         0,                 
 0,                  BT_OV_UV2DI_V2DI)
 OB_DEF_VAR (s390_vec_cntlz_u64,         s390_vclzg,         0,                 
 0,                  BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cntlz_s128,        s390_vclzq,         B_VXE3,            
 0,                  BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cntlz_u128,        s390_vclzq,         B_VXE3,            
 0,                  BT_OV_UV1TI_UV1TI)
 
 B_DEF      (s390_vclzb,                 clzv16qi2,          0,                 
 B_VX,               0,                  BT_FN_UV16QI_UV16QI)
 B_DEF      (s390_vclzh,                 clzv8hi2,           0,                 
 B_VX,               0,                  BT_FN_UV8HI_UV8HI)
 B_DEF      (s390_vclzf,                 clzv4si2,           0,                 
 B_VX,               0,                  BT_FN_UV4SI_UV4SI)
 B_DEF      (s390_vclzg,                 clzv2di2,           0,                 
 B_VX,               0,                  BT_FN_UV2DI_UV2DI)
+B_DEF      (s390_vclzq,                 clzti2,             0,                 
 B_VXE3,             0,                  BT_FN_UINT128_UINT128)
 
-OB_DEF     (s390_vec_cnttz,             s390_vec_cnttz_s8,  
s390_vec_cnttz_u64, B_VX,               BT_FN_OV4SI_OV4SI)
+OB_DEF     (s390_vec_cnttz,             s390_vec_cnttz_s8,  
s390_vec_cnttz_u128,B_VX,               BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_cnttz_s8,          s390_vctzb,         0,                 
 0,                  BT_OV_UV16QI_V16QI)
 OB_DEF_VAR (s390_vec_cnttz_u8,          s390_vctzb,         0,                 
 0,                  BT_OV_UV16QI_UV16QI)
 OB_DEF_VAR (s390_vec_cnttz_s16,         s390_vctzh,         0,                 
 0,                  BT_OV_UV8HI_V8HI)
@@ -1663,11 +1666,14 @@ OB_DEF_VAR (s390_vec_cnttz_s32,         s390_vctzf,     
    0,
 OB_DEF_VAR (s390_vec_cnttz_u32,         s390_vctzf,         0,                 
 0,                  BT_OV_UV4SI_UV4SI)
 OB_DEF_VAR (s390_vec_cnttz_s64,         s390_vctzg,         0,                 
 0,                  BT_OV_UV2DI_V2DI)
 OB_DEF_VAR (s390_vec_cnttz_u64,         s390_vctzg,         0,                 
 0,                  BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cnttz_s128,        s390_vctzq,         B_VXE3,            
 0,                  BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cnttz_u128,        s390_vctzq,         B_VXE3,            
 0,                  BT_OV_UV1TI_UV1TI)
 
 B_DEF      (s390_vctzb,                 ctzv16qi2,          0,                 
 B_VX,               0,                  BT_FN_UV16QI_UV16QI)
 B_DEF      (s390_vctzh,                 ctzv8hi2,           0,                 
 B_VX,               0,                  BT_FN_UV8HI_UV8HI)
 B_DEF      (s390_vctzf,                 ctzv4si2,           0,                 
 B_VX,               0,                  BT_FN_UV4SI_UV4SI)
 B_DEF      (s390_vctzg,                 ctzv2di2,           0,                 
 B_VX,               0,                  BT_FN_UV2DI_UV2DI)
+B_DEF      (s390_vctzq,                 ctzti2,             0,                 
 B_VXE3,             0,                  BT_FN_UINT128_UINT128)
 
 OB_DEF     (s390_vec_xor,               s390_vec_xor_b8,    
s390_vec_xor_dbl_c, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_xor_b8,            s390_vx,            0,                 
 0,                  BT_OV_BV16QI_BV16QI_BV16QI)
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 2f5b95fa2b84..957877b6a389 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -1034,6 +1034,9 @@ do {                                                      
                \
 /* Specify the value which is used when clz operand is zero.  */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
 
+/* Specify the value which is used when ctz operand is zero.  */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+
 /* Machine-specific symbol_ref flags.  */
 #define SYMBOL_FLAG_ALIGN_SHIFT          SYMBOL_FLAG_MACH_DEP_SHIFT
 #define SYMBOL_FLAG_ALIGN_MASK    \
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7b5b9709f56e..03bd85e1398d 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9479,21 +9479,31 @@
        (clz:DI (match_operand:DI 1 "register_operand" "d")))]
   "TARGET_EXTIMM && TARGET_ZARCH"
 {
-  rtx_insn *insn;
-  rtx clz_equal;
-  rtx wide_reg = gen_reg_rtx (TImode);
-  rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
+  if (!(TARGET_ARCH15 && TARGET_64BIT))
+    {
+      rtx_insn *insn;
+      rtx clz_equal;
+      rtx wide_reg = gen_reg_rtx (TImode);
+      rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
 
-  clz_equal = gen_rtx_CLZ (DImode, operands[1]);
+      clz_equal = gen_rtx_CLZ (DImode, operands[1]);
 
-  emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
+      emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
 
-  insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
-  set_unique_reg_note (insn, REG_EQUAL, clz_equal);
+      insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
+      set_unique_reg_note (insn, REG_EQUAL, clz_equal);
 
-  DONE;
+      DONE;
+    }
 })
 
+(define_insn "*clzg"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+       (clz:DI (match_operand:DI 1 "register_operand" "d")))]
+  "TARGET_ARCH15 && TARGET_64BIT"
+  "clzg\t%0,%1"
+  [(set_attr "op_type" "RRE")])
+
 ; CLZ result is in hard reg op0 - this is the high part of the target operand
 ; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
 (define_insn "clztidi2"
@@ -9512,6 +9522,18 @@
   [(set_attr "op_type"  "RRE")])
 
 
+;;
+;; Count Trailing Zeros.
+;;
+
+(define_insn "ctzdi2"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+       (ctz:DI (match_operand:DI 1 "register_operand" "d")))]
+  "TARGET_ARCH15 && TARGET_64BIT"
+  "ctzg\t%0,%1"
+  [(set_attr "op_type" "RRE")])
+
+
 ;;
 ;;- Rotate instructions.
 ;;
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 06641bfcc7bf..2e7419c45c38 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -25,6 +25,9 @@
 (define_mode_iterator VT
   [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
    V2SF V4SF V1DF V2DF V1TF V1TI TI])
+(define_mode_iterator VT_VXE3
+  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
+   V2SF V4SF V1DF V2DF V1TF (V1TI "TARGET_VXE3") (TI "TARGET_VXE3")])
 
 ; All modes directly supported by the hardware having full vector reg size
 (define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V1TI TI V2DF
@@ -1369,19 +1372,19 @@
 })
 
 ; Count leading zeros
-; vclzb, vclzh, vclzf, vclzg
+; vclzb, vclzh, vclzf, vclzg, vclzq
 (define_insn "clz<mode>2"
-  [(set (match_operand:V        0 "register_operand" "=v")
-       (clz:V (match_operand:V 1 "register_operand"  "v")))]
+  [(set (match_operand:VT_VXE3              0 "register_operand" "=v")
+       (clz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand"  "v")))]
   "TARGET_VX"
   "vclz<bhfgq>\t%v0,%v1"
   [(set_attr "op_type" "VRR")])
 
 ; Count trailing zeros
-; vctzb, vctzh, vctzf, vctzg
+; vctzb, vctzh, vctzf, vctzg, vctzq
 (define_insn "ctz<mode>2"
-  [(set (match_operand:V        0 "register_operand" "=v")
-       (ctz:V (match_operand:V 1 "register_operand"  "v")))]
+  [(set (match_operand:VT_VXE3              0 "register_operand" "=v")
+       (ctz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand"  "v")))]
   "TARGET_VX"
   "vctz<bhfgq>\t%v0,%v1"
   [(set_attr "op_type" "VRR")])

Reply via email to