On Wed, Sep 10, 2025 at 10:06:35AM +0200, Juergen Christ wrote: > To properly implement __builtin_ffs for SI mode, implement clz and > (for >= z17) ctz for SI mode. Otherwise, gcc falls back to a libcall > which causes problems for Linux kernel code. > > Also adjust the C?Z_DEFINED_VALUE_AT_ZERO macros to return 2. Since > the optabs now return exactly the value set by these macros, return > value 2 is more appropriate and leads to better code. > > Bootstrapped and regtested on s390. Ok for trunk? > > gcc/ChangeLog: > > * config/s390/s390.h (CLZ_DEFINED_VALUE_AT_ZERO): Adjust and > return 2. > (CTZ_DEFINED_VALUE_AT_ZERO): Return 2. > * config/s390/s390.md (clzsi2): Implement. > (ctzsi2): Implement. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/pr109011-2.c: Fix expected outcome. > * gcc.dg/vect/pr109011-4.c: Fix expected outcome. > * gcc.target/s390/ffs-1.c: New test. > > Signed-off-by: Juergen Christ <jchr...@linux.ibm.com> > --- > gcc/config/s390/s390.h | 4 ++-- > gcc/config/s390/s390.md | 23 +++++++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/pr109011-2.c | 3 +-- > gcc/testsuite/gcc.dg/vect/pr109011-4.c | 3 +-- > gcc/testsuite/gcc.target/s390/ffs-1.c | 18 ++++++++++++++++++ > 5 files changed, 45 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/s390/ffs-1.c > > diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h > index 8b04bc9a7557..6478be8c7acd 100644 > --- a/gcc/config/s390/s390.h > +++ b/gcc/config/s390/s390.h > @@ -1001,10 +1001,10 @@ do { > \ > #define FUNCTION_MODE QImode > > /* Specify the value which is used when clz operand is zero. */ > -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1) > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = GET_MODE_PRECISION > (MODE), 2) > > /* Specify the value which is used when ctz operand is zero. 
*/ > -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1) > +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 2) > > /* Machine-specific symbol_ref flags. */ > #define SYMBOL_FLAG_ALIGN_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 858387cd85c5..7525688c5fb1 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -9738,6 +9738,19 @@ > "flogr\t%0,%1" > [(set_attr "op_type" "RRE")]) > > +(define_expand "clzsi2" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "")] > + "TARGET_EXTIMM && TARGET_ZARCH" > +{ > + rtx extreg = gen_reg_rtx (DImode); > + rtx clzreg = gen_reg_rtx (DImode); > + emit_insn (gen_zero_extendsidi2 (extreg, operands[1])); > + emit_insn (gen_clzdi2 (clzreg, extreg)); > + rtx truncreg = gen_lowpart (SImode, clzreg); > + emit_insn (gen_addsi3 (operands[0], truncreg, GEN_INT(-32))); ^ ~~~~ nit: whitespace -- GNU style wants a space before the opening parenthesis of a call, i.e. "GEN_INT (-32)" instead of "GEN_INT(-32)".
Ok otherwise. Thanks, Stefan > + DONE; > +}) > > ;; > ;; Count Trailing Zeros. > @@ -9750,6 +9763,16 @@ > "ctzg\t%0,%1" > [(set_attr "op_type" "RRE")]) > > +(define_expand "ctzsi2" > + [(set (match_dup 2) > + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) > + (set (match_dup 3) (ctz:DI (match_dup 2))) > + (set (match_operand:SI 0 "register_operand" "") (subreg:SI (match_dup 3) > 4))] > + "TARGET_Z17 && TARGET_64BIT" > +{ > + operands[2] = gen_reg_rtx (DImode); > + operands[3] = gen_reg_rtx (DImode); > +}) > > ;; > ;;- Rotate instructions. > diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-2.c > b/gcc/testsuite/gcc.dg/vect/pr109011-2.c > index 4c7e6ad07a46..dc62d01da5d9 100644 > --- a/gcc/testsuite/gcc.dg/vect/pr109011-2.c > +++ b/gcc/testsuite/gcc.dg/vect/pr109011-2.c > @@ -31,5 +31,4 @@ baz (int *p, int *q) > > /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { > target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } > */ > /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { > target powerpc_vsx } } } */ > -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { > target s390_vx } } } */ > -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" > { target s390_vx } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { > target s390_vx } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-4.c > b/gcc/testsuite/gcc.dg/vect/pr109011-4.c > index 38b2ab4d511e..8440ec73080a 100644 > --- a/gcc/testsuite/gcc.dg/vect/pr109011-4.c > +++ b/gcc/testsuite/gcc.dg/vect/pr109011-4.c > @@ -31,5 +31,4 @@ baz (long long *p, long long *q) > > /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { > target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } > */ > /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { > target powerpc_vsx } } } */ > -/* { dg-final { 
scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { > target s390_vx } } } */ > -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" > { target s390_vx } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { > target s390_vx } } } */ > diff --git a/gcc/testsuite/gcc.target/s390/ffs-1.c > b/gcc/testsuite/gcc.target/s390/ffs-1.c > new file mode 100644 > index 000000000000..79774d29ddac > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/ffs-1.c > @@ -0,0 +1,18 @@ > +/* Check that __builtin_ffs does not expand to libcall. This is required by > + Linux kernel code since libcalls are not present there. */ > +/* { dg-do compile } */ > +/* { dg-options "-march=z10" } */ > + > +long > +fool (long x) > +{ > + return __builtin_ffsl (x); > +} > + > +int > +foo (int x) > +{ > + return __builtin_ffs (x); > +} > + > +/* { dg-final { scan-assembler-not "brasl" } } */ > -- > 2.43.7 >