On Wed, Sep 10, 2025 at 10:06:35AM +0200, Juergen Christ wrote:
> To properly implement __builtin_ffs for SI mode, implement clz and
> (for >= z17) ctz for SI mode.  Otherwise, gcc falls back to a libcall
> which causes problems for Linux kernel code.
> 
> Also adjust the C?Z_DEFINED_VALUE_AT_ZERO macros to return 2.  Since
> the optabs now return exactly the value set by these macros, return
> value 2 is more appropriate and leads to better code.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>       * config/s390/s390.h (CLZ_DEFINED_VALUE_AT_ZERO): Adjust and
>         return 2.
>       (CTZ_DEFINED_VALUE_AT_ZERO): Return 2.
>       * config/s390/s390.md (clzsi2): Implement.
>       (ctzsi2): Implement.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/vect/pr109011-2.c: Fix expected outcome.
>       * gcc.dg/vect/pr109011-4.c: Fix expected outcome.
>       * gcc.target/s390/ffs-1.c: New test.
> 
> Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
> ---
>  gcc/config/s390/s390.h                 |  4 ++--
>  gcc/config/s390/s390.md                | 23 +++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/vect/pr109011-2.c |  3 +--
>  gcc/testsuite/gcc.dg/vect/pr109011-4.c |  3 +--
>  gcc/testsuite/gcc.target/s390/ffs-1.c  | 18 ++++++++++++++++++
>  5 files changed, 45 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/ffs-1.c
> 
> diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
> index 8b04bc9a7557..6478be8c7acd 100644
> --- a/gcc/config/s390/s390.h
> +++ b/gcc/config/s390/s390.h
> @@ -1001,10 +1001,10 @@ do {                                                  
>                 \
>  #define FUNCTION_MODE QImode
>  
>  /* Specify the value which is used when clz operand is zero.  */
> -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
> +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = GET_MODE_PRECISION (MODE), 2)
>  
>  /* Specify the value which is used when ctz operand is zero.  */
> -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
> +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 2)
>  
>  /* Machine-specific symbol_ref flags.  */
>  #define SYMBOL_FLAG_ALIGN_SHIFT        SYMBOL_FLAG_MACH_DEP_SHIFT
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 858387cd85c5..7525688c5fb1 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -9738,6 +9738,19 @@
>    "flogr\t%0,%1"
>    [(set_attr "op_type"  "RRE")])
>  
> +(define_expand "clzsi2"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:SI 1 "register_operand" "")]
> +  "TARGET_EXTIMM && TARGET_ZARCH"
> +{
> +  rtx extreg = gen_reg_rtx (DImode);
> +  rtx clzreg = gen_reg_rtx (DImode);
> +  emit_insn (gen_zero_extendsidi2 (extreg, operands[1]));
> +  emit_insn (gen_clzdi2 (clzreg, extreg));
> +  rtx truncreg = gen_lowpart (SImode, clzreg);
> +  emit_insn (gen_addsi3 (operands[0], truncreg, GEN_INT(-32)));
                                                          ^
                                                        ~~~~
nit: whitespace -- GNU style wants a space before the opening parenthesis, i.e. GEN_INT (-32).

Ok otherwise.

Thanks,
Stefan

> +  DONE;
> +})
>  
>  ;;
>  ;; Count Trailing Zeros.
> @@ -9750,6 +9763,16 @@
>    "ctzg\t%0,%1"
>    [(set_attr "op_type" "RRE")])
>  
> +(define_expand "ctzsi2"
> +  [(set (match_dup 2)
> +     (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
> +   (set (match_dup 3) (ctz:DI (match_dup 2)))
> +   (set (match_operand:SI 0 "register_operand" "") (subreg:SI (match_dup 3) 4))]
> +  "TARGET_Z17 && TARGET_64BIT"
> +{
> +  operands[2] = gen_reg_rtx (DImode);
> +  operands[3] = gen_reg_rtx (DImode);
> +})
>  
>  ;;
>  ;;- Rotate instructions.
> diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-2.c 
> b/gcc/testsuite/gcc.dg/vect/pr109011-2.c
> index 4c7e6ad07a46..dc62d01da5d9 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr109011-2.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr109011-2.c
> @@ -31,5 +31,4 @@ baz (int *p, int *q)
>  
>  /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } 
> */
>  /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { 
> target powerpc_vsx } } } */
> -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { 
> target s390_vx } } } */
> -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" 
> { target s390_vx } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-4.c 
> b/gcc/testsuite/gcc.dg/vect/pr109011-4.c
> index 38b2ab4d511e..8440ec73080a 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr109011-4.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr109011-4.c
> @@ -31,5 +31,4 @@ baz (long long *p, long long *q)
>  
>  /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } 
> */
>  /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { 
> target powerpc_vsx } } } */
> -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { 
> target s390_vx } } } */
> -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" 
> { target s390_vx } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */
> diff --git a/gcc/testsuite/gcc.target/s390/ffs-1.c 
> b/gcc/testsuite/gcc.target/s390/ffs-1.c
> new file mode 100644
> index 000000000000..79774d29ddac
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/ffs-1.c
> @@ -0,0 +1,18 @@
> +/* Check that __builtin_ffs does not expand to libcall.  This is required by
> +   Linux kernel code since libcalls are not present there.  */
> +/* { dg-do compile } */
> +/* { dg-options "-march=z10" } */
> +
> +long
> +fool (long x)
> +{
> +  return __builtin_ffsl (x);
> +}
> +
> +int
> +foo (int x)
> +{
> +  return __builtin_ffs (x);
> +}
> +
> +/* { dg-final { scan-assembler-not "brasl" } } */
> -- 
> 2.43.7
> 

Reply via email to