To properly implement __builtin_ffs for SI mode, implement clz and
(for >= z17) ctz for SI mode.  Otherwise, GCC falls back to a libcall,
which causes problems for Linux kernel code.

Also adjust the CLZ_DEFINED_VALUE_AT_ZERO and CTZ_DEFINED_VALUE_AT_ZERO
macros to return 2.  Since the optabs now return exactly the value set
by these macros, a return value of 2 is more appropriate and leads to
better code.
gcc/ChangeLog:
* config/s390/s390.h (CLZ_DEFINED_VALUE_AT_ZERO): Adjust and
return 2.
(CTZ_DEFINED_VALUE_AT_ZERO): Return 2.
* config/s390/s390.md (clzsi2): Implement.
(ctzsi2): Implement.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/pr109011-2.c: Fix expected outcome.
* gcc.dg/vect/pr109011-4.c: Fix expected outcome.
* gcc.target/s390/ffs-1.c: New test.
Signed-off-by: Juergen Christ <[email protected]>
Bootstrapped and regtested on s390. Ok for trunk?
---
gcc/config/s390/s390.h | 4 ++--
gcc/config/s390/s390.md | 25 +++++++++++++++++++++++++
gcc/testsuite/gcc.dg/vect/pr109011-2.c | 3 +--
gcc/testsuite/gcc.dg/vect/pr109011-4.c | 3 +--
gcc/testsuite/gcc.target/s390/ffs-1.c | 18 ++++++++++++++++++
5 files changed, 47 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/s390/ffs-1.c
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 8b04bc9a7557..6478be8c7acd 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -1001,10 +1001,10 @@ do { \
#define FUNCTION_MODE QImode
/* Specify the value which is used when clz operand is zero. */
-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = GET_MODE_PRECISION (MODE), 2)
/* Specify the value which is used when ctz operand is zero. */
-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 2)
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_ALIGN_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 858387cd85c5..00fd25650c30 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9738,6 +9738,21 @@
"flogr\t%0,%1"
[(set_attr "op_type" "RRE")])
+(define_expand "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus (truncate:SI
+ (clz:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))))
+ (const_int 32)))]
+ "TARGET_EXTIMM && TARGET_ZARCH"
+{
+ rtx extreg = gen_reg_rtx (DImode);
+ rtx clzreg = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (extreg, operands[1]));
+ emit_insn (gen_clzdi2 (clzreg, extreg));
+ rtx truncreg = gen_lowpart (SImode, clzreg);
+ emit_insn (gen_addsi3 (operands[0], truncreg, GEN_INT(-32)));
+ DONE;
+})
;;
;; Count Trailing Zeros.
@@ -9750,6 +9765,16 @@
"ctzg\t%0,%1"
[(set_attr "op_type" "RRE")])
+(define_expand "ctzsi2"
+ [(set (match_dup 2)
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (set (match_dup 3) (ctz:DI (match_dup 2)))
+ (set (match_operand:SI 0 "register_operand" "") (subreg:SI (match_dup 3) 4))]
+ "TARGET_Z17 && TARGET_64BIT"
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (DImode);
+})
;;
;;- Rotate instructions.
diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-2.c b/gcc/testsuite/gcc.dg/vect/pr109011-2.c
index 4c7e6ad07a46..dc62d01da5d9 100644
--- a/gcc/testsuite/gcc.dg/vect/pr109011-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr109011-2.c
@@ -31,5 +31,4 @@ baz (int *p, int *q)
/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */
/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target powerpc_vsx } } } */
-/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { target s390_vx } } } */
-/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" { target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-4.c b/gcc/testsuite/gcc.dg/vect/pr109011-4.c
index 38b2ab4d511e..8440ec73080a 100644
--- a/gcc/testsuite/gcc.dg/vect/pr109011-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr109011-4.c
@@ -31,5 +31,4 @@ baz (long long *p, long long *q)
/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */
/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target powerpc_vsx } } } */
-/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { target s390_vx } } } */
-/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" { target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */
diff --git a/gcc/testsuite/gcc.target/s390/ffs-1.c b/gcc/testsuite/gcc.target/s390/ffs-1.c
new file mode 100644
index 000000000000..79774d29ddac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/ffs-1.c
@@ -0,0 +1,18 @@
+/* Check that __builtin_ffs does not expand to libcall. This is required by
+ Linux kernel code since libcalls are not present there. */
+/* { dg-do compile } */
+/* { dg-options "-march=z10" } */
+
+long
+fool (long x)
+{
+ return __builtin_ffsl (x);
+}
+
+int
+foo (int x)
+{
+ return __builtin_ffs (x);
+}
+
+/* { dg-final { scan-assembler-not "brasl" } } */
--
2.43.7