On Mon, Jul 13, 2020 at 6:42 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > Change CLZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2 > to enable table-based clz/ctz optimization: > > -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE) > -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE) > A C expression that indicates whether the architecture defines a > value for 'clz' or 'ctz' with a zero operand. A result of '0' > indicates the value is undefined. If the value is defined for only > the RTL expression, the macro should evaluate to '1'; if the value > applies also to the corresponding optab entry (which is normally > the case if it expands directly into the corresponding RTL), then > the macro should evaluate to '2'. In the cases where the value is > defined, VALUE should be set to this value. > > gcc/ > > PR target/95863 > * config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2. > (CLZ_DEFINED_VALUE_AT_ZERO): Likewise. > > gcc/testsuite/ > > PR target/95863 > * gcc.target/i386/pr95863-1.c: New test. > * gcc.target/i386/pr95863-2.c: Likewise. > --- > gcc/config/i386/i386.h | 4 +- > gcc/testsuite/gcc.target/i386/pr95863-1.c | 47 +++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr95863-2.c | 27 +++++++++++++ > 3 files changed, 76 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-2.c > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index f4a8f1391fa..1deb59f286f 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -2946,9 +2946,9 @@ extern void debug_dispatch_window (int); > /* The value at zero is only defined for the BMI instructions > LZCNT and TZCNT, not the BSR/BSF insns in the original isa. */ > #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > - ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0) > + ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 
2 : 0) > #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > - ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0) > + ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0) > > > /* Flags returned by ix86_get_callcvt (). */ > diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c > b/gcc/testsuite/gcc.target/i386/pr95863-1.c > new file mode 100644 > index 00000000000..f3918a1a766 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -mbmi" } */ > + > +int ctz1 (unsigned x) > +{ > + static const char table[32] = > + { > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > + }; > + > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > +} > + > +int ctz2 (unsigned x) > +{ > +#define u 0 > + static short table[64] = > + { > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > + }; > + > + x = (x & -x) * 0x0450FBAF; > + return table[x >> 26]; > +} > + > +int ctz3 (unsigned x) > +{ > + static int table[32] = > + { > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > + }; > + > + if (x == 0) return 32; > + x = (x & -x) * 0x04D7651F; > + return table[x >> 27]; > +} > + > +/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */ > +/* { dg-final { scan-assembler-times "andl\t" 1 } } */ > +/* { dg-final { scan-assembler-not "neg" } } */ > +/* { dg-final { scan-assembler-not "imul" } } */ > +/* { dg-final { scan-assembler-not "shr" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c > b/gcc/testsuite/gcc.target/i386/pr95863-2.c > new file mode 100644 > index 00000000000..cb56dfc6d94 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c > @@ -0,0 +1,27 @@ > +/* { dg-do compile 
{ target { ! ia32 } } } */ > +/* { dg-options "-O -mbmi" } */ > + > +static const unsigned long long magic = 0x03f08c5392f756cdULL; > + > +static const char table[64] = { > + 0, 1, 12, 2, 13, 22, 17, 3, > + 14, 33, 23, 36, 18, 58, 28, 4, > + 62, 15, 34, 26, 24, 48, 50, 37, > + 19, 55, 59, 52, 29, 44, 39, 5, > + 63, 11, 21, 16, 32, 35, 57, 27, > + 61, 25, 47, 49, 54, 51, 43, 38, > + 10, 20, 31, 56, 60, 46, 53, 42, > + 9, 30, 45, 41, 8, 40, 7, 6, > +}; > + > +int ctz4 (unsigned long long x) > +{ > + unsigned long long lsb = x & -x; > + return table[(lsb * magic) >> 58]; > +} > + > +/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */ > +/* { dg-final { scan-assembler-times "andl\t" 1 } } */ > +/* { dg-final { scan-assembler-not "negq" } } */ > +/* { dg-final { scan-assembler-not "imulq" } } */ > +/* { dg-final { scan-assembler-not "shrq" } } */ > -- > 2.26.2 >
PING. -- H.J.