On Mon, Jul 13, 2020 at 6:42 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Change CLZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
> to enable table-based clz/ctz optimization:
>
>  -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>  -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>      A C expression that indicates whether the architecture defines a
>      value for 'clz' or 'ctz' with a zero operand.  A result of '0'
>      indicates the value is undefined.  If the value is defined for only
>      the RTL expression, the macro should evaluate to '1'; if the value
>      applies also to the corresponding optab entry (which is normally
>      the case if it expands directly into the corresponding RTL), then
>      the macro should evaluate to '2'.  In the cases where the value is
>      defined, VALUE should be set to this value.
>
> gcc/
>
>         PR target/95863
>         * config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2.
>         (CLZ_DEFINED_VALUE_AT_ZERO): Likewise.
>
> gcc/testsuite/
>
>         PR target/95863
>         * gcc.target/i386/pr95863-1.c: New test.
>         * gcc.target/i386/pr95863-2.c: Likewise.
> ---
>  gcc/config/i386/i386.h                    |  4 +-
>  gcc/testsuite/gcc.target/i386/pr95863-1.c | 47 +++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95863-2.c | 27 +++++++++++++
>  3 files changed, 76 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-2.c
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index f4a8f1391fa..1deb59f286f 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2946,9 +2946,9 @@ extern void debug_dispatch_window (int);
>  /* The value at zero is only defined for the BMI instructions
>     LZCNT and TZCNT, not the BSR/BSF insns in the original isa.  */
>  #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> -       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0)
> +       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 2 : 0)
>  #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> -       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0)
> +       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0)
>
>
>  /* Flags returned by ix86_get_callcvt ().  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c b/gcc/testsuite/gcc.target/i386/pr95863-1.c
> new file mode 100644
> index 00000000000..f3918a1a766
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -mbmi" } */
> +
> +int ctz1 (unsigned x)
> +{
> +  static const char table[32] =
> +    {
> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> +    };
> +
> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> +}
> +
> +int ctz2 (unsigned x)
> +{
> +#define u 0
> +  static short table[64] =
> +    {
> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> +    };
> +
> +  x = (x & -x) * 0x0450FBAF;
> +  return table[x >> 26];
> +}
> +
> +int ctz3 (unsigned x)
> +{
> +  static int table[32] =
> +    {
> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> +    };
> +
> +  if (x == 0) return 32;
> +  x = (x & -x) * 0x04D7651F;
> +  return table[x >> 27];
> +}
> +
> +/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */
> +/* { dg-final { scan-assembler-times "andl\t" 1 } } */
> +/* { dg-final { scan-assembler-not "neg" } } */
> +/* { dg-final { scan-assembler-not "imul" } } */
> +/* { dg-final { scan-assembler-not "shr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c b/gcc/testsuite/gcc.target/i386/pr95863-2.c
> new file mode 100644
> index 00000000000..cb56dfc6d94
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O -mbmi" } */
> +
> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> +
> +static const char table[64] = {
> +     0,  1, 12,  2, 13, 22, 17,  3,
> +    14, 33, 23, 36, 18, 58, 28,  4,
> +    62, 15, 34, 26, 24, 48, 50, 37,
> +    19, 55, 59, 52, 29, 44, 39,  5,
> +    63, 11, 21, 16, 32, 35, 57, 27,
> +    61, 25, 47, 49, 54, 51, 43, 38,
> +    10, 20, 31, 56, 60, 46, 53, 42,
> +     9, 30, 45, 41,  8, 40,  7,  6,
> +};
> +
> +int ctz4 (unsigned long long x)
> +{
> +  unsigned long long lsb = x & -x;
> +  return table[(lsb * magic) >> 58];
> +}
> +
> +/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
> +/* { dg-final { scan-assembler-times "andl\t" 1 } } */
> +/* { dg-final { scan-assembler-not "negq" } } */
> +/* { dg-final { scan-assembler-not "imulq" } } */
> +/* { dg-final { scan-assembler-not "shrq" } } */
> --
> 2.26.2
>

PING.

-- 
H.J.

Reply via email to