On Sun, 27 Oct 2013, Richard Sandiford wrote:
> This patch adds some more optimisations to the wi:: comparison functions.
> It uses the:
>
> #define CONSTANT(X) (__builtin_constant_p (X) && (X))
>
> idiom that was mentioned before, except that I thought CONSTANT would be
> too easily confused with CONSTANT_P, so I went for CAN_TELL instead.
> Better names welcome.
How about STATIC_CONSTANT_P, similar to static_assert?
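I.e. something like the following (a minimal standalone sketch, helper and
caller names made up here, not taken from the patch).  The point is that
__builtin_constant_p propagates through inlining when optimising, so the
guarded branch only survives at call sites that pass a literal constant:

#define STATIC_CONSTANT_P(X) (__builtin_constant_p (X) && (X))

static inline int
eq_maybe_zero (long val, long cmp)
{
  /* With optimisation and inlining, __builtin_constant_p sees the
     inlined argument, so this branch exists only at call sites that
     pass a literal 0.  */
  if (STATIC_CONSTANT_P (cmp == 0))
    return val == 0;
  /* General path for everything else.  */
  return val == cmp;
}

int use_literal (long v) { return eq_maybe_zero (v, 0); }
int use_variable (long v, long c) { return eq_maybe_zero (v, c); }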
> The changes are:
>
> - Add a fast path to eq_p for when one of the inputs isn't sign-extended.
> This includes code to handle compile-time 0 specially.
>
> - Add the opposite optimisation to Mike's lts_p change, if we can tell at
> compile time that it applies.
I think the cases Mike added should only be enabled when we can figure
them out at compile-time, too.
Ok with that change.
Thanks,
Richard.
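Concretely, for lts_p I'd expect the whole shwi block to end up guarded,
roughly like this (untested sketch against the patch above, using CAN_TELL
or whatever the macro ends up being called, just to illustrate what I mean):

  /* Only enable Mike's fast path when the compiler can prove that
     Y fits in a shwi.  */
  if (CAN_TELL (wi::fits_shwi_p (yi)))
    {
      /* As in the patch: a compile-time 0 reduces to a sign test.  */
      if (CAN_TELL (yi.val[0] == 0))
	return neg_p (xi);
      /* If x fits directly into a shwi, we can compare directly.  */
      if (wi::fits_shwi_p (xi))
	return xi.to_shwi () < yi.to_shwi ();
      /* Otherwise the sign of X alone decides the comparison.  */
      return neg_p (xi);
    }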
> - Add fast paths to ltu_p for constants.
>
> E.g.:
>
> bool
> f1 (const_tree x)
> {
> return wi::eq_p (x, 0);
> }
>
> now gives:
>
> xorl %eax, %eax
> cmpw $1, 4(%rdi)
> je .L5
> rep ret
> .p2align 4,,10
> .p2align 3
> .L5:
> cmpq $0, 16(%rdi)
> sete %al
> ret
>
> bool
> f2 (const_tree x, HOST_WIDE_INT y)
> {
> return wi::eq_p (x, y);
> }
>
> gives:
>
> movq 8(%rdi), %rax
> movzwl 52(%rax), %edx
> xorl %eax, %eax
> andw $1023, %dx
> cmpw $1, 4(%rdi)
> je .L10
> rep ret
> .p2align 4,,10
> .p2align 3
> .L10:
> xorq 16(%rdi), %rsi
> movzwl %dx, %edx
> movl $64, %ecx
> subl %edx, %ecx
> movq %rsi, %rax
> salq %cl, %rax
> testl %ecx, %ecx
> cmovg %rax, %rsi
> testq %rsi, %rsi
> sete %al
> ret
>
> bool
> f3 (HOST_WIDE_INT x, const_tree y)
> {
> return wi::lts_p (x, y);
> }
>
> is similarly ugly because of the way that it ignores TYPE_SIGN and so has to
> explicitly sign-extend "small-prec" cases:
>
> movq 8(%rsi), %rax
> movzwl 4(%rsi), %ecx
> movzwl 52(%rax), %edx
> andl $1023, %edx
> cmpl $1, %ecx
> je .L16
> leal -1(%rcx), %eax
> sall $6, %ecx
> subl %edx, %ecx
> movq 16(%rsi,%rax,8), %rax
> movq %rax, %rdx
> salq %cl, %rdx
> testl %ecx, %ecx
> cmovg %rdx, %rax
> sarq $63, %rax
> addl $1, %eax
> ret
> .p2align 4,,10
> .p2align 3
> .L16:
> cmpl $63, %edx
> movq 16(%rsi), %rax
> ja .L13
> movb $64, %cl
> subl %edx, %ecx
> salq %cl, %rax
> sarq %cl, %rax
> .L13:
> cmpq %rdi, %rax
> setg %al
> ret
>
> but:
>
> bool
> f4 (HOST_WIDE_INT x, const_tree y)
> {
> return wi::lts_p (x, wi::to_widest (y));
> }
>
> is a bit more respectable:
>
> movzwl 6(%rsi), %eax
> cmpl $1, %eax
> je .L20
> subl $1, %eax
> movq 16(%rsi,%rax,8), %rax
> sarq $63, %rax
> addl $1, %eax
> ret
> .p2align 4,,10
> .p2align 3
> .L20:
> cmpq %rdi, 16(%rsi)
> setg %al
> ret
>
> For similar reasons:
>
> bool
> f5 (const_tree x)
> {
> return wi::ltu_p (x, 100);
> }
>
> gives:
>
> movq 8(%rdi), %rax
> movzwl 52(%rax), %ecx
> xorl %eax, %eax
> andw $1023, %cx
> cmpw $1, 4(%rdi)
> je .L26
> rep ret
> .p2align 4,,10
> .p2align 3
> .L26:
> cmpw $63, %cx
> ja .L23
> movl $1, %eax
> salq %cl, %rax
> subq $1, %rax
> andq 16(%rdi), %rax
> .L24:
> cmpq $99, %rax
> setbe %al
> ret
> .p2align 4,,10
> .p2align 3
> .L23:
> movq 16(%rdi), %rax
> jmp .L24
>
> but:
>
> bool
> f6 (const_tree x)
> {
> return wi::ltu_p (wi::to_widest (x), 100);
> }
>
> gives:
>
> xorl %eax, %eax
> cmpw $1, 6(%rdi)
> je .L30
> rep ret
> .p2align 4,,10
> .p2align 3
> .L30:
> cmpq $99, 16(%rdi)
> setbe %al
> ret
>
> Tested on powerpc64-linux-gnu and x86_64-linux-gnu. OK for wide-int?
>
> Thanks,
> Richard
>
>
> Index: gcc/system.h
> ===================================================================
> --- gcc/system.h 2013-10-27 14:25:19.144723977 +0000
> +++ gcc/system.h 2013-10-27 14:25:20.716738045 +0000
> @@ -711,6 +711,12 @@ #define gcc_unreachable() __builtin_unre
> #define gcc_unreachable() (fancy_abort (__FILE__, __LINE__, __FUNCTION__))
> #endif
>
> +#if GCC_VERSION >= 3001
> +#define CAN_TELL(X) (__builtin_constant_p (X) && (X))
> +#else
> +#define CAN_TELL(X) (false && (X))
> +#endif
> +
> /* Until we can use STATIC_ASSERT. */
> #define STATIC_ASSERT(X) \
> typedef int assertion1[(X) ? 1 : -1] ATTRIBUTE_UNUSED
> Index: gcc/wide-int.h
> ===================================================================
> --- gcc/wide-int.h 2013-10-27 14:25:19.144723977 +0000
> +++ gcc/wide-int.h 2013-10-27 14:37:34.834443832 +0000
> @@ -1495,6 +1495,7 @@ wi::eq_p (const T1 &x, const T2 &y)
> WIDE_INT_REF_FOR (T2) yi (y, precision);
> if (xi.is_sign_extended && yi.is_sign_extended)
> {
> + /* This case reduces to array equality. */
> if (xi.len != yi.len)
> return false;
> unsigned int i = 0;
> @@ -1504,10 +1505,21 @@ wi::eq_p (const T1 &x, const T2 &y)
> while (++i != xi.len);
> return true;
> }
> - if (precision <= HOST_BITS_PER_WIDE_INT)
> + if (yi.len == 1)
> {
> - unsigned HOST_WIDE_INT diff = xi.ulow () ^ yi.ulow ();
> - return (diff << (-precision % HOST_BITS_PER_WIDE_INT)) == 0;
> + /* XI is only equal to YI if it too has a single HWI. */
> + if (xi.len != 1)
> + return false;
> + /* Excess bits in xi.val[0] will be signs or zeros, so comparisons
> + with 0 are simple. */
> + if (CAN_TELL (yi.val[0] == 0))
> + return xi.val[0] == 0;
> + /* Otherwise flush out any excess bits first. */
> + unsigned HOST_WIDE_INT diff = xi.val[0] ^ yi.val[0];
> + int excess = HOST_BITS_PER_WIDE_INT - precision;
> + if (excess > 0)
> + diff <<= excess;
> + return diff == 0;
> }
> return eq_p_large (xi.val, xi.len, yi.val, yi.len, precision);
> }
> @@ -1528,20 +1540,28 @@ wi::lts_p (const T1 &x, const T2 &y)
> unsigned int precision = get_binary_precision (x, y);
> WIDE_INT_REF_FOR (T1) xi (x, precision);
> WIDE_INT_REF_FOR (T2) yi (y, precision);
> - // We optimize x < y, where y is 64 or fewer bits.
> + /* We optimize x < y, where y is 64 or fewer bits. */
> if (wi::fits_shwi_p (yi))
> {
> - // If x fits directly into a shwi, we can compare directly.
> + /* Make lts_p (x, 0) as efficient as wi::neg_p (x). */
> + if (CAN_TELL (yi.val[0] == 0))
> + return neg_p (xi);
> + /* If x fits directly into a shwi, we can compare directly. */
> if (wi::fits_shwi_p (xi))
> return xi.to_shwi () < yi.to_shwi ();
> - // If x doesn't fit and is negative, then it must be more
> - // negative than any value in y, and hence smaller than y.
> - if (neg_p (xi, SIGNED))
> + /* If x doesn't fit and is negative, then it must be more
> + negative than any value in y, and hence smaller than y. */
> + if (neg_p (xi))
> return true;
> - // If x is positive, then it must be larger than any value in y,
> - // and hence greater than y.
> + /* If x is positive, then it must be larger than any value in y,
> + and hence greater than y. */
> return false;
> }
> + /* Optimize the opposite case, if it can be detected at compile time. */
> + if (CAN_TELL (xi.len == 1))
> + /* If YI is negative it is lower than the least HWI.
> + If YI is positive it is greater than the greatest HWI. */
> + return !neg_p (yi);
> return lts_p_large (xi.val, xi.len, precision, yi.val, yi.len);
> }
>
> @@ -1553,6 +1573,12 @@ wi::ltu_p (const T1 &x, const T2 &y)
> unsigned int precision = get_binary_precision (x, y);
> WIDE_INT_REF_FOR (T1) xi (x, precision);
> WIDE_INT_REF_FOR (T2) yi (y, precision);
> + /* Optimize comparisons with constants and with sub-HWI unsigned
> + integers. */
> + if (CAN_TELL (yi.len == 1 && yi.val[0] >= 0))
> + return xi.len == 1 && xi.to_uhwi () < (unsigned HOST_WIDE_INT) yi.val[0];
> + if (CAN_TELL (xi.len == 1 && xi.val[0] >= 0))
> + return yi.len != 1 || yi.to_uhwi () > (unsigned HOST_WIDE_INT) xi.val[0];
> if (precision <= HOST_BITS_PER_WIDE_INT)
> {
> unsigned HOST_WIDE_INT xl = xi.to_uhwi ();
>
>
--
Richard Biener <[email protected]>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer