This patch adds some more optimisations to the wi:: comparison functions.
It uses the:

  #define CONSTANT(X) (__builtin_constant_p (X) && (X))

idiom that was mentioned before, except that I thought CONSTANT would be
too easily confused with CONSTANT_P, so I went for CAN_TELL instead.
Better names welcome.
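For anyone who hasn't seen the idiom before, here is a minimal standalone
sketch of how it behaves; the helper name is made up and nothing below is
part of the patch.  __builtin_constant_p (X) is nonzero only when the
compiler can prove that X is a compile-time constant, so the guard either
folds away completely or selects the fast path without adding a run-time
test:

  /* Standalone sketch only; example_eq_p is a made-up name.  */
  #define CAN_TELL(X) (__builtin_constant_p (X) && (X))

  static inline bool
  example_eq_p (long x, long y)
  {
    /* Folds to true only for calls like example_eq_p (x, 0), in which
       case the whole function reduces to a single test against zero.  */
    if (CAN_TELL (y == 0))
      return x == 0;
    /* General path for run-time values of y.  */
    return x == y;
  }

Either branch computes the same result, so the guard only ever changes
which code the compiler generates, never the semantics.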
The changes are:

- Add a fast path to eq_p for when one of the inputs isn't sign-extended.
  This includes code to handle compile-time 0 specially.

- Add the opposite optimisation to Mike's lts_p change, if we can tell
  at compile time that it applies.

- Add fast paths to ltu_p for constants.

E.g.:

bool
f1 (const_tree x)
{
  return wi::eq_p (x, 0);
}

now gives:

        xorl    %eax, %eax
        cmpw    $1, 4(%rdi)
        je      .L5
        rep ret
        .p2align 4,,10
        .p2align 3
.L5:
        cmpq    $0, 16(%rdi)
        sete    %al
        ret

bool
f2 (const_tree x, HOST_WIDE_INT y)
{
  return wi::eq_p (x, y);
}

gives:

        movq    8(%rdi), %rax
        movzwl  52(%rax), %edx
        xorl    %eax, %eax
        andw    $1023, %dx
        cmpw    $1, 4(%rdi)
        je      .L10
        rep ret
        .p2align 4,,10
        .p2align 3
.L10:
        xorq    16(%rdi), %rsi
        movzwl  %dx, %edx
        movl    $64, %ecx
        subl    %edx, %ecx
        movq    %rsi, %rax
        salq    %cl, %rax
        testl   %ecx, %ecx
        cmovg   %rax, %rsi
        testq   %rsi, %rsi
        sete    %al
        ret

bool
f3 (HOST_WIDE_INT x, const_tree y)
{
  return wi::lts_p (x, y);
}

is similarly ugly because of the way that it ignores TYPE_SIGN and so has
to explicitly sign-extend "small-prec" cases:

        movq    8(%rsi), %rax
        movzwl  4(%rsi), %ecx
        movzwl  52(%rax), %edx
        andl    $1023, %edx
        cmpl    $1, %ecx
        je      .L16
        leal    -1(%rcx), %eax
        sall    $6, %ecx
        subl    %edx, %ecx
        movq    16(%rsi,%rax,8), %rax
        movq    %rax, %rdx
        salq    %cl, %rdx
        testl   %ecx, %ecx
        cmovg   %rdx, %rax
        sarq    $63, %rax
        addl    $1, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L16:
        cmpl    $63, %edx
        movq    16(%rsi), %rax
        ja      .L13
        movb    $64, %cl
        subl    %edx, %ecx
        salq    %cl, %rax
        sarq    %cl, %rax
.L13:
        cmpq    %rdi, %rax
        setg    %al
        ret

but:

bool
f4 (HOST_WIDE_INT x, const_tree y)
{
  return wi::lts_p (x, wi::to_widest (y));
}

is a bit more respectable:

        movzwl  6(%rsi), %eax
        cmpl    $1, %eax
        je      .L20
        subl    $1, %eax
        movq    16(%rsi,%rax,8), %rax
        sarq    $63, %rax
        addl    $1, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L20:
        cmpq    %rdi, 16(%rsi)
        setg    %al
        ret

For similar reasons:

bool
f5 (const_tree x)
{
  return wi::ltu_p (x, 100);
}

gives:

        movq    8(%rdi), %rax
        movzwl  52(%rax), %ecx
        xorl    %eax, %eax
        andw    $1023, %cx
        cmpw    $1, 4(%rdi)
        je      .L26
        rep ret
        .p2align 4,,10
        .p2align 3
.L26:
        cmpw    $63, %cx
        ja      .L23
        movl    $1, %eax
        salq    %cl, %rax
        subq    $1, %rax
        andq    16(%rdi), %rax
.L24:
        cmpq    $99, %rax
        setbe   %al
        ret
        .p2align 4,,10
        .p2align 3
.L23:
        movq    16(%rdi), %rax
        jmp     .L24

but:

bool
f6 (const_tree x)
{
  return wi::ltu_p (wi::to_widest (x), 100);
}

gives:

        xorl    %eax, %eax
        cmpw    $1, 6(%rdi)
        je      .L30
        rep ret
        .p2align 4,,10
        .p2align 3
.L30:
        cmpq    $99, 16(%rdi)
        setbe   %al
        ret

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK for wide-int?

Thanks,
Richard
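As an aside, the least obvious part of the eq_p change below is the flush
of excess bits for values that are narrower than a HWI.  A standalone
sketch of just that step, outside the wide-int classes and with a made-up
function name, would look something like this (it assumes GCC's
HOST_WIDE_INT definitions from hwint.h):

  /* Illustrative sketch only, not part of the patch.  X and Y are stored
     in full HWIs but are conceptually only PRECISION bits wide, so the
     bits above PRECISION are not significant and must be shifted out
     before the equality test.  */
  static inline bool
  narrow_hwi_eq_p (unsigned HOST_WIDE_INT x, unsigned HOST_WIDE_INT y,
                   unsigned int precision)
  {
    unsigned HOST_WIDE_INT diff = x ^ y;
    int excess = HOST_BITS_PER_WIDE_INT - (int) precision;
    if (excess > 0)
      /* E.g. for precision 16, only the low 16 bits of DIFF are kept.  */
      diff <<= excess;
    return diff == 0;
  }

The new wi::eq_p code does the same thing, except that it first
short-circuits the CAN_TELL (yi.val[0] == 0) case, where no flushing is
needed because the excess bits are already signs or zeros.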
Index: gcc/system.h
===================================================================
--- gcc/system.h	2013-10-27 14:25:19.144723977 +0000
+++ gcc/system.h	2013-10-27 14:25:20.716738045 +0000
@@ -711,6 +711,12 @@ #define gcc_unreachable() __builtin_unre
 #define gcc_unreachable() (fancy_abort (__FILE__, __LINE__, __FUNCTION__))
 #endif
 
+#if GCC_VERSION >= 3001
+#define CAN_TELL(X) (__builtin_constant_p (X) && (X))
+#else
+#define CAN_TELL(X) (false && (X))
+#endif
+
 /* Until we can use STATIC_ASSERT.  */
 #define STATIC_ASSERT(X) \
   typedef int assertion1[(X) ? 1 : -1] ATTRIBUTE_UNUSED
Index: gcc/wide-int.h
===================================================================
--- gcc/wide-int.h	2013-10-27 14:25:19.144723977 +0000
+++ gcc/wide-int.h	2013-10-27 14:37:34.834443832 +0000
@@ -1495,6 +1495,7 @@ wi::eq_p (const T1 &x, const T2 &y)
   WIDE_INT_REF_FOR (T2) yi (y, precision);
   if (xi.is_sign_extended && yi.is_sign_extended)
     {
+      /* This case reduces to array equality.  */
       if (xi.len != yi.len)
 	return false;
       unsigned int i = 0;
@@ -1504,10 +1505,21 @@ wi::eq_p (const T1 &x, const T2 &y)
       while (++i != xi.len);
       return true;
     }
-  if (precision <= HOST_BITS_PER_WIDE_INT)
+  if (yi.len == 1)
     {
-      unsigned HOST_WIDE_INT diff = xi.ulow () ^ yi.ulow ();
-      return (diff << (-precision % HOST_BITS_PER_WIDE_INT)) == 0;
+      /* XI is only equal to YI if it too has a single HWI.  */
+      if (xi.len != 1)
+	return false;
+      /* Excess bits in xi.val[0] will be signs or zeros, so comparisons
+	 with 0 are simple.  */
+      if (CAN_TELL (yi.val[0] == 0))
+	return xi.val[0] == 0;
+      /* Otherwise flush out any excess bits first.  */
+      unsigned HOST_WIDE_INT diff = xi.val[0] ^ yi.val[0];
+      int excess = HOST_BITS_PER_WIDE_INT - precision;
+      if (excess > 0)
+	diff <<= excess;
+      return diff == 0;
     }
   return eq_p_large (xi.val, xi.len, yi.val, yi.len, precision);
 }
@@ -1528,20 +1540,28 @@ wi::lts_p (const T1 &x, const T2 &y)
   unsigned int precision = get_binary_precision (x, y);
   WIDE_INT_REF_FOR (T1) xi (x, precision);
   WIDE_INT_REF_FOR (T2) yi (y, precision);
-  // We optimize x < y, where y is 64 or fewer bits.
+  /* We optimize x < y, where y is 64 or fewer bits.  */
   if (wi::fits_shwi_p (yi))
     {
-      // If x fits directly into a shwi, we can compare directly.
+      /* Make lts_p (x, 0) as efficient as wi::neg_p (x).  */
+      if (CAN_TELL (yi.val[0] == 0))
+	return neg_p (xi);
+      /* If x fits directly into a shwi, we can compare directly.  */
       if (wi::fits_shwi_p (xi))
 	return xi.to_shwi () < yi.to_shwi ();
-      // If x doesn't fit and is negative, then it must be more
-      // negative than any value in y, and hence smaller than y.
-      if (neg_p (xi, SIGNED))
+      /* If x doesn't fit and is negative, then it must be more
+	 negative than any value in y, and hence smaller than y.  */
+      if (neg_p (xi))
 	return true;
-      // If x is positive, then it must be larger than any value in y,
-      // and hence greater than y.
+      /* If x is positive, then it must be larger than any value in y,
+	 and hence greater than y.  */
      return false;
     }
+  /* Optimize the opposite case, if it can be detected at compile time.  */
+  if (CAN_TELL (xi.len == 1))
+    /* If YI is negative it is lower than the least HWI.
+       If YI is positive it is greater than the greatest HWI.  */
+    return !neg_p (yi);
   return lts_p_large (xi.val, xi.len, precision, yi.val, yi.len);
 }
 
@@ -1553,6 +1573,12 @@ wi::ltu_p (const T1 &x, const T2 &y)
   unsigned int precision = get_binary_precision (x, y);
   WIDE_INT_REF_FOR (T1) xi (x, precision);
   WIDE_INT_REF_FOR (T2) yi (y, precision);
+  /* Optimize comparisons with constants and with sub-HWI unsigned
+     integers.  */
+  if (CAN_TELL (yi.len == 1 && yi.val[0] >= 0))
+    return xi.len == 1 && xi.to_uhwi () < (unsigned HOST_WIDE_INT) yi.val[0];
+  if (CAN_TELL (xi.len == 1 && xi.val[0] >= 0))
+    return yi.len != 1 || yi.to_uhwi () > (unsigned HOST_WIDE_INT) xi.val[0];
   if (precision <= HOST_BITS_PER_WIDE_INT)
     {
       unsigned HOST_WIDE_INT xl = xi.to_uhwi ();