Hi Paul, When one needs a fast test whether an addition of two 'signed char' or 'short' values overflows, the macros in intprops.h yield a valid answer, but the generated code is not so well optimized. The functions my_signed1_overflow and my_signed2_overflow in the attached file produce better machine code than the corresponding functions signed1_overflow and signed2_overflow, which use intprops.h primitives.
Similarly, on 64-bit platforms, my_signed4_overflow produces slightly better machine code (no conditional branch) than signed4_overflow. On 32-bit platforms it depends: on SPARC my_signed4_overflow is good as well, but not on i386 (because 64-bit computations on 32-bit CPUs need many registers, and i386 has few registers). Would it be possible to include some of these tricks in intprops.h? Bruno
#include "intprops.h"

/* Return nonzero if A + B is not representable as 'signed char'.
   Reference version built on the intprops.h primitives; the public
   INT_ADD_RANGE_OVERFLOW macro accepts the same four arguments.  */
int
signed1_overflow (signed char a, signed char b)
{
  return _GL_ADD_OVERFLOW (a, b, (signed char) 0x80, (signed char) 0x7F);
}

/* Branch-free variant of signed1_overflow.
   The sum is formed from the 'unsigned char' images of A and B and then
   narrowed back to 'signed char'.  (That narrowing is
   implementation-defined in ISO C; the trick relies on it wrapping.)
   Without overflow, SUM - A equals B, so the XOR with B is 0.  With
   overflow, SUM - A is off by 256 and has the opposite sign to B, so
   the XOR is negative.  */
int
my_signed1_overflow (signed char a, signed char b)
{
  int wrapped_sum = (signed char) ((unsigned char) a + (unsigned char) b);

  return ((wrapped_sum - (int) a) ^ (int) b) < 0;
}

/* Return nonzero if A + B is not representable as 'short'.
   Reference version built on the intprops.h primitives; the public
   INT_ADD_RANGE_OVERFLOW macro accepts the same four arguments.  */
int
signed2_overflow (short a, short b)
{
  return _GL_ADD_OVERFLOW (a, b, (short) 0x8000, (short) 0x7FFF);
}

/* Branch-free variant of signed2_overflow; same technique as
   my_signed1_overflow, with 'short' / 'unsigned short' as the narrow
   types.  */
int
my_signed2_overflow (short a, short b)
{
  int wrapped_sum = (short) ((unsigned short) a + (unsigned short) b);

  return ((wrapped_sum - (int) a) ^ (int) b) < 0;
}

/* Return nonzero if A + B is not representable as 'int'.
   Reference version built on INT_ADD_OVERFLOW from intprops.h.  */
int
signed4_overflow (int a, int b)
{
  return INT_ADD_OVERFLOW (a, b);
}

/* Branch-free variant of signed4_overflow.
   Same technique as my_signed1_overflow, but the difference and the
   XOR are computed in 'long long' so that SUM - A itself cannot
   overflow.  The unsigned-to-int narrowing is implementation-defined
   in ISO C; the trick relies on it wrapping.  */
int
my_signed4_overflow (int a, int b)
{
  long long wrapped_sum = (int) ((unsigned int) a + (unsigned int) b);

  return ((wrapped_sum - (long long) a) ^ (long long) b) < 0;
}

#ifdef TEST
#include <stdio.h>

/* Self-test: for each width, compare the reference function with the
   branch-free variant on a 16 x 16 grid of operands whose top four
   bits take every value, and print each disagreement.

   The operands are built by multiplication rather than '<<', because
   U and V are negative for half of the grid and left-shifting a
   negative value is undefined behavior.  */
int
main (void)
{
  int u, v;

  /* Verify that signed1_overflow and my_signed1_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x10;
        int b = v * 0x10;
        int x = signed1_overflow (a, b);
        int y = my_signed1_overflow (a, b);
        if (x != y)
          printf ("signed1 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  /* Verify that signed2_overflow and my_signed2_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x1000;
        int b = v * 0x1000;
        int x = signed2_overflow (a, b);
        int y = my_signed2_overflow (a, b);
        if (x != y)
          printf ("signed2 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  /* Verify that signed4_overflow and my_signed4_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x10000000;
        int b = v * 0x10000000;
        int x = signed4_overflow (a, b);
        int y = my_signed4_overflow (a, b);
        if (x != y)
          printf ("signed4 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  return 0;
}
#endif
# Compiler output for foo.c (see the .ident line at the end for the
# compiler version).  Instructions are unchanged; only line structure
# and comments were added.
# NOTE(review): the comments assume the first argument (a) arrives in
# %edi and the second (b) in %esi -- confirm against the target ABI.
        .file "foo.c"
        .section .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        .p2align 4,,15
        .globl signed1_overflow
        .type signed1_overflow, @function
# signed1_overflow: two code paths selected by a conditional branch on
# the sign of b.
signed1_overflow:
.LFB0:
        .cfi_startproc
        testb %sil, %sil
        # branch taken when b is negative
        js .L5
        movsbl %sil, %esi
        movl $127, %eax
        movsbl %dil, %edi
        # eax = 127 - b
        subl %esi, %eax
        # result = (127 - b) < a
        cmpl %edi, %eax
        setl %al
        movzbl %al, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L5:
        movsbl %sil, %esi
        movl $-128, %eax
        movsbl %dil, %edi
        # eax = -128 - b
        subl %esi, %eax
        # result = a < (-128 - b)
        cmpl %eax, %edi
        setl %al
        movzbl %al, %eax
        ret
        .cfi_endproc
.LFE0:
        .size signed1_overflow, .-signed1_overflow
        .section .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        .section .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl my_signed1_overflow
        .type my_signed1_overflow, @function
# my_signed1_overflow: straight-line code, no branch.
my_signed1_overflow:
.LFB1:
        .cfi_startproc
        # eax = a + b (only the low byte is used below)
        leal (%rsi,%rdi), %eax
        movsbl %dil, %edi
        movsbl %sil, %esi
        # sign-extend the low byte of the sum
        movsbl %al, %eax
        subl %edi, %eax
        xorl %esi, %eax
        # move the sign bit to bit 0: result is 0 or 1
        shrl $31, %eax
        ret
        .cfi_endproc
.LFE1:
        .size my_signed1_overflow, .-my_signed1_overflow
        .section .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        .section .text.unlikely
.LCOLDB2:
        .text
.LHOTB2:
        .p2align 4,,15
        .globl signed2_overflow
        .type signed2_overflow, @function
# signed2_overflow: same shape as signed1_overflow, with 16-bit
# operands and the bounds 32767 / -32768.
signed2_overflow:
.LFB2:
        .cfi_startproc
        testw %si, %si
        # branch taken when b is negative
        js .L10
        movswl %si, %esi
        movl $32767, %eax
        movswl %di, %edi
        # eax = 32767 - b
        subl %esi, %eax
        # result = (32767 - b) < a
        cmpl %edi, %eax
        setl %al
        movzbl %al, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L10:
        movswl %si, %esi
        movl $-32768, %eax
        movswl %di, %edi
        # eax = -32768 - b
        subl %esi, %eax
        # result = a < (-32768 - b)
        cmpl %eax, %edi
        setl %al
        movzbl %al, %eax
        ret
        .cfi_endproc
.LFE2:
        .size signed2_overflow, .-signed2_overflow
        .section .text.unlikely
.LCOLDE2:
        .text
.LHOTE2:
        .section .text.unlikely
.LCOLDB3:
        .text
.LHOTB3:
        .p2align 4,,15
        .globl my_signed2_overflow
        .type my_signed2_overflow, @function
# my_signed2_overflow: straight-line code, no branch.
my_signed2_overflow:
.LFB3:
        .cfi_startproc
        # eax = a + b (only the low 16 bits are used below)
        leal (%rsi,%rdi), %eax
        movswl %di, %edi
        movswl %si, %esi
        # sign-extend %ax into %eax
        cwtl
        subl %edi, %eax
        xorl %esi, %eax
        # move the sign bit to bit 0: result is 0 or 1
        shrl $31, %eax
        ret
        .cfi_endproc
.LFE3:
        .size my_signed2_overflow, .-my_signed2_overflow
        .section .text.unlikely
.LCOLDE3:
        .text
.LHOTE3:
        .section .text.unlikely
.LCOLDB4:
        .text
.LHOTB4:
        .p2align 4,,15
        .globl signed4_overflow
        .type signed4_overflow, @function
# signed4_overflow: two code paths selected by a conditional branch on
# the sign of b.
signed4_overflow:
.LFB4:
        .cfi_startproc
        testl %esi, %esi
        # branch taken when b is negative
        js .L15
        movl $2147483647, %eax
        # eax = 2147483647 - b
        subl %esi, %eax
        # result = a > (2147483647 - b)
        cmpl %eax, %edi
        setg %al
        movzbl %al, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L15:
        movl $-2147483648, %eax
        # eax = -2147483648 - b
        subl %esi, %eax
        # result = (-2147483648 - b) > a
        cmpl %edi, %eax
        setg %al
        movzbl %al, %eax
        ret
        .cfi_endproc
.LFE4:
        .size signed4_overflow, .-signed4_overflow
        .section .text.unlikely
.LCOLDE4:
        .text
.LHOTE4:
        .section .text.unlikely
.LCOLDB5:
        .text
.LHOTB5:
        .p2align 4,,15
        .globl my_signed4_overflow
        .type my_signed4_overflow, @function
# my_signed4_overflow: straight-line code, no branch; the subtract and
# XOR are done in 64-bit registers.
my_signed4_overflow:
.LFB5:
        .cfi_startproc
        # eax = low 32 bits of a + b
        leal (%rdi,%rsi), %eax
        movslq %edi, %rdi
        movslq %esi, %rsi
        # sign-extend %eax into %rax
        cltq
        subq %rdi, %rax
        xorq %rsi, %rax
        # move the sign bit to bit 0: result is 0 or 1
        shrq $63, %rax
        ret
        .cfi_endproc
.LFE5:
        .size my_signed4_overflow, .-my_signed4_overflow
        .section .text.unlikely
.LCOLDE5:
        .text
.LHOTE5:
        .ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609"
        .section .note.GNU-stack,"",@progbits