Hi Paul,

When one needs a fast test for whether an addition of two 'signed char' or
'short' values overflows, the macros in intprops.h yield a correct answer, but
the code they compile to is not well optimized. The functions
my_signed1_overflow and my_signed2_overflow in the attached file produce better
machine code than the corresponding functions signed1_overflow and
signed2_overflow, which use the intprops.h primitives.

Similarly, on 64-bit platforms, my_signed4_overflow produces slightly better
machine code (no conditional branch) than signed4_overflow. On 32-bit platforms
it depends: on SPARC my_signed4_overflow is good as well, but not on i386
(because 64-bit computations on 32-bit CPUs need many registers, and i386
has few registers).

Would it be possible to incorporate some of these tricks into intprops.h?

Bruno
#include "intprops.h"

/* Reference implementation built on the intprops.h primitive
   _GL_ADD_OVERFLOW: asks whether a + b lies outside the bounds given for
   'signed char'.  INT_ADD_RANGE_OVERFLOW, called with the same four
   arguments, is the alternative primitive that was tried here.  */
int signed1_overflow (signed char a, signed char b)
{
  const signed char type_min = (signed char) 0x80;
  const signed char type_max = (signed char) 0x7F;

  return _GL_ADD_OVERFLOW (a, b, type_min, type_max);
}

/* Branch-free test for overflow of a + b in 'signed char'.
   Returns 1 if the mathematical sum does not fit, 0 otherwise.

   The sum is formed from the unsigned byte values and then narrowed back to
   'signed char'.  Without overflow, (wrapped - a) equals b, so the XOR
   with b is 0.  With overflow, (wrapped - a) is off by 256 and therefore has
   the opposite sign from b, which makes the XOR negative.  */
int my_signed1_overflow (signed char a, signed char b)
{
  signed char wrapped = (signed char) ((unsigned char) a + (unsigned char) b);
  int delta = (int) wrapped - (int) a;

  return (delta ^ (int) b) < 0;
}

/* Reference implementation built on the intprops.h primitive
   _GL_ADD_OVERFLOW: asks whether a + b lies outside the bounds given for
   'short'.  INT_ADD_RANGE_OVERFLOW, called with the same four arguments, is
   the alternative primitive that was tried here.  */
int signed2_overflow (short a, short b)
{
  const short type_min = (short) 0x8000;
  const short type_max = (short) 0x7FFF;

  return _GL_ADD_OVERFLOW (a, b, type_min, type_max);
}

/* Branch-free test for overflow of a + b in 'short'.
   Returns 1 if the mathematical sum does not fit, 0 otherwise.

   The sum is formed from the unsigned 16-bit values and then narrowed back
   to 'short'.  Without overflow, (wrapped - a) equals b, so the XOR with b
   is 0.  With overflow, (wrapped - a) is off by 65536 and therefore has the
   opposite sign from b, which makes the XOR negative.  */
int my_signed2_overflow (short a, short b)
{
  short wrapped = (short) ((unsigned short) a + (unsigned short) b);
  int delta = (int) wrapped - (int) a;

  return (delta ^ (int) b) < 0;
}

/* Reference implementation: defers to the intprops.h macro
   INT_ADD_OVERFLOW for the 'int' addition a + b.  INT_ADD_RANGE_OVERFLOW and
   _GL_ADD_OVERFLOW with explicit bounds (int) 0x80000000 and
   (int) 0x7FFFFFFF are the alternatives that were tried here.  */
int signed4_overflow (int a, int b)
{
  int overflowed = INT_ADD_OVERFLOW (a, b);

  return overflowed;
}

/* Branch-free test for overflow of a + b in 'int'.
   Returns 1 if the mathematical sum does not fit, 0 otherwise.

   The sum is formed in 'unsigned int' (which wraps) and narrowed back to
   'int'; the comparison is then carried out in 'long long' so that
   (wrapped - a) itself cannot overflow.  Without overflow that difference
   equals b and the XOR with b is 0; with overflow it has the opposite sign
   from b, which makes the XOR negative.  */
int my_signed4_overflow (int a, int b)
{
  int wrapped = (int) ((unsigned int) a + (unsigned int) b);
  long long delta = (long long) wrapped - (long long) a;

  return (delta ^ (long long) b) < 0;
}

#ifdef TEST

#include <stdio.h>

/* Self-test: compares each intprops.h-based function with its my_*
   counterpart on a 16 x 16 grid of operands that spans the range of the type
   under test, and prints one line per disagreement.
   Returns 0 when every pair agrees and 1 if any mismatch was printed.  */
int main (void)
{
  int u, v;
  int mismatches = 0;

  /* The operands are produced by multiplying by a power of two rather than
     by 'u << n': left-shifting a negative int is undefined behavior in C,
     whereas the products below are all representable in the target type.  */

  /* Verify that signed1_overflow and my_signed1_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * (1 << 4);
        int b = v * (1 << 4);
        int x = signed1_overflow (a, b);
        int y = my_signed1_overflow (a, b);
        if (x != y)
          {
            printf ("signed1 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
            mismatches++;
          }
      }

  /* Verify that signed2_overflow and my_signed2_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * (1 << 12);
        int b = v * (1 << 12);
        int x = signed2_overflow (a, b);
        int y = my_signed2_overflow (a, b);
        if (x != y)
          {
            printf ("signed2 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
            mismatches++;
          }
      }

  /* Verify that signed4_overflow and my_signed4_overflow agree.  */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * (1 << 28);
        int b = v * (1 << 28);
        int x = signed4_overflow (a, b);
        int y = my_signed4_overflow (a, b);
        if (x != y)
          {
            printf ("signed4 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
            mismatches++;
          }
      }

  /* Report failure through the exit status so that scripts can detect it.  */
  return mismatches != 0;
}

#endif
        .file   "foo.c"
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        # int signed1_overflow (signed char a, signed char b)
        # In:  %dil = a, %sil = b.   Out: %eax = 0 or 1.
        # Branches on the sign of b and compares a against a bound shifted by b:
        #   b >= 0:  result = (127 - b < a)
        #   b <  0:  result = (a < -128 - b)
        .p2align 4,,15
        .globl  signed1_overflow
        .type   signed1_overflow, @function
signed1_overflow:
.LFB0:
        .cfi_startproc
        testb   %sil, %sil              # set SF from b
        js      .L5                     # b < 0: use the lower-bound test
        movsbl  %sil, %esi              # esi = (int) b
        movl    $127, %eax
        movsbl  %dil, %edi              # edi = (int) a
        subl    %esi, %eax              # eax = 127 - b
        cmpl    %edi, %eax
        setl    %al                     # al = (127 - b < a)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .p2align 4,,10
        .p2align 3
.L5:
        # b < 0 path.
        movsbl  %sil, %esi              # esi = (int) b
        movl    $-128, %eax
        movsbl  %dil, %edi              # edi = (int) a
        subl    %esi, %eax              # eax = -128 - b
        cmpl    %eax, %edi
        setl    %al                     # al = (a < -128 - b)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .cfi_endproc
.LFE0:
        .size   signed1_overflow, .-signed1_overflow
        .section        .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        # int my_signed1_overflow (signed char a, signed char b)
        # In:  %dil = a, %sil = b.   Out: %eax = 0 or 1.
        # Straight-line code with no conditional branch:
        #   result = sign bit of (((int) (signed char) (a + b) - a) ^ b)
        .p2align 4,,15
        .globl  my_signed1_overflow
        .type   my_signed1_overflow, @function
my_signed1_overflow:
.LFB1:
        .cfi_startproc
        leal    (%rsi,%rdi), %eax       # eax = a + b; only the low byte is used below
        movsbl  %dil, %edi              # edi = (int) a
        movsbl  %sil, %esi              # esi = (int) b
        movsbl  %al, %eax               # eax = low byte of the sum, sign-extended
        subl    %edi, %eax              # eax = wrapped sum - a
        xorl    %esi, %eax              # bit 31 is set exactly when the sum overflowed
        shrl    $31, %eax               # move bit 31 down to bit 0: result is 0 or 1
        ret
        .cfi_endproc
.LFE1:
        .size   my_signed1_overflow, .-my_signed1_overflow
        .section        .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely
.LCOLDB2:
        .text
.LHOTB2:
        # int signed2_overflow (short a, short b)
        # In:  %di = a, %si = b.   Out: %eax = 0 or 1.
        # Branches on the sign of b and compares a against a bound shifted by b:
        #   b >= 0:  result = (32767 - b < a)
        #   b <  0:  result = (a < -32768 - b)
        .p2align 4,,15
        .globl  signed2_overflow
        .type   signed2_overflow, @function
signed2_overflow:
.LFB2:
        .cfi_startproc
        testw   %si, %si                # set SF from b
        js      .L10                    # b < 0: use the lower-bound test
        movswl  %si, %esi               # esi = (int) b
        movl    $32767, %eax
        movswl  %di, %edi               # edi = (int) a
        subl    %esi, %eax              # eax = 32767 - b
        cmpl    %edi, %eax
        setl    %al                     # al = (32767 - b < a)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .p2align 4,,10
        .p2align 3
.L10:
        # b < 0 path.
        movswl  %si, %esi               # esi = (int) b
        movl    $-32768, %eax
        movswl  %di, %edi               # edi = (int) a
        subl    %esi, %eax              # eax = -32768 - b
        cmpl    %eax, %edi
        setl    %al                     # al = (a < -32768 - b)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .cfi_endproc
.LFE2:
        .size   signed2_overflow, .-signed2_overflow
        .section        .text.unlikely
.LCOLDE2:
        .text
.LHOTE2:
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely
.LCOLDB3:
        .text
.LHOTB3:
        # int my_signed2_overflow (short a, short b)
        # In:  %di = a, %si = b.   Out: %eax = 0 or 1.
        # Straight-line code with no conditional branch:
        #   result = sign bit of (((int) (short) (a + b) - a) ^ b)
        .p2align 4,,15
        .globl  my_signed2_overflow
        .type   my_signed2_overflow, @function
my_signed2_overflow:
.LFB3:
        .cfi_startproc
        leal    (%rsi,%rdi), %eax       # eax = a + b; only the low 16 bits are used below
        movswl  %di, %edi               # edi = (int) a
        movswl  %si, %esi               # esi = (int) b
        cwtl                            # eax = low 16 bits of the sum, sign-extended
        subl    %edi, %eax              # eax = wrapped sum - a
        xorl    %esi, %eax              # bit 31 is set exactly when the sum overflowed
        shrl    $31, %eax               # move bit 31 down to bit 0: result is 0 or 1
        ret
        .cfi_endproc
.LFE3:
        .size   my_signed2_overflow, .-my_signed2_overflow
        .section        .text.unlikely
.LCOLDE3:
        .text
.LHOTE3:
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely
.LCOLDB4:
        .text
.LHOTB4:
        # int signed4_overflow (int a, int b)
        # In:  %edi = a, %esi = b.   Out: %eax = 0 or 1.
        # Branches on the sign of b and compares a against a bound shifted by b:
        #   b >= 0:  result = (a > 2147483647 - b)
        #   b <  0:  result = (-2147483648 - b > a)
        .p2align 4,,15
        .globl  signed4_overflow
        .type   signed4_overflow, @function
signed4_overflow:
.LFB4:
        .cfi_startproc
        testl   %esi, %esi              # set SF from b
        js      .L15                    # b < 0: use the lower-bound test
        movl    $2147483647, %eax
        subl    %esi, %eax              # eax = 2147483647 - b
        cmpl    %eax, %edi
        setg    %al                     # al = (a > 2147483647 - b)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .p2align 4,,10
        .p2align 3
.L15:
        # b < 0 path.
        movl    $-2147483648, %eax
        subl    %esi, %eax              # eax = -2147483648 - b
        cmpl    %edi, %eax
        setg    %al                     # al = (-2147483648 - b > a)
        movzbl  %al, %eax               # widen the flag to a 0/1 int
        ret
        .cfi_endproc
.LFE4:
        .size   signed4_overflow, .-signed4_overflow
        .section        .text.unlikely
.LCOLDE4:
        .text
.LHOTE4:
        # Section-switch boilerplate: the .text.unlikely part of this function
        # contains only a label, no instructions.
        .section        .text.unlikely
.LCOLDB5:
        .text
.LHOTB5:
        # int my_signed4_overflow (int a, int b)
        # In:  %edi = a, %esi = b.   Out: %eax = 0 or 1.
        # Straight-line code with no conditional branch, computed in 64 bits:
        #   result = sign bit of (((long long) (int) (a + b) - a) ^ b)
        .p2align 4,,15
        .globl  my_signed4_overflow
        .type   my_signed4_overflow, @function
my_signed4_overflow:
.LFB5:
        .cfi_startproc
        leal    (%rdi,%rsi), %eax       # eax = low 32 bits of a + b (wrapping sum)
        movslq  %edi, %rdi              # rdi = (long long) a
        movslq  %esi, %rsi              # rsi = (long long) b
        cltq                            # rax = wrapped 32-bit sum, sign-extended
        subq    %rdi, %rax              # rax = wrapped sum - a
        xorq    %rsi, %rax              # bit 63 is set exactly when the sum overflowed
        shrq    $63, %rax               # move bit 63 down to bit 0: result is 0 or 1
        ret
        .cfi_endproc
.LFE5:
        .size   my_signed4_overflow, .-my_signed4_overflow
        .section        .text.unlikely
.LCOLDE5:
        .text
.LHOTE5:
        .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609"
        .section        .note.GNU-stack,"",@progbits

Reply via email to