gcc version for reproducer bug
======================
I tried this on the most recent gcc 32bit compiler version at godbolt.org,
and it is reporoducing the same assembly language for this benchmark as the
gcc version I am using, Raspbian 8.3.0-6+rpi1.  The optional performance
enhancement I describe at the bottom *definitely* still applies, even if
the bugs I'm reporting in this reproducer are fixed.

output:
=====
-970094740 14028998

problem:
======
(1) The bench() function  below can only return positive values, so the
first number printed above is wrong.

(2) the second number is supposed to be the first number divided by
lit-const 237.   The second number is too big for that to be correct.

REPRODUCER -- REPRODUCER -- REPRODUCER
======================================
#include <stdio.h>

int bench()
{
   int sum = 0;
   for (int i=1000000000; --i)
      sum += i / 237;
   return ((sum < 0) ? -sum : sum);
}

int main()
{
   int asum = bench();
   pritntf("%d %d\n", asum, asum / 237);
   return 0;
}
==============================
END REPRODUCER -- END REPRODUCER -- END REPRODUCER

OTHER non-bug comment
===================
The compiler I wrote runs literally twice as fast as 32bit ARM gcc for this
operation.  The lit-const division and modulo in gcc 32 bit ARM is crazy
bad.  Here is how I did it in my compiler:

// Following function from Hacker's Delight, 2nd Edition
// converts literal-constant integer division into a multiply

struct muldiv {int M; int s;}; // Recip mult and shift amount

void irecip(int d, struct muldiv *mag) {
   int p;
   int ad, anc, delta, q1, r1, q2, r2, t;
   int two31 = 0x80000000;     // 2**31.
   ad = (d < 0) ? -d : d;
   t = 0x7fffffff + ((d < 0) ? (1+d) : -d) + 1;
   anc = 0x7fffffff - t%ad + ((d < 0) ? 1 : 0);
   p = 31;                 // Init. p.
   q1 = -(two31/anc);      // Init. q1 = 2**p/|nc|.
   r1 = two31 - q1*anc;    // Init. r1 = rem(2**p, |nc|).
   q2 = two31/ad;          // Init. q2 = 2**p/|d|.
   r2 = -(two31 - q2*ad);  // Init. r2 = rem(2**p, |d|).
   q2 = -q2;
   do {
      p = p + 1;
      q1 = 2*q1;           // Update q1 = 2**p/|nc|.
      r1 = 2*r1;           // Update r1 = rem(2**p, |nc|).
      if (r1 >= anc) {     // (Must be an unsigned
         q1 = q1 + 1;      // comparison here).
         r1 = r1 - anc;}
      q2 = 2*q2;           // Update q2 = 2**p/|d|.
      r2 = 2*r2;           // Update r2 = rem(2**p, |d|).
      if (r2 >= ad) {      // (Must be an unsigned
         q2 = q2 + 1;      // comparison here).
         r2 = r2 - ad;}
      delta = ad - r2;
   } while (q1 < delta || (q1 == delta && r1 == 0));
   mag->M = q2 + 1;
   if (d < 0) mag->M = -mag->M; // Magic number and
   mag->s = p - 32;            // shift amount to return.
}

void const_div_AST(int *enode)
{
   struct muldiv mag;
   int *a, divisor = n[2];
   irecip(divisor, &mag);
   n[2] = mag.M; *--n = DivCnst;
   if (divisor > 0 && mag.M < 0) { *--n = (int) enode; *--n = Add; }
   else if (divisor < 0 && mag.M > 0) { *--n = (int) enode; *--n = Sub; }
   if (mag.s > 0) {
      a = n; *--n = mag.s; *--n = Num; *--n = (int) a; *--n = Shr;
   }
}

Reply via email to