https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102974

--- Comment #11 from Mason <slash.tmp at free dot fr> ---
Here's umul_least_64() rewritten as mul_64x64_128() in C

typedef unsigned int u32;
typedef unsigned long long u64;

/*
 * Multiply-accumulate: adds the 64-bit product a[0] * b[0] into the three
 * limbs acc[0..2], with acc[0] the least significant limb.
 *
 *   acc[0] += low  half of the product
 *   acc[1] += high half of the product + carry out of acc[0]
 *   acc[2] += carry out of acc[1]
 *
 * Any carry out of acc[2] is discarded.  The product is ADDED to whatever
 * acc already holds; nothing is cleared here.
 *
 * u32 acc[3], a[1], b[1]
 */
static void mul_add_32x32(u32 *acc, const u32 *a, const u32 *b)
{
  u64 res = (u64)a[0] * b[0];
  u32 lo = (u32)res, hi = (u32)(res >> 32);
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  /*
   * __asm__ rather than asm: the plain keyword is not available in ISO
   * modes such as -std=c11.
   * Explicit 'l' suffixes: "adc $0, <mem>" has no register operand to
   * infer the operand size from, so without a suffix the size is ambiguous.
   * The three instructions must stay in one asm statement so the carry
   * flag survives from one to the next.
   */
  __asm__("addl %[LO], %[D0]\n\t" "adcl %[HI], %[D1]\n\t" "adcl $0, %[D2]" :
  [D0] "+m" (acc[0]), [D1] "+m" (acc[1]), [D2] "+m" (acc[2]) :
  [LO] "r" (lo), [HI] "r" (hi) : "cc");
#else
  /* Portable carry chain for targets without the x86 add/adc sequence. */
  u64 sum = (u64)acc[0] + lo;
  acc[0] = (u32)sum;
  sum = (u64)acc[1] + hi + (sum >> 32);
  acc[1] = (u32)sum;
  acc[2] += (u32)(sum >> 32);
#endif
}

/*
 * Schoolbook 64x64 multiply built from four 32x32 partial products.
 * a[] and b[] are two-limb operands, least significant limb first; the
 * partial product a[i] * b[j] is accumulated at limb offset i + j of acc.
 * Products are added into acc, so the caller supplies its initial contents.
 * The last step touches acc[2..4], hence the five-limb accumulator.
 *
 * u32 acc[5], a[2], b[2]
 */
void mul_64x64_128(u32 *acc, const u32 *a, const u32 *b)
{
  unsigned i, j;

  /* Visits (i, j) = (0,0), (0,1), (1,0), (1,1), in that order. */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      mul_add_32x32(&acc[i + j], &a[i], &b[j]);
}

Output of gcc-trunk -O3 -m32:

mul_64x64_128:
  pushl %esi
  pushl %ebx
  movl 16(%esp), %ebx   ; ebx = a
  movl 20(%esp), %esi   ; esi = b
  movl 12(%esp), %ecx   ; ecx = acc
  movl (%esi), %eax     ; b0
  mull (%ebx)           ; a0*b0
  add %eax, (%ecx)
  adc %edx, 4(%ecx)
  adc $0, 8(%ecx)
  movl 4(%esi), %eax    ; b1
  mull (%ebx)           ; a0*b1
  add %eax, 4(%ecx)
  adc %edx, 8(%ecx)
  adc $0, 12(%ecx)
  movl (%esi), %eax     ; b0
  mull 4(%ebx)          ; a1*b0
  add %eax, 4(%ecx)
  adc %edx, 8(%ecx)
  adc $0, 12(%ecx)
  movl 4(%esi), %eax    ; b1
  mull 4(%ebx)          ; a1*b1
  add %eax, 8(%ecx)
  adc %edx, 12(%ecx)
  adc $0, 16(%ecx)
  popl %ebx
  popl %esi
  ret

Reply via email to