> Eric, while looking at soft-fp code generated in glibc I noticed that
> for v9 on 32-bit we end up doing software multiplies and divides :-/
>
> I also noticed that the two-limb addition and subtraction could be
> done using a branchless sequence on 64-bit.
>
> Any objections?

None in principle, but...

> #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9))
> \ && W_TYPE_SIZE == 64
> #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
> - __asm__ ("addcc %r4,%5,%1\n\t" \
> - "add %r2,%3,%0\n\t" \
> - "bcs,a,pn %%xcc, 1f\n\t" \
> - "add %0, 1, %0\n" \
> - "1:" \
> + do {
> \
> + UDItype __carry = 0; \
> + __asm__ ("addcc\t%r5,%6,%1\n\t" \
> + "add\t%r3,%4,%0\n\t" \
> + "movcs\t%%xcc, 1, %2\n\t" \
> + "add\t%0, %2, %0"
> \
>
> : "=r" ((UDItype)(sh)), \
>
> - "=&r" ((UDItype)(sl)) \
> + "=&r" ((UDItype)(sl)), \
> + "=&r" (__carry) \
>
> : "%rJ" ((UDItype)(ah)), \
>
> "rI" ((UDItype)(bh)), \
> "%rJ" ((UDItype)(al)), \
> - "rI" ((UDItype)(bl)) \
> - __CLOBBER_CC)
> + "rI" ((UDItype)(bl)), \
> + "2" (__carry) \
> + __CLOBBER_CC); \
> + } while (0)

If __carry is used as both source and destination for %2, why not use a single operand with the + modifier?

> -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
> - __asm__ ("subcc %r4,%5,%1\n\t" \
> - "sub %r2,%3,%0\n\t" \
> - "bcs,a,pn %%xcc, 1f\n\t" \
> - "sub %0, 1, %0\n\t" \
> - "1:" \
> +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
> + do {
> \
> + UDItype __carry = 0; \
> + __asm__ ("subcc\t%r5,%6,%1\n\t" \
> + "sub\t%r3,%4,%0\n\t" \
> + "movcs\t%%xcc, 1, %2\n\t" \
> + "add\t%0, %2, %0"
> \
>
> : "=r" ((UDItype)(sh)), \
>
> - "=&r" ((UDItype)(sl)) \
> - : "rJ" ((UDItype)(ah)), \
> + "=&r" ((UDItype)(sl)), \
> + "=&r" (__carry) \
> + : "%rJ" ((UDItype)(ah)), \
> "rI" ((UDItype)(bh)), \
> - "rJ" ((UDItype)(al)), \
> - "rI" ((UDItype)(bl)) \
> - __CLOBBER_CC)
> + "%rJ" ((UDItype)(al)), \
> + "rI" ((UDItype)(bl)), \
> + "2" (__carry) \
> + __CLOBBER_CC); \
> + } while (0)

Likewise.

-- Eric Botcazou