> Eric, while looking at soft-fp code generated in glibc I noticed that
> for v9 on 32-bit we end up doing software multiplies and divides :-/
>
> I also noticed that the two-limb addition and subtraction could be
> done using a branchless sequence on 64-bit.
>
> Any objections?

No objection in principle, but...

>  #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
>      && W_TYPE_SIZE == 64
>  #define add_ssaaaa(sh, sl, ah, al, bh, bl)                           \
> -  __asm__ ("addcc %r4,%5,%1\n\t"                                     \
> -        "add %r2,%3,%0\n\t"                                          \
> -        "bcs,a,pn %%xcc, 1f\n\t"                                     \
> -        "add %0, 1, %0\n"                                            \
> -        "1:"                                                         \
> +  do {                                                               \
> +    UDItype __carry = 0;                                             \
> +    __asm__ ("addcc\t%r5,%6,%1\n\t"                                  \
> +          "add\t%r3,%4,%0\n\t"                                       \
> +          "movcs\t%%xcc, 1, %2\n\t"                                  \
> +             "add\t%0, %2, %0"                                       \
>
>          : "=r" ((UDItype)(sh)),                                      \
>
> -          "=&r" ((UDItype)(sl))                                      \
> +          "=&r" ((UDItype)(sl)),                                     \
> +          "=&r" (__carry)                                            \
>
>          : "%rJ" ((UDItype)(ah)),                                     \
>
>            "rI" ((UDItype)(bh)),                                      \
>            "%rJ" ((UDItype)(al)),                                     \
> -          "rI" ((UDItype)(bl))                                       \
> -        __CLOBBER_CC)
> +          "rI" ((UDItype)(bl)),                                      \
> +          "2" (__carry)                                              \
> +        __CLOBBER_CC);                                               \
> +  } while (0)

If __carry is used as both source and destination for %2, why not use a single
operand with the + modifier, i.e. "+r" (__carry), instead of the separate
"=&r" output and matching "2" input?

> -#define sub_ddmmss(sh, sl, ah, al, bh, bl)                           \
> -  __asm__ ("subcc %r4,%5,%1\n\t"                                     \
> -        "sub %r2,%3,%0\n\t"                                          \
> -        "bcs,a,pn %%xcc, 1f\n\t"                                     \
> -        "sub %0, 1, %0\n\t"                                          \
> -        "1:"                                                         \
> +#define sub_ddmmss(sh, sl, ah, al, bh, bl)                           \
> +  do {                                                               \
> +    UDItype __carry = 0;                                             \
> +    __asm__ ("subcc\t%r5,%6,%1\n\t"                                  \
> +          "sub\t%r3,%4,%0\n\t"                                       \
> +          "movcs\t%%xcc, 1, %2\n\t"                                  \
> +             "add\t%0, %2, %0"                                       \
>
>          : "=r" ((UDItype)(sh)),                                      \
>
> -          "=&r" ((UDItype)(sl))                                      \
> -        : "rJ" ((UDItype)(ah)),                                      \
> +          "=&r" ((UDItype)(sl)),                                     \
> +          "=&r" (__carry)                                            \
> +        : "%rJ" ((UDItype)(ah)),                                     \
>            "rI" ((UDItype)(bh)),                                      \
> -          "rJ" ((UDItype)(al)),                                      \
> -          "rI" ((UDItype)(bl))                                       \
> -        __CLOBBER_CC)
> +          "%rJ" ((UDItype)(al)),                                     \
> +          "rI" ((UDItype)(bl)),                                      \
> +          "2" (__carry)                                              \
> +        __CLOBBER_CC);                                               \
> +  } while (0)

Likewise here: a single "+r" (__carry) operand would do.

-- 
Eric Botcazou

Reply via email to