Richard Sandiford <rdsandif...@googlemail.com> writes: > This patch handles multiplications using a single HWIxHWI->2HWI multiplication > on hosts that have one. This removes all uses of the slow (half-HWI) path > for insn-recog.ii. The slow path is still used 58 times for cp/parser.ii > and 168 times for fold-const.ii, but at that kind of level it shouldn't > matter much. > > I followed Joseph's suggestion and reused longlong.h. I copied it from > libgcc rather than glibc since it seemed better for GCC to have a single > version across both gcc/ and libgcc/. I can put it in include/ if that > seems better.
I've committed the patch to move longlong.h to trunk and merged back to the branch, so all that's left is the wide-int.cc patch. OK to install? Thanks, Richard Index: gcc/wide-int.cc =================================================================== --- gcc/wide-int.cc 2013-12-03 23:59:08.133658567 +0000 +++ gcc/wide-int.cc 2013-12-04 12:55:28.466895358 +0000 @@ -27,6 +27,16 @@ along with GCC; see the file COPYING3. #include "tree.h" #include "dumpfile.h" +#if GCC_VERSION >= 3000 +#define W_TYPE_SIZE HOST_BITS_PER_WIDE_INT +typedef unsigned HOST_HALF_WIDE_INT UHWtype; +typedef unsigned HOST_WIDE_INT UWtype; +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +#include "longlong.h" +#endif + /* This is the maximal size of the buffer needed for dump. */ const unsigned int MAX_SIZE = (4 * (MAX_BITSIZE_MODE_ANY_INT / 4 + (MAX_BITSIZE_MODE_ANY_INT @@ -1255,8 +1265,8 @@ wi_pack (unsigned HOST_WIDE_INT *result, record in *OVERFLOW whether the result overflowed. SGN controls the signedness and is used to check overflow or if HIGH is set. */ unsigned int -wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1, - unsigned int op1len, const HOST_WIDE_INT *op2, +wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1val, + unsigned int op1len, const HOST_WIDE_INT *op2val, unsigned int op2len, unsigned int prec, signop sgn, bool *overflow, bool high) { @@ -1285,24 +1295,53 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (needs_overflow) *overflow = false; + wide_int_ref op1 = wi::storage_ref (op1val, op1len, prec); + wide_int_ref op2 = wi::storage_ref (op2val, op2len, prec); + /* This is a surprisingly common case, so do it first. */ - if ((op1len == 1 && op1[0] == 0) || (op2len == 1 && op2[0] == 0)) + if (op1 == 0 || op2 == 0) { val[0] = 0; return 1; } +#ifdef umul_ppmm + if (sgn == UNSIGNED) + { + /* If the inputs are single HWIs and the output has room for at + least two HWIs, we can use umul_ppmm directly. */ + if (prec >= HOST_BITS_PER_WIDE_INT * 2 + && wi::fits_uhwi_p (op1) + && wi::fits_uhwi_p (op2)) + { + umul_ppmm (val[1], val[0], op1.ulow (), op2.ulow ()); + return 1 + (val[1] != 0 || val[0] < 0); + } + /* Likewise if the output is a full single HWI, except that the + upper HWI of the result is only used for determining overflow. + (We handle this case inline when overflow isn't needed.) */ + else if (prec == HOST_BITS_PER_WIDE_INT) + { + unsigned HOST_WIDE_INT upper; + umul_ppmm (upper, val[0], op1.ulow (), op2.ulow ()); + if (needs_overflow) + *overflow = (upper != 0); + return 1; + } + } +#endif + /* Handle multiplications by 1. */ - if (op1len == 1 && op1[0] == 1) + if (op1 == 1) { for (i = 0; i < op2len; i++) - val[i] = op2[i]; + val[i] = op2val[i]; return op2len; } - if (op2len == 1 && op2[0] == 1) + if (op2 == 1) { for (i = 0; i < op1len; i++) - val[i] = op1[i]; + val[i] = op1val[i]; return op1len; } @@ -1316,13 +1355,13 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (sgn == SIGNED) { - o0 = sext_hwi (op1[0], prec); - o1 = sext_hwi (op2[0], prec); + o0 = op1.to_shwi (); + o1 = op2.to_shwi (); } else { - o0 = zext_hwi (op1[0], prec); - o1 = zext_hwi (op2[0], prec); + o0 = op1.to_uhwi (); + o1 = op2.to_uhwi (); } r = o0 * o1; @@ -1344,9 +1383,9 @@ wi::mul_internal (HOST_WIDE_INT *val, co } /* We do unsigned mul and then correct it. */ - wi_unpack (u, (const unsigned HOST_WIDE_INT*)op1, op1len, + wi_unpack (u, (const unsigned HOST_WIDE_INT *) op1val, op1len, half_blocks_needed, prec, SIGNED); - wi_unpack (v, (const unsigned HOST_WIDE_INT*)op2, op2len, + wi_unpack (v, (const unsigned HOST_WIDE_INT *) op2val, op2len, half_blocks_needed, prec, SIGNED); /* The 2 is for a full mult. */ @@ -1371,7 +1410,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (sgn == SIGNED && (high || needs_overflow)) { unsigned HOST_WIDE_INT b; - if (op1[op1len-1] < 0) + if (wi::neg_p (op1)) { b = 0; for (i = 0; i < half_blocks_needed; i++) @@ -1382,7 +1421,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co b = t >> (HOST_BITS_PER_WIDE_INT - 1); } } - if (op2[op2len-1] < 0) + if (wi::neg_p (op2)) { b = 0; for (i = 0; i < half_blocks_needed; i++)