On Wed, Dec 4, 2013 at 12:56 PM, Richard Sandiford
<rdsandif...@googlemail.com> wrote:
> Richard Sandiford <rdsandif...@googlemail.com> writes:
>> This patch handles multiplications using a single HWIxHWI->2HWI
>> multiplication on hosts that have one.  This removes all uses of the
>> slow (half-HWI) path
>> for insn-recog.ii.  The slow path is still used 58 times for cp/parser.ii
>> and 168 times for fold-const.ii, but at that kind of level it shouldn't
>> matter much.
>>
>> I followed Joseph's suggestion and reused longlong.h.  I copied it from
>> libgcc rather than glibc since it seemed better for GCC to have a single
>> version across both gcc/ and libgcc/.  I can put it in include/ if that
>> seems better.
>
> I've committed the patch to move longlong.h to trunk and merged back
> to the branch, so all that's left is the wide-int.cc patch.  OK to install?
>
> Thanks,
> Richard
>
>
> Index: gcc/wide-int.cc
> ===================================================================
> --- gcc/wide-int.cc     2013-12-03 23:59:08.133658567 +0000
> +++ gcc/wide-int.cc     2013-12-04 12:55:28.466895358 +0000
> @@ -27,6 +27,16 @@ along with GCC; see the file COPYING3.
>  #include "tree.h"
>  #include "dumpfile.h"
>
> +#if GCC_VERSION >= 3000
> +#define W_TYPE_SIZE HOST_BITS_PER_WIDE_INT
> +typedef unsigned HOST_HALF_WIDE_INT UHWtype;
> +typedef unsigned HOST_WIDE_INT UWtype;
> +typedef unsigned int UQItype __attribute__ ((mode (QI)));
> +typedef unsigned int USItype __attribute__ ((mode (SI)));
> +typedef unsigned int UDItype __attribute__ ((mode (DI)));
> +#include "longlong.h"

We also need something like the attached patch to handle architectures
which use UDWtype in longlong.h.  I noticed this when trying rth's
patch stack http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00391.html to
improve longlong.h for AArch64.

It will be needed once rth's patches finally go in for AArch64, but it
could probably go in now: after all, the comment in longlong.h says you
need to define UDWtype.  I don't mind either way.

regards
Ramana

<DATE>  Ramana Radhakrishnan  <ramana.radhakrish...@arm.com>

       * wide-int.cc (UTItype): Define.
       (UDWtype): Define for appropriate W_TYPE_SIZE.


> +#endif
> +
>  /* This is the maximal size of the buffer needed for dump.  */
>  const unsigned int MAX_SIZE = (4 * (MAX_BITSIZE_MODE_ANY_INT / 4
>                                     + (MAX_BITSIZE_MODE_ANY_INT
> @@ -1255,8 +1265,8 @@ wi_pack (unsigned HOST_WIDE_INT *result,
>     record in *OVERFLOW whether the result overflowed.  SGN controls
>     the signedness and is used to check overflow or if HIGH is set.  */
>  unsigned int
> -wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1,
> -                 unsigned int op1len, const HOST_WIDE_INT *op2,
> +wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1val,
> +                 unsigned int op1len, const HOST_WIDE_INT *op2val,
>                   unsigned int op2len, unsigned int prec, signop sgn,
>                   bool *overflow, bool high)
>  {
> @@ -1285,24 +1295,53 @@ wi::mul_internal (HOST_WIDE_INT *val, co
>    if (needs_overflow)
>      *overflow = false;
>
> +  wide_int_ref op1 = wi::storage_ref (op1val, op1len, prec);
> +  wide_int_ref op2 = wi::storage_ref (op2val, op2len, prec);
> +
>    /* This is a surprisingly common case, so do it first.  */
> -  if ((op1len == 1 && op1[0] == 0) || (op2len == 1 && op2[0] == 0))
> +  if (op1 == 0 || op2 == 0)
>      {
>        val[0] = 0;
>        return 1;
>      }
>
> +#ifdef umul_ppmm
> +  if (sgn == UNSIGNED)
> +    {
> +      /* If the inputs are single HWIs and the output has room for at
> +        least two HWIs, we can use umul_ppmm directly.  */
> +      if (prec >= HOST_BITS_PER_WIDE_INT * 2
> +         && wi::fits_uhwi_p (op1)
> +         && wi::fits_uhwi_p (op2))
> +       {
> +         umul_ppmm (val[1], val[0], op1.ulow (), op2.ulow ());
> +         return 1 + (val[1] != 0 || val[0] < 0);
> +       }
> +      /* Likewise if the output is a full single HWI, except that the
> +        upper HWI of the result is only used for determining overflow.
> +        (We handle this case inline when overflow isn't needed.)  */
> +      else if (prec == HOST_BITS_PER_WIDE_INT)
> +       {
> +         unsigned HOST_WIDE_INT upper;
> +         umul_ppmm (upper, val[0], op1.ulow (), op2.ulow ());
> +         if (needs_overflow)
> +           *overflow = (upper != 0);
> +         return 1;
> +       }
> +    }
> +#endif
> +
>    /* Handle multiplications by 1.  */
> -  if (op1len == 1 && op1[0] == 1)
> +  if (op1 == 1)
>      {
>        for (i = 0; i < op2len; i++)
> -       val[i] = op2[i];
> +       val[i] = op2val[i];
>        return op2len;
>      }
> -  if (op2len == 1 && op2[0] == 1)
> +  if (op2 == 1)
>      {
>        for (i = 0; i < op1len; i++)
> -       val[i] = op1[i];
> +       val[i] = op1val[i];
>        return op1len;
>      }
>
> @@ -1316,13 +1355,13 @@ wi::mul_internal (HOST_WIDE_INT *val, co
>
>        if (sgn == SIGNED)
>         {
> -         o0 = sext_hwi (op1[0], prec);
> -         o1 = sext_hwi (op2[0], prec);
> +         o0 = op1.to_shwi ();
> +         o1 = op2.to_shwi ();
>         }
>        else
>         {
> -         o0 = zext_hwi (op1[0], prec);
> -         o1 = zext_hwi (op2[0], prec);
> +         o0 = op1.to_uhwi ();
> +         o1 = op2.to_uhwi ();
>         }
>
>        r = o0 * o1;
> @@ -1344,9 +1383,9 @@ wi::mul_internal (HOST_WIDE_INT *val, co
>      }
>
>    /* We do unsigned mul and then correct it.  */
> -  wi_unpack (u, (const unsigned HOST_WIDE_INT*)op1, op1len,
> +  wi_unpack (u, (const unsigned HOST_WIDE_INT *) op1val, op1len,
>              half_blocks_needed, prec, SIGNED);
> -  wi_unpack (v, (const unsigned HOST_WIDE_INT*)op2, op2len,
> +  wi_unpack (v, (const unsigned HOST_WIDE_INT *) op2val, op2len,
>              half_blocks_needed, prec, SIGNED);
>
>    /* The 2 is for a full mult.  */
> @@ -1371,7 +1410,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co
>    if (sgn == SIGNED && (high || needs_overflow))
>      {
>        unsigned HOST_WIDE_INT b;
> -      if (op1[op1len-1] < 0)
> +      if (wi::neg_p (op1))
>         {
>           b = 0;
>           for (i = 0; i < half_blocks_needed; i++)
> @@ -1382,7 +1421,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co
>               b = t >> (HOST_BITS_PER_WIDE_INT - 1);
>             }
>         }
> -      if (op2[op2len-1] < 0)
> +      if (wi::neg_p (op2))
>         {
>           b = 0;
>           for (i = 0; i < half_blocks_needed; i++)
commit 62672d152281515c1d97ec1370698544cb2c81b6
Author: Ramana Radhakrishnan <ramana.radhakrish...@arm.com>
Date:   Thu May 8 09:56:59 2014 +0000

    Fix up UDWtype for wide-int merge.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 354cdb9..8ef9a0f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-05-08  Ramana Radhakrishnan  <ramana.radhakrish...@arm.com>
+
+       * wide-int.cc (UTItype): Define.
+       (UDWtype): Define for appropriate W_TYPE_SIZE.
+
 2014-05-08  Alan Modra  <amo...@gmail.com>
 
        PR target/60737
diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc
index 69a15bc..3552e03 100644
--- a/gcc/wide-int.cc
+++ b/gcc/wide-int.cc
@@ -34,6 +34,12 @@ typedef unsigned HOST_WIDE_INT UWtype;
 typedef unsigned int UQItype __attribute__ ((mode (QI)));
 typedef unsigned int USItype __attribute__ ((mode (SI)));
 typedef unsigned int UDItype __attribute__ ((mode (DI)));
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+#if W_TYPE_SIZE == 32
+# define UDWtype       UDItype
+#elif W_TYPE_SIZE == 64
+# define UDWtype       UTItype
+#endif
 #include "longlong.h"
 #endif
 

Reply via email to