https://gcc.gnu.org/g:6efc770a71b7227cdbdc24c947ce1fef10794f4c

commit r15-3864-g6efc770a71b7227cdbdc24c947ce1fef10794f4c
Author: Richard Biener <rguent...@suse.de>
Date:   Wed Sep 25 13:15:42 2024 +0200

    Speed up wide_int_storage::operator=(wide_int_storage const&)
    
    wide_int_storage::operator= shows up high in the profile for the
    testcase in PR114855.  The apparent issue is that the conditional
    jump on 'precision' after the (inlined) memcpy stalls the pipeline
    due to the data dependence on the memcpy destination and the
    required store-to-load forwarding.  We can add scheduling freedom by
    instead testing the precision of the source, which speeds up the
    function by 30%.  I've applied the same logic to the copy CTOR.
    
            * wide-int.h (wide_int_storage::wide_int_storage): Branch
            on source precision to avoid data dependence on memcpy
            destination.
            (wide_int_storage::operator=): Likewise.

Diff:
---
 gcc/wide-int.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index 64b8bf2040c1..777f017f5ae0 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -1196,7 +1196,7 @@ inline wide_int_storage::wide_int_storage (const T &x)
 inline wide_int_storage::wide_int_storage (const wide_int_storage &x)
 {
   memcpy (this, &x, sizeof (wide_int_storage));
-  if (UNLIKELY (precision > WIDE_INT_MAX_INL_PRECISION))
+  if (UNLIKELY (x.precision > WIDE_INT_MAX_INL_PRECISION))
     {
       u.valp = XNEWVEC (HOST_WIDE_INT, CEIL (precision, HOST_BITS_PER_WIDE_INT));
       memcpy (u.valp, x.u.valp, len * sizeof (HOST_WIDE_INT));
@@ -1219,9 +1219,9 @@ wide_int_storage::operator = (const wide_int_storage &x)
       XDELETEVEC (u.valp);
     }
   memcpy (this, &x, sizeof (wide_int_storage));
-  if (UNLIKELY (precision > WIDE_INT_MAX_INL_PRECISION))
+  if (UNLIKELY (x.precision > WIDE_INT_MAX_INL_PRECISION))
     {
-      u.valp = XNEWVEC (HOST_WIDE_INT, CEIL (precision, HOST_BITS_PER_WIDE_INT));
+      u.valp = XNEWVEC (HOST_WIDE_INT, CEIL (x.precision, HOST_BITS_PER_WIDE_INT));
       memcpy (u.valp, x.u.valp, len * sizeof (HOST_WIDE_INT));
     }
   return *this;

Reply via email to