https://gcc.gnu.org/g:44e065a52fa6069d6c8cacebc8f876840d278dd0

commit r15-2218-g44e065a52fa6069d6c8cacebc8f876840d278dd0
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Jul 19 16:23:51 2024 +0200

    [v2] rtl-optimization/116002 - cselib hash is bad
    
    The following addresses the bad hash function of cselib which uses
    integer plus for merging.  This causes a huge number of collisions
    for the testcase in the PR and thus very large compile-time.
    
    The following rewrites it to use inchash, eliding duplicate mixing
    of RTX code and mode in some cases and more consistently avoiding
    a return value of zero as well as treating zero as fatal.  An
    important part is to preserve mixing of hashes of commutative
    operators as commutative.
    
    For cselib_hash_plus_const_int this removes the apparent attempt
    of making sure to hash the same as a PLUS as cselib_hash_rtx makes
    sure to dispatch to cselib_hash_plus_const_int consistently.
    
    This reduces compile-time for the testcase in the PR from unknown
    to 22s and for a reduced testcase from 73s to 9s.  There's another
    pending patchset to improve the speed of inchash mixing, but it's
    not in the profile for this testcase (PTA pops up now).
    
    The generated code is equal.  I've also compared cc1 builds
    with and without the patch and they are now commparing equal
    after retaining commutative hashing for commutative operators.
    
            PR rtl-optimization/116002
            * cselib.cc (cselib_hash_rtx): Use inchash to get proper mixing.
            Consistently avoid a zero return value when hashing successfully.
            Consistently treat a zero hash value from recursing as fatal.
            Use hashval_t where appropriate.
            (cselib_hash_plus_const_int): Likewise.
            (new_cselib_val): Use hashval_t.
            (cselib_lookup_1): Likewise.

Diff:
---
 gcc/cselib.cc | 224 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 122 insertions(+), 102 deletions(-)

diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index cbaab7d515cc..7beaca424244 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -51,7 +51,7 @@ static void unchain_one_value (cselib_val *);
 static void unchain_one_elt_list (struct elt_list **);
 static void unchain_one_elt_loc_list (struct elt_loc_list **);
 static void remove_useless_values (void);
-static unsigned int cselib_hash_rtx (rtx, int, machine_mode);
+static hashval_t cselib_hash_rtx (rtx, int, machine_mode);
 static cselib_val *new_cselib_val (unsigned int, machine_mode, rtx);
 static void add_mem_for_addr (cselib_val *, cselib_val *, rtx);
 static cselib_val *cselib_lookup_mem (rtx, int);
@@ -1244,7 +1244,7 @@ cselib_redundant_set_p (rtx set)
 /* Helper function for cselib_hash_rtx.  Arguments like for cselib_hash_rtx,
    except that it hashes (plus:P x c).  */
 
-static unsigned int
+static hashval_t
 cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create,
                            machine_mode memmode)
 {
@@ -1266,14 +1266,13 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int 
create,
   if (c == 0)
     return e->hash;
 
-  unsigned hash = (unsigned) PLUS + (unsigned) GET_MODE (x);
-  hash += e->hash;
-  unsigned int tem_hash = (unsigned) CONST_INT + (unsigned) VOIDmode;
-  tem_hash += ((unsigned) CONST_INT << 7) + (unsigned HOST_WIDE_INT) c;
-  if (tem_hash == 0)
-    tem_hash = (unsigned int) CONST_INT;
-  hash += tem_hash;
-  return hash ? hash : 1 + (unsigned int) PLUS;
+  inchash::hash hash;
+  hash.add_int (PLUS);
+  hash.add_int (GET_MODE (x));
+  hash.merge_hash (e->hash);
+  hash.add_hwi (c);
+
+  return hash.end () ? hash.end () : 1 + (unsigned int) PLUS;
 }
 
 /* Hash an rtx.  Return 0 if we couldn't hash the rtx.
@@ -1298,7 +1297,7 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int 
create,
    If the mode is important in any context, it must be checked specifically
    in a comparison anyway, since relying on hash differences is unsafe.  */
 
-static unsigned int
+static hashval_t
 cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 {
   cselib_val *e;
@@ -1306,10 +1305,11 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
   int i, j;
   enum rtx_code code;
   const char *fmt;
-  unsigned int hash = 0;
+  inchash::hash hash;
 
   code = GET_CODE (x);
-  hash += (unsigned) code + (unsigned) GET_MODE (x);
+  hash.add_int (code);
+  hash.add_int (GET_MODE (x));
 
   switch (code)
     {
@@ -1326,19 +1326,16 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
       return e->hash;
 
     case DEBUG_EXPR:
-      hash += ((unsigned) DEBUG_EXPR << 7)
-             + DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_EXPR;
+      hash.add_int (DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x)));
+      return hash.end () ? hash.end() : (unsigned int) DEBUG_EXPR;
 
     case DEBUG_IMPLICIT_PTR:
-      hash += ((unsigned) DEBUG_IMPLICIT_PTR << 7)
-             + DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_IMPLICIT_PTR;
+      hash.add_int (DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x)));
+      return hash.end () ? hash.end () : (unsigned int) DEBUG_IMPLICIT_PTR;
 
     case DEBUG_PARAMETER_REF:
-      hash += ((unsigned) DEBUG_PARAMETER_REF << 7)
-             + DECL_UID (DEBUG_PARAMETER_REF_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_PARAMETER_REF;
+      hash.add_int (DECL_UID (DEBUG_PARAMETER_REF_DECL (x)));
+      return hash.end () ? hash.end () : (unsigned int) DEBUG_PARAMETER_REF;
 
     case ENTRY_VALUE:
       /* ENTRY_VALUEs are function invariant, thus try to avoid
@@ -1347,51 +1344,49 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
         ENTRY_VALUE hash would depend on the current value
         in some register or memory.  */
       if (REG_P (ENTRY_VALUE_EXP (x)))
-       hash += (unsigned int) REG
-               + (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
-               + (unsigned int) REGNO (ENTRY_VALUE_EXP (x));
+       hash.add_int ((unsigned int) REG
+                     + (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
+                     + (unsigned int) REGNO (ENTRY_VALUE_EXP (x)));
       else if (MEM_P (ENTRY_VALUE_EXP (x))
               && REG_P (XEXP (ENTRY_VALUE_EXP (x), 0)))
-       hash += (unsigned int) MEM
-               + (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
-               + (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0));
+       hash.add_int ((unsigned int) MEM
+                     + (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
+                     + (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0)));
       else
-       hash += cselib_hash_rtx (ENTRY_VALUE_EXP (x), create, memmode);
-      return hash ? hash : (unsigned int) ENTRY_VALUE;
+       hash.add_int (cselib_hash_rtx (ENTRY_VALUE_EXP (x), create, memmode));
+      return hash.end () ? hash.end () : (unsigned int) ENTRY_VALUE;
 
     case CONST_INT:
-      hash += ((unsigned) CONST_INT << 7) + UINTVAL (x);
-      return hash ? hash : (unsigned int) CONST_INT;
+      hash.add_hwi (UINTVAL (x));
+      return hash.end () ? hash.end () : (unsigned int) CONST_INT;
 
     case CONST_WIDE_INT:
       for (i = 0; i < CONST_WIDE_INT_NUNITS (x); i++)
-       hash += CONST_WIDE_INT_ELT (x, i);
-      return hash;
+       hash.add_hwi (CONST_WIDE_INT_ELT (x, i));
+      return hash.end () ? hash.end () : (unsigned int) CONST_WIDE_INT;
 
     case CONST_POLY_INT:
       {
-       inchash::hash h;
-       h.add_int (hash);
        for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
-         h.add_wide_int (CONST_POLY_INT_COEFFS (x)[i]);
-       return h.end ();
+         hash.add_wide_int (CONST_POLY_INT_COEFFS (x)[i]);
+       return hash.end () ? hash.end () : (unsigned int) CONST_POLY_INT;
       }
 
     case CONST_DOUBLE:
       /* This is like the general case, except that it only counts
         the integers representing the constant.  */
-      hash += (unsigned) code + (unsigned) GET_MODE (x);
       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
-       hash += ((unsigned) CONST_DOUBLE_LOW (x)
-                + (unsigned) CONST_DOUBLE_HIGH (x));
+       {
+         hash.add_hwi (CONST_DOUBLE_LOW (x));
+         hash.add_hwi (CONST_DOUBLE_HIGH (x));
+       }
       else
-       hash += real_hash (CONST_DOUBLE_REAL_VALUE (x));
-      return hash ? hash : (unsigned int) CONST_DOUBLE;
+       hash.merge_hash (real_hash (CONST_DOUBLE_REAL_VALUE (x)));
+      return hash.end () ? hash.end () : (unsigned int) CONST_DOUBLE;
 
     case CONST_FIXED:
-      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
-      hash += fixed_hash (CONST_FIXED_VALUE (x));
-      return hash ? hash : (unsigned int) CONST_FIXED;
+      hash.merge_hash (fixed_hash (CONST_FIXED_VALUE (x)));
+      return hash.end () ? hash.end () : (unsigned int) CONST_FIXED;
 
     case CONST_VECTOR:
       {
@@ -1403,19 +1398,18 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
        for (i = 0; i < units; ++i)
          {
            elt = CONST_VECTOR_ENCODED_ELT (x, i);
-           hash += cselib_hash_rtx (elt, 0, memmode);
+           hash.merge_hash (cselib_hash_rtx (elt, 0, memmode));
          }
 
-       return hash;
+       return hash.end () ? hash.end () : (unsigned int) CONST_VECTOR;
       }
 
       /* Assume there is only one rtx object for any given label.  */
     case LABEL_REF:
       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
         differences and differences between each stage's debugging dumps.  */
-      hash += (((unsigned int) LABEL_REF << 7)
-              + CODE_LABEL_NUMBER (label_ref_label (x)));
-      return hash ? hash : (unsigned int) LABEL_REF;
+      hash.add_int (CODE_LABEL_NUMBER (label_ref_label (x)));
+      return hash.end () ? hash.end () : (unsigned int) LABEL_REF;
 
     case SYMBOL_REF:
       {
@@ -1424,51 +1418,69 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
           different orders and thus different registers to be used in the
           final assembler.  This also avoids differences in the dump files
           between various stages.  */
-       unsigned int h = 0;
-       const unsigned char *p = (const unsigned char *) XSTR (x, 0);
+       const char *p = (const char *) XSTR (x, 0);
 
-       while (*p)
-         h += (h << 7) + *p++; /* ??? revisit */
+       if (*p)
+         hash.add (p, strlen (p));
 
-       hash += ((unsigned int) SYMBOL_REF << 7) + h;
-       return hash ? hash : (unsigned int) SYMBOL_REF;
+       return hash.end () ? hash.end () : (unsigned int) SYMBOL_REF;
       }
 
     case PRE_DEC:
     case PRE_INC:
-      /* We can't compute these without knowing the MEM mode.  */
-      gcc_assert (memmode != VOIDmode);
-      offset = GET_MODE_SIZE (memmode);
-      if (code == PRE_DEC)
-       offset = -offset;
-      /* Adjust the hash so that (mem:MEMMODE (pre_* (reg))) hashes
-        like (mem:MEMMODE (plus (reg) (const_int I))).  */
-      if (GET_MODE (x) == Pmode
-         && (REG_P (XEXP (x, 0))
-             || MEM_P (XEXP (x, 0))
-             || GET_CODE (XEXP (x, 0)) == VALUE))
-       {
-         HOST_WIDE_INT c;
-         if (offset.is_constant (&c))
-           return cselib_hash_plus_const_int (XEXP (x, 0),
-                                              trunc_int_for_mode (c, Pmode),
-                                              create, memmode);
-       }
-      hash = ((unsigned) PLUS + (unsigned) GET_MODE (x)
-             + cselib_hash_rtx (XEXP (x, 0), create, memmode)
-             + cselib_hash_rtx (gen_int_mode (offset, GET_MODE (x)),
-                                create, memmode));
-      return hash ? hash : 1 + (unsigned) PLUS;
+      {
+       /* We can't compute these without knowing the MEM mode.  */
+       gcc_assert (memmode != VOIDmode);
+       offset = GET_MODE_SIZE (memmode);
+       if (code == PRE_DEC)
+         offset = -offset;
+       /* Adjust the hash so that (mem:MEMMODE (pre_* (reg))) hashes
+          like (mem:MEMMODE (plus (reg) (const_int I))).  */
+       if (GET_MODE (x) == Pmode
+           && (REG_P (XEXP (x, 0))
+               || MEM_P (XEXP (x, 0))
+               || GET_CODE (XEXP (x, 0)) == VALUE))
+         {
+           HOST_WIDE_INT c;
+           if (offset.is_constant (&c))
+             return cselib_hash_plus_const_int (XEXP (x, 0),
+                                                trunc_int_for_mode (c, Pmode),
+                                                create, memmode);
+         }
+
+       hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+       if (tem_hash == 0)
+         return 0;
+       hash.merge_hash (tem_hash);
+       tem_hash = cselib_hash_rtx (gen_int_mode (offset, GET_MODE (x)),
+                                   create, memmode);
+       if (tem_hash == 0)
+         return 0;
+       hash.merge_hash (tem_hash);
+       return hash.end () ? hash.end () : 1 + (unsigned) PLUS;
+      }
 
     case PRE_MODIFY:
-      gcc_assert (memmode != VOIDmode);
-      return cselib_hash_rtx (XEXP (x, 1), create, memmode);
+      {
+       gcc_assert (memmode != VOIDmode);
+       hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 1), create, memmode);
+       if (tem_hash == 0)
+         return 0;
+       hash.merge_hash (tem_hash);
+       return hash.end () ? hash.end () : 1 + (unsigned) PRE_MODIFY;
+      }
 
     case POST_DEC:
     case POST_INC:
     case POST_MODIFY:
-      gcc_assert (memmode != VOIDmode);
-      return cselib_hash_rtx (XEXP (x, 0), create, memmode);
+      {
+       gcc_assert (memmode != VOIDmode);
+       hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+       if (tem_hash == 0)
+         return 0;
+       hash.merge_hash (tem_hash);
+       return hash.end () ? hash.end () : 1 + (unsigned) code;
+      }
 
     case PC:
     case CALL:
@@ -1497,6 +1509,20 @@ cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 
   i = GET_RTX_LENGTH (code) - 1;
   fmt = GET_RTX_FORMAT (code);
+
+  if (COMMUTATIVE_P (x))
+    {
+      gcc_assert (i == 1 && fmt[0] == 'e' && fmt[1] == 'e');
+      hashval_t tem1_hash = cselib_hash_rtx (XEXP (x, 1), create, memmode);
+      if (tem1_hash == 0)
+       return 0;
+      hashval_t tem0_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+      if (tem0_hash == 0)
+       return 0;
+      hash.add_commutative (tem0_hash, tem1_hash);
+      return hash.end () ? hash.end () : 1 + (unsigned int) GET_CODE (x);
+    }
+
   for (; i >= 0; i--)
     {
       switch (fmt[i])
@@ -1504,43 +1530,38 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
        case 'e':
          {
            rtx tem = XEXP (x, i);
-           unsigned int tem_hash = cselib_hash_rtx (tem, create, memmode);
-
+           hashval_t tem_hash = cselib_hash_rtx (tem, create, memmode);
            if (tem_hash == 0)
              return 0;
-
-           hash += tem_hash;
+           hash.merge_hash (tem_hash);
          }
          break;
        case 'E':
          for (j = 0; j < XVECLEN (x, i); j++)
            {
-             unsigned int tem_hash
+             hashval_t tem_hash
                = cselib_hash_rtx (XVECEXP (x, i, j), create, memmode);
-
              if (tem_hash == 0)
                return 0;
-
-             hash += tem_hash;
+             hash.merge_hash (tem_hash);
            }
          break;
 
        case 's':
          {
-           const unsigned char *p = (const unsigned char *) XSTR (x, i);
+           const char *p = (const char *) XSTR (x, i);
 
-           if (p)
-             while (*p)
-               hash += *p++;
+           if (p && *p)
+             hash.add (p, strlen (p));
            break;
          }
 
        case 'i':
-         hash += XINT (x, i);
+         hash.add_hwi (XINT (x, i));
          break;
 
        case 'p':
-         hash += constant_lower_bound (SUBREG_BYTE (x));
+         hash.add_int (constant_lower_bound (SUBREG_BYTE (x)));
          break;
 
        case '0':
@@ -1553,14 +1574,14 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
        }
     }
 
-  return hash ? hash : 1 + (unsigned int) GET_CODE (x);
+  return hash.end () ? hash.end () : 1 + (unsigned int) GET_CODE (x);
 }
 
 /* Create a new value structure for VALUE and initialize it.  The mode of the
    value is MODE.  */
 
 static inline cselib_val *
-new_cselib_val (unsigned int hash, machine_mode mode, rtx x)
+new_cselib_val (hashval_t hash, machine_mode mode, rtx x)
 {
   cselib_val *e = cselib_val_pool.allocate ();
 
@@ -2293,7 +2314,6 @@ cselib_lookup_1 (rtx x, machine_mode mode,
 {
   cselib_val **slot;
   cselib_val *e;
-  unsigned int hashval;
 
   if (GET_MODE (x) != VOIDmode)
     mode = GET_MODE (x);
@@ -2387,7 +2407,7 @@ cselib_lookup_1 (rtx x, machine_mode mode,
   if (MEM_P (x))
     return cselib_lookup_mem (x, create);
 
-  hashval = cselib_hash_rtx (x, create, memmode);
+  hashval_t hashval = cselib_hash_rtx (x, create, memmode);
   /* Can't even create if hashing is not possible.  */
   if (! hashval)
     return 0;

Reply via email to