Re: [22/32] miscelaneous c++ bits

Nathan Sidwell Fri, 13 Nov 2020 05:42:15 -0800

On 11/3/20 4:16 PM, Nathan Sidwell wrote:

This is probably the messiest diff.


Let's break this diff apart a bit more, for digestibility.

Here's the MODULE_VECTOR piece. This is a sparse array used for namelookup. A namespace symbol table entry may contain one of these, whichholds the bindings for each loaded module. If a module doesn't bindthat name, there'll be no slot for it. The current TU always uses slot0. The Global Module uses slot 1, and in a named-module partitionsshare slot 2. Slots 1 & 2 are used for duplicate declaration matching.

[These slots are managed entirely in name-lookup].

For data layout purposes, the vector is partitioned into clusters. Eachcluster holds two slots. A slot is the pointer to the bound value(often an OVERLOAD) or a lazy cookie, a slot index, and a slot span.The span is usually '1', except for namespaces which are module-spanningentities. It is '0' in the fixed slots, for simplicity. Both the indexand spans are 16-bit 'unsigned short', and the slot is a 32 or 64 bitpointer. Thus on a 64-bit host we can pack 2 indices, 2 spans and 2pointers without padding. (the slots are not adjacent to their index/span).

The slot itself can contain either a pointer or a load cookie. Theseare distinguished using bit 0 -- 1 for cookie, zero for pointer. Thuspointers have their natural representation (they are 4 or 8 bytealigned). The lazy cookie happens to be partitioned into two pieces. Apair of bits concerning pending template instantiations and memberdefinitions, with the remaining bits (29 or 61) being a section numberin that module's CMI. When we do name lookup we see if there's a lazycookie in a slot of interest, and if so load that section beforeproceeding. (the same cookie may be present in several slots [indifferent bindings], loading will populate all those slots.) There's apreceding check to determine whether the contents of that slot arevisible to us (the containing module has been exported to us). theresult is that importing a module is a cheap operation with an amortizedload cost depending what you look at. (there's a flag to turn lazyloading off)

As you can guess, there's a limit of 16383 imported modules (there is anindependent limit of 2^31 imported entities)

Indices are always in increasing order, and by construction we only needto append to the array.


nathan


--
Nathan Sidwell

diff --git c/gcc/cp/cp-tree.h w/gcc/cp/cp-tree.h
index 63724c0e84f..4752ddef898 100644
--- c/gcc/cp/cp-tree.h
+++ w/gcc/cp/cp-tree.h
@@ -929,6 +959,84 @@ struct named_decl_hash : ggc_remove <tree> {
   static void mark_deleted (value_type) { gcc_unreachable (); }
 };
 
+/* Bindings for modules are held in a sparse array.  There is always a
+   current TU slot, others are allocated as needed.  By construction
+   of the importing mechanism we only ever need to append to the
+   array.  Rather than have straight index/slot tuples, we bunch them
+   up for greater packing.
+
+   The cluster representation packs well on a 64-bit system.  */
+
+#define MODULE_VECTOR_SLOTS_PER_CLUSTER 2
+struct mc_index {
+  unsigned short base;
+  unsigned short span;
+};
+
+struct GTY(()) module_cluster
+{
+  mc_index GTY((skip)) indices[MODULE_VECTOR_SLOTS_PER_CLUSTER];
+  mc_slot slots[MODULE_VECTOR_SLOTS_PER_CLUSTER];
+};
+
+/* These two fields overlay lang flags.  So don't use those.  */
+#define MODULE_VECTOR_ALLOC_CLUSTERS(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.u.dependence_info.clique)
+#define MODULE_VECTOR_NUM_CLUSTERS(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.u.dependence_info.base)
+#define MODULE_VECTOR_CLUSTER_BASE(NODE) \
+  (((tree_module_vec *)MODULE_VECTOR_CHECK (NODE))->vec)
+#define MODULE_VECTOR_CLUSTER_LAST(NODE) \
+  (&MODULE_VECTOR_CLUSTER (NODE, MODULE_VECTOR_NUM_CLUSTERS (NODE) - 1))
+#define MODULE_VECTOR_CLUSTER(NODE,IX) \
+  (((tree_module_vec *)MODULE_VECTOR_CHECK (NODE))->vec[IX])
+
+struct GTY(()) tree_module_vec {
+  struct tree_base base;
+  tree name;
+  module_cluster GTY((length ("%h.base.u.dependence_info.base"))) vec[1];
+};
+
+/* The name of a module vector.  */
+#define MODULE_VECTOR_NAME(NODE) \
+  (((tree_module_vec *)MODULE_VECTOR_CHECK (NODE))->name)
+
+/* tree_module_vec does uses  base.u.dependence_info.base field for
+   length.  It does not have lang_flag etc available!  */
+
+/* These two flags note if a module-vector contains deduplicated
+   bindings (i.e. multiple declarations in different imports).  */
+/* This binding contains duplicate references to a global module
+   entity.  */
+#define MODULE_VECTOR_GLOBAL_DUPS_P(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.static_flag)
+/* This binding contains duplicate references to a partioned module
+   entity.  */
+#define MODULE_VECTOR_PARTITION_DUPS_P(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.volatile_flag)
+
+/* These two flags indicate the provenence of the bindings on this
+   particular vector slot.  We can of course determine this from slot
+   number, but that's a relatively expensive lookup.  This avoids
+   that when iterating.  */
+/* This slot is part of the global module (a header unit).  */
+#define MODULE_BINDING_GLOBAL_P(NODE) \
+  (OVERLOAD_CHECK (NODE)->base.static_flag)
+/* This slot is part of the current module (a partition or primary).  */
+#define MODULE_BINDING_PARTITION_P(NODE)		\
+  (OVERLOAD_CHECK (NODE)->base.volatile_flag)
+
+/* There are specializations of a template keyed to this binding.  */
+#define MODULE_VECTOR_PENDING_SPECIALIZATIONS_P(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.public_flag)
+/* The key is in a header unit (not a named module partition or
+   primary).  */
+#define MODULE_VECTOR_PENDING_IS_HEADER_P(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.protected_flag)
+/* The key is in a named module (primary or partition).  */
+#define MODULE_VECTOR_PENDING_IS_PARTITION_P(NODE) \
+  (MODULE_VECTOR_CHECK (NODE)->base.private_flag)
+
 /* Simplified unique_ptr clone to release a tree vec on exit.  */
 
 class releasing_vec
@@ -7349,6 +7657,7 @@ extern tree hash_tree_cons			(tree, tree, tree);
 extern tree hash_tree_chain			(tree, tree);
 extern tree build_qualified_name		(tree, tree, tree, bool);
 extern tree build_ref_qualified_type		(tree, cp_ref_qualifier);
+extern tree make_module_vec			(tree, unsigned clusters);
 inline tree ovl_first				(tree) ATTRIBUTE_PURE;
 extern tree ovl_make				(tree fn,
 						 tree next = NULL_TREE);
@@ -7972,14 +8295,16 @@ type_unknown_p (const_tree expr)
 inline hashval_t
 named_decl_hash::hash (const value_type decl)
 {
-  tree name = OVL_NAME (decl);
+  tree name = (TREE_CODE (decl) == MODULE_VECTOR
+	       ? MODULE_VECTOR_NAME (decl) : OVL_NAME (decl));
   return name ? IDENTIFIER_HASH_VALUE (name) : 0;
 }
 
 inline bool
 named_decl_hash::equal (const value_type existing, compare_type candidate)
 {
-  tree name = OVL_NAME (existing);
+  tree name = (TREE_CODE (existing) == MODULE_VECTOR
+	       ? MODULE_VECTOR_NAME (existing) : OVL_NAME (existing));
   return candidate == name;
 }
 
diff --git c/gcc/cp/tree.c w/gcc/cp/tree.c
index 28e591086b3..9979fd4f828 100644
--- c/gcc/cp/tree.c
+++ w/gcc/cp/tree.c
@@ -2218,6 +2226,23 @@ build_ref_qualified_type (tree type, cp_ref_qualifier rqual)
   return build_cp_fntype_variant (type, rqual, raises, late);
 }
 
+tree
+make_module_vec (tree name, unsigned clusters MEM_STAT_DECL)
+{
+  /* Stored in an unsigned short, but we're limited to the number of
+     modules anyway.  */
+  gcc_checking_assert (clusters <= (unsigned short)(~0));
+  size_t length = (clusters * sizeof (module_cluster)
+		   + sizeof (tree_module_vec) - sizeof (module_cluster));
+  tree vec = ggc_alloc_cleared_tree_node_stat (length PASS_MEM_STAT);
+  TREE_SET_CODE (vec, MODULE_VECTOR);
+  MODULE_VECTOR_NAME (vec) = name;
+  MODULE_VECTOR_ALLOC_CLUSTERS (vec) = clusters;
+  MODULE_VECTOR_NUM_CLUSTERS (vec) = 0;
+
+  return vec;
+}
+
 /* Make a raw overload node containing FN.  */
 
 tree

Re: [22/32] miscelaneous c++ bits

Reply via email to