On Thu, May 22, 2025 at 12:15:02PM +1000, Nathaniel Shead wrote:
> Another approach would be to fix 'write_class_def' to handle these
> declarations better, but that ended up being more work and felt fragile.
> It also meant streaming a lot more information that we don't need.
>
> Long term I had been playing around with reworking ubsan.cc entirely to
> have a fixed set of types it would use we that we could merge with, but
> given that there seems to be at least one other place we are creating
> ad-hoc types (the struct for constexpr new allocations), and I couldn't
> see an easy way of reworking that, I thought we should support this.
>
> Finally, I'm not 100% certain about the hard-coding MK_unique for fields
> of contextless types, but given that we've given up merging the owning
> TYPE_DECL with anything anyway I think it should be OK.
>
> -- >8 --
>
> Currently, most declarations must have a DECL_CONTEXT for modules
> streaming to behave correctly, so that they can have an appropriate
> merge key generated and be correctly deduplicated on import.
>
> There are a few exceptions, however, for internally generated
> declarations that will never be merged and don't necessarily have an
> appropriate parent to key off for the context. One case that's come up
> a few times is TYPE_DECLs, especially temporary RECORD_TYPEs used as
> intermediaries within expressions.
>
> Previously I've tried to give all such types a DECL_CONTEXT, but in some
> cases that has ended up being infeasible, such as with the types
> generated by UBSan (which are shared with the C frontend and don't know
> their context, especially when created at global scope). Additionally,
> these types often don't have many of the parts that a normal struct
> declaration created via parsing user code would have, which confuses
> module streaming.
>
> Given that these types are typically intended to be one-off and unique
> anyway, this patch instead adds support for by-value streaming of
> uncontexted TYPE_DECLs. The patch only support streaming the bare
> minimum amount of fields needed for the cases I've come across so far;
> in general the preference should still be to ensure that DECL_CONTEXT is
> set where possible.
>
> PR c++/98735
> PR c++/120040
>
> gcc/cp/ChangeLog:
>
> * module.cc (trees_out::tree_value): Write TYPE_DECLs.
> (trees_in::tree_value): Read TYPE_DECLs.
> (trees_out::tree_node): Support uncontexted TYPE_DECLs, and
> ensure that all parts of a by-value decl are marked for
> streaming.
> (trees_out::get_merge_kind): Treat members of uncontexted types
> as always unique.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/modules/pr120040_a.C: New test.
> * g++.dg/modules/pr120040_b.C: New test.
>
> Signed-off-by: Nathaniel Shead <[email protected]>
> ---
> gcc/cp/module.cc | 112 ++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 107 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 99cbfdbf01d..ddb5299b244 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -9654,9 +9654,10 @@ trees_out::tree_value (tree t)
>
> if (DECL_P (t))
> /* No template, type, var or function, except anonymous
> - non-context vars. */
> + non-context vars and types. */
> gcc_checking_assert ((TREE_CODE (t) != TEMPLATE_DECL
> - && TREE_CODE (t) != TYPE_DECL
> + && (TREE_CODE (t) != TYPE_DECL
> + || (DECL_ARTIFICIAL (t) && !DECL_CONTEXT (t)))
> && (TREE_CODE (t) != VAR_DECL
> || (!DECL_NAME (t) && !DECL_CONTEXT (t)))
> && TREE_CODE (t) != FUNCTION_DECL));
> @@ -9670,7 +9671,7 @@ trees_out::tree_value (tree t)
> tree_node_bools (t);
> }
>
> - if (TREE_CODE (t) == TREE_BINFO)
> + if (TREE_CODE (t) == TREE_BINFO)
> /* Binfos are decl-like and need merging information. */
> binfo_mergeable (t);
>
> @@ -9679,8 +9680,51 @@ trees_out::tree_value (tree t)
> dump (dumper::TREE)
> && dump ("Writing tree:%d %C:%N", tag, TREE_CODE (t), t);
>
> + int type_tag = 0;
> + tree type = NULL_TREE;
> + if (TREE_CODE (t) == TYPE_DECL)
> + {
> + type = TREE_TYPE (t);
> +
> + /* We only support a limited set of features for uncontexted types;
> + these are typically types created in the language-independent
> + parts of the frontend (such as ubsan). */
> + gcc_checking_assert (RECORD_OR_UNION_TYPE_P (type)
> + && TYPE_MAIN_VARIANT (type) == type
> + && TYPE_NAME (type) == t
> + && TYPE_STUB_DECL (type) == t
> + && !TYPE_VFIELD (type)
> + && !TYPE_BINFO (type));
> + gcc_checking_assert (!TYPE_LANG_SPECIFIC (type)
> + || (!TYPE_CONTAINS_VPTR_P (type)
> + && !CLASSTYPE_MEMBER_VEC (type)));
> +
> + if (streaming_p ())
> + {
> + start (type);
> + tree_node_bools (type);
> + }
> +
> + type_tag = insert (type, WK_value);
> + if (streaming_p ())
> + dump (dumper::TREE)
> + && dump ("Writing type: %d %C:%N", type_tag,
> + TREE_CODE (type), type);
> + }
> +
> tree_node_vals (t);
>
> + if (type)
> + {
> + tree_node_vals (type);
> + tree_node (TYPE_SIZE (type));
> + tree_node (TYPE_SIZE_UNIT (type));
> + vec_chained_decls (TYPE_FIELDS (type));
Just noticed a mistake: this should be 'chained_decls (TYPE_FIELDS (type))'...
> + if (streaming_p ())
> + dump (dumper::TREE)
> + && dump ("Written type:%d %C:%N", type_tag, TREE_CODE (type), type);
> + }
> +
> if (streaming_p ())
> dump (dumper::TREE) && dump ("Written tree:%d %C:%N", tag, TREE_CODE
> (t), t);
> }
> @@ -9719,14 +9763,57 @@ trees_in::tree_value ()
> dump (dumper::TREE)
> && dump ("Reading tree:%d %C", tag, TREE_CODE (t));
>
> - if (!tree_node_vals (t))
> + int type_tag = 0;
> + tree type = NULL_TREE;
> + if (TREE_CODE (t) == TYPE_DECL)
> + {
> + type = start ();
> + if (!type || !tree_node_bools (type))
> + t = NULL_TREE;
> +
> + type_tag = insert (type);
> + if (t)
> + dump (dumper::TREE)
> + && dump ("Reading type:%d %C", type_tag, TREE_CODE (type));
> + }
> +
> + if (!t)
> {
> +bail:
> back_refs[~tag] = NULL_TREE;
> + if (type_tag)
> + back_refs[~type_tag] = NULL_TREE;
> set_overrun ();
> - /* Bail. */
> return NULL_TREE;
> }
>
> + if (!tree_node_vals (t))
> + goto bail;
> +
> + if (type)
> + {
> + if (!tree_node_vals (type))
> + goto bail;
> +
> + TYPE_SIZE (type) = tree_node ();
> + TYPE_SIZE_UNIT (type) = tree_node ();
> + vec<tree, va_heap> *fields = vec_chained_decls ();
> +
> + tree *chain = &TYPE_FIELDS (type);
> + for (tree decl : fields)
> + {
> + gcc_checking_assert (!*chain);
> + *chain = decl;
> + chain = &DECL_CHAIN (decl);
> + }
...and then this can just be 'TYPE_FIELDS (type) = chained_decls ();'
which will avoid the memory leak.
> +
> + if (get_overrun ())
> + goto bail;
> +
> + dump (dumper::TREE)
> + && dump ("Read type:%d %C:%N", type_tag, TREE_CODE (type), type);
> + }
> +
> if (TREE_CODE (t) == LAMBDA_EXPR
> && CLASSTYPE_LAMBDA_EXPR (TREE_TYPE (t)))
> {
> @@ -9980,7 +10067,17 @@ trees_out::tree_node (tree t)
> PARM_DECLS. It'd be nice if they had a
> distinguishing flag to double check. */
> break;
> +
> + case TYPE_DECL:
> + /* Some parts of the compiler need internal struct types;
> + these types may not have an appropriate context to use.
> + Walk the whole type (including its definition) by value. */
> + gcc_checking_assert (DECL_ARTIFICIAL (t)
> + && TYPE_ARTIFICIAL (TREE_TYPE (t))
> + && RECORD_OR_UNION_TYPE_P (TREE_TYPE (t)));
> + break;
> }
> + mark_declaration (t, has_definition (t));
> goto by_value;
> }
> }
> @@ -11119,6 +11216,11 @@ trees_out::get_merge_kind (tree decl, depset *dep)
> return MK_local_friend;
>
> gcc_checking_assert (TYPE_P (ctx));
> +
> + /* Internal-only types will not need to dedup their members. */
> + if (!DECL_CONTEXT (TYPE_NAME (ctx)))
> + return MK_unique;
> +
> if (TREE_CODE (decl) == USING_DECL)
> return MK_field;
>
> --
> 2.47.0
>