https://github.com/vsapsai created 
https://github.com/llvm/llvm-project/pull/135887

Fixes the assertion failure
> Assertion failed: (DeclIDs.contains(D) && "Declaration not emitted!"), 
> function getDeclID, file ASTWriter.cpp, line 6873.

We prepare to serialize a `Decl` by adding it to `DeclIDs` in 
`ASTWriter::GetDeclRef`. But the checks before this call aren't the same as 
when we are actually serializing a `Decl` in 
`ASTIdentifierTableTrait::EmitData` and `ASTWriter::WriteIdentifierTable`. 
That's how we can end up serializing a `Decl` not present in `DeclIDs` and 
hitting the assertion. With the assertions disabled clang crashes when trying 
to use a deserialized null `Decl`.

Fix by making the code checks before `ASTWriter::GetDeclRef` call similar to 
those we have before the serialization.

rdar://139319683

>From d944252ce2f08d1522f7297e4d7f9a7b730db180 Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsap...@apple.com>
Date: Tue, 15 Apr 2025 16:34:47 -0700
Subject: [PATCH] [Modules] Fix the inconsistency of which `Decl` should be
 serialized for an identifier.

Fixes the assertion failure
> Assertion failed: (DeclIDs.contains(D) && "Declaration not emitted!"), 
> function getDeclID, file ASTWriter.cpp, line 6873.

We prepare to serialize a `Decl` by adding it to `DeclIDs` in
`ASTWriter::GetDeclRef`. But the checks before this call aren't the same as
when we are actually serializing a `Decl` in `ASTIdentifierTableTrait::EmitData`
and `ASTWriter::WriteIdentifierTable`. That's how we can end up serializing
a `Decl` not present in `DeclIDs` and hitting the assertion. With the assertions
disabled clang crashes when trying to use a deserialized null `Decl`.

Fix by making the code checks before `ASTWriter::GetDeclRef` call similar
to those we have before the serialization.

rdar://139319683
---
 clang/include/clang/Serialization/ASTWriter.h |   1 +
 clang/lib/Serialization/ASTWriter.cpp         |  11 +-
 clang/test/Modules/non-modular-decl-use.c     | 100 ++++++++++++++++++
 3 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/Modules/non-modular-decl-use.c

diff --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index bdf3aca0637c8..5075232596cea 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -224,6 +224,7 @@ class ASTWriter : public ASTDeserializationListener,
   /// discovery) and start at 2. 1 is reserved for the translation
   /// unit, while 0 is reserved for NULL.
   llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
+  // TMP: ^ DeclIDs type for reference
 
   /// Set of predefined decls. This is a helper data to determine if a decl
   /// is predefined. It should be more clear and safer to query the set
diff --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 95b5718f1d140..c9db69b3b784b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3925,6 +3925,7 @@ class ASTIdentifierTableTrait {
       // Only emit declarations that aren't from a chained PCH, though.
       SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II));
       for (NamedDecl *D : llvm::reverse(Decls))
+        // TMP: corresponding `getDeclForLocalLookup` call
         LE.write<DeclID>((DeclID)Writer.getDeclID(
             getDeclForLocalLookup(PP.getLangOpts(), D)));
     }
@@ -3969,6 +3970,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
       if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() ||
           (Trait.needDecls() &&
            II->hasFETokenInfoChangedSinceDeserialization()))
+        // TMP: ^ corresponding `hasFETokenInfoChangedSinceDeserialization` 
call
         Generator.insert(II, ID, Trait);
     }
 
@@ -5664,14 +5666,16 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema 
&SemaRef) {
     llvm::SmallVector<const IdentifierInfo*, 256> IIs;
     for (const auto &ID : SemaRef.PP.getIdentifierTable()) {
       const IdentifierInfo *II = ID.second;
-      if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization())
+      if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization() ||
+          II->hasFETokenInfoChangedSinceDeserialization())
         IIs.push_back(II);
     }
     // Sort the identifiers to visit based on their name.
     llvm::sort(IIs, llvm::deref<std::less<>>());
+    const LangOptions &LangOpts = getLangOpts();
     for (const IdentifierInfo *II : IIs)
-      for (const Decl *D : SemaRef.IdResolver.decls(II))
-        GetDeclRef(D);
+      for (NamedDecl *D : SemaRef.IdResolver.decls(II))
+        GetDeclRef(getDeclForLocalLookup(LangOpts, D));
   }
 
   // Write all of the DeclsToCheckForDeferredDiags.
@@ -6845,6 +6849,7 @@ LocalDeclID ASTWriter::GetDeclRef(const Decl *D) {
   }
 
   assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
+  // TMP: adding D to DeclIDs
   LocalDeclID &ID = DeclIDs[D];
   if (ID.isInvalid()) {
     if (DoneWritingDeclsAndTypes) {
diff --git a/clang/test/Modules/non-modular-decl-use.c 
b/clang/test/Modules/non-modular-decl-use.c
new file mode 100644
index 0000000000000..1bd87bf284620
--- /dev/null
+++ b/clang/test/Modules/non-modular-decl-use.c
@@ -0,0 +1,100 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fsyntax-only -I %t/include %t/test.c \
+// RUN:            -fmodules -fimplicit-module-maps 
-fmodules-cache-path=%t/modules.cache
+
+// Test when a decl is present in multiple modules through an inclusion of
+// a non-modular header. Make sure such decl is serialized correctly and can be
+// used after deserialization.
+
+//--- include/non-modular.h
+#ifndef NON_MODULAR_H
+#define NON_MODULAR_H
+
+union TestUnion {
+  int x;
+  float y;
+};
+
+struct ReferenceUnion1 {
+  union TestUnion name;
+  unsigned versionMajor;
+};
+struct ReferenceUnion2 {
+  union TestUnion name;
+  unsigned versionMinor;
+};
+
+// Test another kind of RecordDecl.
+struct TestStruct {
+  int p;
+  float q;
+};
+
+struct ReferenceStruct1 {
+  unsigned fieldA;
+  struct TestStruct fieldB;
+};
+
+struct ReferenceStruct2 {
+  unsigned field1;
+  struct TestStruct field2;
+};
+
+#endif
+
+//--- include/piecewise1-empty.h
+//--- include/piecewise1-initially-hidden.h
+#include <non-modular.h>
+
+//--- include/piecewise2-empty.h
+//--- include/piecewise2-initially-hidden.h
+#include <non-modular.h>
+
+//--- include/with-multiple-decls.h
+#include <piecewise1-empty.h>
+// Include the non-modular header and resolve a name duplication between decl
+// in non-modular.h and in piecewise1-initially-hidden.h, create a
+// redeclaration chain.
+#include <non-modular.h>
+// Include a decl with a duplicate name again to add more to 
IdentifierResolver.
+#include <piecewise2-empty.h>
+
+//--- include/module.modulemap
+module Piecewise1 {
+  module Empty {
+    header "piecewise1-empty.h"
+  }
+  module InitiallyHidden {
+    header "piecewise1-initially-hidden.h"
+    export *
+  }
+}
+
+module Piecewise2 {
+  module Empty {
+    header "piecewise2-empty.h"
+  }
+  module InitiallyHidden {
+    header "piecewise2-initially-hidden.h"
+    export *
+  }
+}
+
+module WithMultipleDecls {
+  header "with-multiple-decls.h"
+  export *
+}
+
+//--- test.c
+#include <with-multiple-decls.h>
+
+struct Test {
+  int x;
+  union TestUnion name;
+};
+
+struct Test2 {
+  struct TestStruct name;
+  float y;
+};

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to