Author: adams381
Date: 2026-05-22T16:10:26-05:00
New Revision: 56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5

URL: 
https://github.com/llvm/llvm-project/commit/56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5
DIFF: 
https://github.com/llvm/llvm-project/commit/56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5.diff

LOG: [CIR] Include union tail pad in getTypeSizeInBits (#198361)

Padded CIR unions (e.g. libstdc++ `std::string` SSO layout) carry a
trailing byte-array member so the record matches the AST layout size.
`RecordType::getTypeSizeInBits` was returning only the size of the
highest-aligned member and ignoring that tail, so the CIR view of the
union was smaller than what `LowerToLLVM` emits (8 bytes smaller in the
SSO case).  Parent structs then picked up
a spurious trailing pad via `insertPadding`, arrays of those structs
used the wrong stride, and heap allocations could be overrun (Eigen's
`array_of_string` hits this directly).

The fix adds the padding member's size when the union is marked
`padded`, so struct size, GEP strides, and `new T[n]` allocation sizes
match OGCG.  The regression test models an SSO-shaped record and checks
that `new SSO[3]` requests 96 bytes (3 * sizeof(SSO)).

Added: 
    clang/test/CIR/CodeGen/record-with-padded-union.cpp

Modified: 
    clang/include/clang/CIR/Dialect/IR/CIRTypes.td
    clang/lib/CIR/Dialect/IR/CIRTypes.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td 
b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index 504ec850ddb5a..9e639df13de70 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -738,6 +738,11 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
     bool isIncomplete() const;
 
     mlir::Type getLargestMember(const mlir::DataLayout &dataLayout) const;
+
+    /// Tail-padding member for a padded union (last member appended by
+    /// lowerUnion).  Empty type when the record is not padded.
+    mlir::Type getPadding() const;
+
     size_t getNumElements() const { return getMembers().size(); };
     std::string getKindAsStr() {
       switch (getKind()) {

diff  --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp 
b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index 4256bb43d1cb7..23c327e81831b 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -345,6 +345,15 @@ Type RecordType::getLargestMember(const ::mlir::DataLayout 
&dataLayout) const {
   });
 }
 
+mlir::Type RecordType::getPadding() const {
+  if (!getPadded())
+    return {};
+  llvm::ArrayRef<mlir::Type> members = getMembers();
+  if (members.empty())
+    return {};
+  return members.back();
+}
+
 bool RecordType::isLayoutIdentical(const RecordType &other) {
   if (getImpl() == other.getImpl())
     return true;
@@ -382,7 +391,23 @@ RecordType::getTypeSizeInBits(const mlir::DataLayout 
&dataLayout,
     mlir::Type largest = getLargestMember(dataLayout);
     if (!largest)
       return llvm::TypeSize::getFixed(0);
-    return dataLayout.getTypeSizeInBits(largest);
+    // `getLargestMember` returns the highest-aligned variant (which dictates
+    // the union's alignment), not necessarily the largest by size.  When the
+    // union is `padded` -- i.e., its highest-aligned variant is strictly
+    // smaller than its layout size, as happens for any union containing both
+    // a small high-alignment scalar and a larger low-alignment array (e.g.,
+    // `union { char[16]; size_t; }`) -- `lowerUnion` appended a trailing
+    // byte-array member to extend the highest-aligned variant up to the
+    // layout size, and `LowerToLLVM` mirrors this by emitting the union as
+    // `{largest, padding}`.  Include that padding here so `getTypeSize`
+    // reports the same size `LowerToLLVM` produces; otherwise a parent
+    // record containing the union gets a spurious tail-padding member added
+    // by `insertPadding`, making `sizeof(parent)` and array GEPs off by the
+    // missing bytes.
+    llvm::TypeSize size = dataLayout.getTypeSizeInBits(largest);
+    if (mlir::Type tailPad = getPadding())
+      size += dataLayout.getTypeSizeInBits(tailPad);
+    return size;
   }
 
   auto recordSize = static_cast<uint64_t>(computeStructSize(dataLayout));

diff  --git a/clang/test/CIR/CodeGen/record-with-padded-union.cpp 
b/clang/test/CIR/CodeGen/record-with-padded-union.cpp
new file mode 100644
index 0000000000000..f20ca4a36f888
--- /dev/null
+++ b/clang/test/CIR/CodeGen/record-with-padded-union.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o 
%t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o 
%t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct SSO {
+  char *p;
+  unsigned long len;
+  union {
+    char local[16];
+    unsigned long capacity;
+  };
+};
+
+// Inner union's tail padding must not bleed into the outer record.
+// CIR: !rec_anon{{.*}} = !cir.record<union "anon{{.*}}" padded 
{!cir.array<!s8i x 16>, !u64i, !cir.array<!u8i x 8>}>
+// CIR: !rec_SSO = !cir.record<struct "SSO" {!cir.ptr<!s8i>, !u64i, 
!rec_anon{{.*}}}>
+
+// LLVM: %struct.SSO = type { ptr, i64, %union.anon{{.*}} }
+// LLVM: %union.anon{{.*}} = type { i64, [8 x i8] }
+
+extern "C" SSO *last_of_three() {
+  SSO *p = new SSO[3];
+  return &p[2];
+}
+
+// Allocation is 3*sizeof(SSO)=96; per-element stride comes from struct size.
+// LLVM-LABEL: define {{.*}}@last_of_three
+// LLVM: call {{.*}}@_Znam(i64 noundef 96)
+// LLVM: getelementptr{{.*}}%struct.SSO, ptr %{{.+}}, i64 2


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to