Author: adams381 Date: 2026-05-22T16:10:26-05:00 New Revision: 56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5
URL: https://github.com/llvm/llvm-project/commit/56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5 DIFF: https://github.com/llvm/llvm-project/commit/56bf9850bbb6442b7ff5bc17a7f5dc0bab7d83b5.diff LOG: [CIR] Include union tail pad in getTypeSizeInBits (#198361) Padded CIR unions (e.g. libstdc++ `std::string` SSO layout) carry a trailing byte-array member so the record matches the AST layout size. `RecordType::getTypeSizeInBits` was returning only the largest-aligned member and ignored that tail, so the CIR view of the union was 8 bytes smaller than what `LowerToLLVM` emits. Parent structs then picked up a spurious trailing pad via `insertPadding`, arrays of those structs used the wrong stride, and heap allocations could be overrun (Eigen's `array_of_string` hits this directly). The fix adds the padding member's size when the union is marked `padded`, so struct size, GEP strides, and `new T[n]` allocation sizes match OGCG. Regression test models the SSO-shaped record and checks the 96-byte `new` for three elements. Added: clang/test/CIR/CodeGen/record-with-padded-union.cpp Modified: clang/include/clang/CIR/Dialect/IR/CIRTypes.td clang/lib/CIR/Dialect/IR/CIRTypes.cpp Removed: ################################################################################ diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index 504ec850ddb5a..9e639df13de70 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -738,6 +738,11 @@ def CIR_RecordType : CIR_Type<"Record", "record", [ bool isIncomplete() const; mlir::Type getLargestMember(const mlir::DataLayout &dataLayout) const; + + /// Tail-padding member for a padded union (last member appended by + /// lowerUnion). Empty type when the record is not padded. 
+ mlir::Type getPadding() const; + size_t getNumElements() const { return getMembers().size(); }; std::string getKindAsStr() { switch (getKind()) { diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 4256bb43d1cb7..23c327e81831b 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -345,6 +345,15 @@ Type RecordType::getLargestMember(const ::mlir::DataLayout &dataLayout) const { }); } +mlir::Type RecordType::getPadding() const { + if (!getPadded()) + return {}; + llvm::ArrayRef<mlir::Type> members = getMembers(); + if (members.empty()) + return {}; + return members.back(); +} + bool RecordType::isLayoutIdentical(const RecordType &other) { if (getImpl() == other.getImpl()) return true; @@ -382,7 +391,23 @@ RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, mlir::Type largest = getLargestMember(dataLayout); if (!largest) return llvm::TypeSize::getFixed(0); - return dataLayout.getTypeSizeInBits(largest); + // `getLargestMember` returns the highest-aligned variant (which dictates + // the union's alignment), not necessarily the largest by size. When the + // union is `padded` -- i.e., its highest-aligned variant is strictly + // smaller than its layout size, as happens for any union containing both + // a small high-alignment scalar and a larger low-alignment array (e.g., + // `union { char[16]; size_t; }`) -- `lowerUnion` appended a trailing + // byte-array member to extend the highest-aligned variant up to the + // layout size, and `LowerToLLVM` mirrors this by emitting the union as + // `{largest, padding}`. Include that padding here so `getTypeSize` + // reports the same size `LowerToLLVM` produces; otherwise a parent + // record containing the union gets a spurious tail-padding member added + // by `insertPadding`, making `sizeof(parent)` and array GEPs off by the + // missing bytes. 
+ llvm::TypeSize size = dataLayout.getTypeSizeInBits(largest); + if (mlir::Type tailPad = getPadding()) + size += dataLayout.getTypeSizeInBits(tailPad); + return size; } auto recordSize = static_cast<uint64_t>(computeStructSize(dataLayout)); diff --git a/clang/test/CIR/CodeGen/record-with-padded-union.cpp b/clang/test/CIR/CodeGen/record-with-padded-union.cpp new file mode 100644 index 0000000000000..f20ca4a36f888 --- /dev/null +++ b/clang/test/CIR/CodeGen/record-with-padded-union.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct SSO { + char *p; + unsigned long len; + union { + char local[16]; + unsigned long capacity; + }; +}; + +// Inner union's tail padding must not bleed into the outer record. +// CIR: !rec_anon{{.*}} = !cir.record<union "anon{{.*}}" padded {!cir.array<!s8i x 16>, !u64i, !cir.array<!u8i x 8>}> +// CIR: !rec_SSO = !cir.record<struct "SSO" {!cir.ptr<!s8i>, !u64i, !rec_anon{{.*}}}> + +// LLVM: %struct.SSO = type { ptr, i64, %union.anon{{.*}} } +// LLVM: %union.anon{{.*}} = type { i64, [8 x i8] } + +extern "C" SSO *last_of_three() { + SSO *p = new SSO[3]; + return &p[2]; +} + +// Allocation is 3*sizeof(SSO)=96; per-element stride comes from struct size. +// LLVM-LABEL: define {{.*}}@last_of_three +// LLVM: call {{.*}}@_Znam(i64 noundef 96) +// LLVM: getelementptr{{.*}}%struct.SSO, ptr %{{.+}}, i64 2 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
