================ @@ -42,6 +42,66 @@ using namespace CodeGen; namespace { class ConstExprEmitter; +llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) { + if (!CGM.getLangOpts().CPlusPlus) { + // In C23 (N3096) $6.7.10: + // """ + // If any object is initialized with an empty iniitializer, then it is + // subject to default initialization: + // - if it is an aggregate, every member is initialized (recursively) + // according to these rules, and any padding is initialized to zero bits; + // - if it is a union, the first named member is initialized (recursively) + // according to these rules, and any padding is initialized to zero bits. + // + // If the aggregate or union contains elements or members that are + // aggregates or unions, these rules apply recursively to the subaggregates + // or contained unions. + // + // If there are fewer initializers in a brace-enclosed list than there are + // elements or members of an aggregate, or fewer characters in a string + // literal used to initialize an array of known size than there are elements + // in the array, the remainder of the aggregate is subject to default + // initialization. + // """ + // + // From my understanding, the standard is ambiguous in the following two + // areas: + // 1. For a union type with empty initializer, if the first named member is + // not the largest member, then the bytes comes after the first named member + // but before padding are left unspecified. An example is: + // union U { int a; long long b;}; + // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left + // unspecified. + // + // 2. It only mentions padding for empty initializer, but doesn't mention + // padding for a non empty initialization list. And if the aggregation or + // union contains elements or members that are aggregates or unions, and + // some are non empty initializers, while others are empty initiailizers, + // the padding initialization is unclear. An example is: + // struct S1 { int a; long long b; }; + // struct S2 { char c; struct S1 s1; }; + // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a + // and s2.s1.b are unclear. + // struct S2 s2 = { 'c' }; + // + // Here we choose to zero initiailize left bytes of a union type. Because + // projects like the Linux kernel are relying on this behavior. If we don't + // explicitly zero initialize them, the undef values can be optimized to + // return gabage data. We also choose to zero initialize paddings for + // aggregates and unions, no matter they are initialized by empty + // initializers or non empty initializers. This can provide a consistent + // behavior. So projects like the Linux kernel can rely on it. + if (PadSize == CharUnits::One()) + return llvm::ConstantInt::get(CGM.CharTy, 0); + llvm::Type *Ty = llvm::ArrayType::get(CGM.CharTy, PadSize.getQuantity()); + return llvm::ConstantAggregateZero::get(Ty); ---------------- efriedma-quic wrote:
Constant::getNullValue() to unify the codepaths a bit. https://github.com/llvm/llvm-project/pull/97121 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits