https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/170517
>From e0f9bccee9dd6401818ff8146aa958675432343f Mon Sep 17 00:00:00 2001 From: Paul Kirth <[email protected]> Date: Mon, 1 Dec 2025 16:28:42 -0800 Subject: [PATCH 1/2] [clang] Limit lifetimes of temporaries to the full expression We have several issues describing suboptimal stack usage related to the lifetimes of temporary objects, such as #68747, #43598, and #109204. Previously, https://reviews.llvm.org/D74094 tried to address this. In that review, a few issues were brought up, particularly a concern about the lifetimes of the temporaries needing to be extended to end of the full expression. While there are arguably more optimal lifetime bounds we could enforce, for now we can conservatively make them extend to the end of the full expression, and later refine the optimization to use tighter bounds (or perhaps a better mechanism in the middle end?). Fixes #68747 Co-authored-by: Nick Desaulniers <[email protected]> Co-authored-by: Erik Pilkington <[email protected]> --- clang/docs/ReleaseNotes.rst | 9 ++ clang/include/clang/Basic/CodeGenOptions.def | 4 + clang/include/clang/Options/Options.td | 5 + clang/lib/CodeGen/CGCall.cpp | 19 +++- clang/test/CodeGen/lifetime-call-temp.c | 98 +++++++++++++++++++ clang/test/CodeGen/stack-usage-lifetimes.c | 89 +++++++++++++++++ .../CodeGenCXX/amdgcn-call-with-aggarg.cc | 19 ++++ .../CodeGenCXX/stack-reuse-miscompile.cpp | 4 + clang/test/CodeGenCoroutines/pr59181.cpp | 4 + 9 files changed, 250 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/lifetime-call-temp.c create mode 100644 clang/test/CodeGen/stack-usage-lifetimes.c create mode 100644 clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cc diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6838e926f4c9d..165f52c73194c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -86,6 +86,15 @@ Potentially Breaking Changes options-related code has been moved out of the Driver into a separate library. 
- The ``clangFrontend`` library no longer depends on ``clangDriver``, which may break downstream projects that relied on this transitive dependency. +- Clang is now more precise with regard to the lifetime of temporary objects, + such as when aggregates are passed by value to a function, resulting in + better sharing of stack slots and reduced stack usage. This change can lead + to use-after-scope related issues in code that unintentionally relied on the + previous behavior. If recompiling with ``-fsanitize=address`` shows a + use-after-scope warning, then this is likely the case, and the report printed + should be able to help users pinpoint where the use-after-scope is occurring. + Users can use ``-Xclang -sloppy-temporary-lifetimes`` to retain the old + behavior until they are able to find and resolve issues in their code. C/C++ Language Potentially Breaking Changes ------------------------------------------- diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 76a6463881c6f..e7f5b4c9a08a9 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -475,6 +475,10 @@ ENUM_CODEGENOPT(ZeroCallUsedRegs, ZeroCallUsedRegsKind, /// non-deleting destructors. (No effect on Microsoft ABI.) CODEGENOPT(CtorDtorReturnThis, 1, 0, Benign) +/// Set via -Xclang -sloppy-temporary-lifetimes to disable emission of lifetime +/// marker intrinsic calls for temporary objects. +CODEGENOPT(NoLifetimeMarkersForTemporaries, 1, 0, Benign) + /// Enables emitting Import Call sections on supported targets that can be used /// by the Windows kernel to enable import call optimization. 
CODEGENOPT(ImportCallOptimization, 1, 0, Benign) diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index d31bd7d6be322..3d5cb7cad80b7 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -8141,6 +8141,11 @@ def import_call_optimization : Flag<["-"], "import-call-optimization">, def replaceable_function: Joined<["-"], "loader-replaceable-function=">, MarshallingInfoStringVector<CodeGenOpts<"LoaderReplaceableFunctionNames">>; +def sloppy_temporary_lifetimes + : Flag<["-"], "sloppy-temporary-lifetimes">, + HelpText<"Don't emit lifetime markers for temporary objects">, + MarshallingInfoFlag<CodeGenOpts<"NoLifetimeMarkersForTemporaries">>; + } // let Visibility = [CC1Option] //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index efacb3cc04c01..ccccb6fa1ed82 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/Local.h" #include <optional> using namespace clang; @@ -4947,7 +4948,23 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, return; } - args.add(EmitAnyExprToTemp(E), type); + AggValueSlot ArgSlot = AggValueSlot::ignored(); + // If the callee returns a reference, skip this stack saving optimization; + // we don't want to prematurely end the lifetime of the temporary. It may be + // possible to still perform this optimization if the return type is a + // reference to a different type than the parameter. + if (hasAggregateEvaluationKind(E->getType())) { + RawAddress ArgSlotAlloca = Address::invalid(); + ArgSlot = CreateAggTemp(E->getType(), "agg.tmp", &ArgSlotAlloca); + + // Start this temporary's lifetime now and end it at the end of the full + // expression. 
+ if (!CGM.getCodeGenOpts().NoLifetimeMarkersForTemporaries && + EmitLifetimeStart(ArgSlotAlloca.getPointer())) + pushFullExprCleanup<CallLifetimeEnd>(NormalAndEHCleanup, ArgSlotAlloca); + } + + args.add(EmitAnyExpr(E, ArgSlot), type); } QualType CodeGenFunction::getVarArgType(const Expr *Arg) { diff --git a/clang/test/CodeGen/lifetime-call-temp.c b/clang/test/CodeGen/lifetime-call-temp.c new file mode 100644 index 0000000000000..3bc68b5e8024a --- /dev/null +++ b/clang/test/CodeGen/lifetime-call-temp.c @@ -0,0 +1,98 @@ +// RUN: %clang -cc1 -triple x86_64-apple-macos -O1 -disable-llvm-passes %s \ +// RUN: -emit-llvm -o - | FileCheck %s --implicit-check-not=llvm.lifetime +// RUN: %clang -cc1 -xc++ -std=c++17 -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -emit-llvm -o - -Wno-return-type-c-linkage | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,CXX +// RUN: %clang -cc1 -xobjective-c -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -emit-llvm -o - | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,OBJC +// RUN: %clang -cc1 -triple x86_64-apple-macos -O1 -disable-llvm-passes %s \ +// RUN: -emit-llvm -o - -sloppy-temporary-lifetimes | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=SLOPPY + +typedef struct { int x[100]; } aggregate; + +#ifdef __cplusplus +extern "C" { +#endif + +void takes_aggregate(aggregate); +aggregate gives_aggregate(); + +// CHECK-LABEL: define void @t1 +void t1() { + takes_aggregate(gives_aggregate()); + + // CHECK: [[AGGTMP:%.*]] = alloca %struct.aggregate, align 8 + // CHECK: call void @llvm.lifetime.start.p0(ptr [[AGGTMP]]) + // CHECK: call void{{.*}} @gives_aggregate(ptr{{.*}}sret(%struct.aggregate) align 4 [[AGGTMP]]) + // CHECK: call void @takes_aggregate(ptr noundef byval(%struct.aggregate) align 8 [[AGGTMP]]) + // CHECK: call void @llvm.lifetime.end.p0(ptr [[AGGTMP]]) + + // SLOPPY: [[AGGTMP:%.*]] = alloca 
%struct.aggregate, align 8 + // SLOPPY-NEXT: call void (ptr, ...) @gives_aggregate(ptr{{.*}}sret(%struct.aggregate) align 4 [[AGGTMP]]) + // SLOPPY-NEXT: call void @takes_aggregate(ptr noundef byval(%struct.aggregate) align 8 [[AGGTMP]]) +} + +// CHECK: declare {{.*}}llvm.lifetime.start +// CHECK: declare {{.*}}llvm.lifetime.end + +#ifdef __cplusplus +// CXX: define void @t2 +void t2() { + struct S { + S(aggregate) {} + }; + S{gives_aggregate()}; + + // CXX: [[AGG:%.*]] = alloca %struct.aggregate + // CXX: call void @llvm.lifetime.start.p0(ptr [[AGG]] + // CXX: call void @gives_aggregate(ptr{{.*}}sret(%struct.aggregate) align 4 [[AGG]]) + // CXX: call void @_ZZ2t2EN1SC1E9aggregate(ptr {{.*}}, ptr {{.*}} byval(%struct.aggregate) align 8 [[AGG]]) + // CXX: call void @llvm.lifetime.end.p0(ptr [[AGG]] +} + +struct Dtor { + ~Dtor(); +}; + +void takes_dtor(Dtor); +Dtor gives_dtor(); + +// CXX: define void @t3 +void t3() { + takes_dtor(gives_dtor()); + + // CXX: [[AGG:%.*]] = alloca %struct.Dtor + // CXX: call void @llvm.lifetime.start.p0(ptr [[AGG]]) + // CXX: call void @gives_dtor(ptr{{.*}}sret(%struct.Dtor) align 1 [[AGG]]) + // CXX: call void @takes_dtor(ptr noundef [[AGG]]) + // CXX: call void @_ZN4DtorD1Ev(ptr {{.*}} [[AGG]]) + // CXX: call void @llvm.lifetime.end.p0(ptr [[AGG]]) + // CXX: ret void +} + +#endif + +#ifdef __OBJC__ + +@interface X +-m:(aggregate)x; +@end + +// OBJC: define void @t4 +void t4(X *x) { + [x m: gives_aggregate()]; + + // OBJC: [[AGG:%.*]] = alloca %struct.aggregate + // OBJC: call void @llvm.lifetime.start.p0(ptr [[AGG]] + // OBJC: call void{{.*}} @gives_aggregate(ptr{{.*}}sret(%struct.aggregate) align 4 [[AGGTMP]]) + // OBJC: call {{.*}}@objc_msgSend + // OBJC: call void @llvm.lifetime.end.p0(ptr [[AGG]] +} + +#endif + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/stack-usage-lifetimes.c b/clang/test/CodeGen/stack-usage-lifetimes.c new file mode 100644 index 0000000000000..3787a29e4ce7d --- /dev/null +++ 
b/clang/test/CodeGen/stack-usage-lifetimes.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=x86-precise +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=x86-sloppy + +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=aarch64-precise +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=aarch64-sloppy + +// RUN: %clang_cc1 -triple riscv64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=riscv-precise +// RUN: %clang_cc1 -triple riscv64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=riscv-sloppy + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=x86-precise -xc++ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=x86-sloppy -xc++ + +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=aarch64-precise -xc++ +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=aarch64-sloppy -xc++ + +// RUN: %clang_cc1 -triple riscv64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog %s -verify=riscv-precise -xc++ +// RUN: %clang_cc1 -triple riscv64-unknown-linux-gnu -O1 -emit-codegen-only -Rpass-analysis=prologepilog -sloppy-temporary-lifetimes %s -verify=riscv-sloppy -xc++ + + +typedef struct { char x[32]; } A; +typedef struct { char *w, *x, *y, *z; } B; + +void useA(A); +void useB(B); +A genA(void); +B genB(void); + +void t1(int 
c) { + // x86-precise-remark@-1 {{40 stack bytes}} + // x86-sloppy-remark@-2 {{72 stack bytes}} + // aarch64-precise-remark@-3 {{48 stack bytes}} + // aarch64-sloppy-remark@-4 {{80 stack bytes}} + // riscv-precise-remark@-5 {{48 stack bytes}} + // riscv-sloppy-remark@-6 {{80 stack bytes}} + + if (c) + useA(genA()); + else + useA(genA()); +} + +void t2(void) { + // x86-precise-remark@-1 {{72 stack bytes}} + // x86-sloppy-remark@-2 {{72 stack bytes}} + // aarch64-precise-remark@-3 {{80 stack bytes}} + // aarch64-sloppy-remark@-4 {{80 stack bytes}} + // riscv-precise-remark@-5 {{80 stack bytes}} + // riscv-sloppy-remark@-6 {{80 stack bytes}} + + useA(genA()); + useA(genA()); +} + +void t3(void) { + // x86-precise-remark@-1 {{72 stack bytes}} + // x86-sloppy-remark@-2 {{72 stack bytes}} + // aarch64-precise-remark@-3 {{80 stack bytes}} + // aarch64-sloppy-remark@-4 {{80 stack bytes}} + // riscv-precise-remark@-5 {{80 stack bytes}} + // riscv-sloppy-remark@-6 {{80 stack bytes}} + + useB(genB()); + useB(genB()); +} + +#ifdef __cplusplus +struct C { + char x[24]; + char *ptr; + ~C() {}; +}; + +void useC(C); +C genC(void); + +// This case works in C++, since its AST is structured slightly differently +// than it is in C (CompoundStmt/ExprWithCleanups/CallExpr vs CompoundStmt/CallExpr). 
+void t4() { + // x86-precise-remark@-1 {{40 stack bytes}} + // x86-sloppy-remark@-2 {{72 stack bytes}} + // aarch64-precise-remark@-3 {{48 stack bytes}} + // aarch64-sloppy-remark@-4 {{80 stack bytes}} + // riscv-precise-remark@-5 {{48 stack bytes}} + // riscv-sloppy-remark@-6 {{80 stack bytes}} + + useC(genC()); + useC(genC()); +} +#endif diff --git a/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cc b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cc new file mode 100644 index 0000000000000..9b598a48f6436 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cc @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O3 -disable-llvm-passes -o - %s | FileCheck %s + +struct A { + float x, y, z, w; +}; + +void foo(A a); + +// CHECK-LABEL: @_Z4testv +// CHECK: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_A]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_TMP]] to ptr +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[A]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[AGG_TMP]]) #[[ATTR4]] +void test() { + A a; + foo(a); +} diff --git a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp index 67fa9f9c9cd98..50c374d2710f4 100644 --- a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp +++ b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp @@ -26,6 +26,8 @@ const char * f(S s) // CHECK: [[T2:%.*]] = alloca %class.T, align 4 // CHECK: [[T3:%.*]] = alloca %class.T, align 4 // +// CHECK: [[AGG:%.*]] = alloca %class.S, align 4 +// // FIXME: We could defer starting the lifetime of the return object of concat // until the call. 
// CHECK: call void @llvm.lifetime.start.p0(ptr [[T1]]) @@ -34,10 +36,12 @@ const char * f(S s) // CHECK: [[T4:%.*]] = call noundef ptr @_ZN1TC1EPKc(ptr {{[^,]*}} [[T2]], ptr noundef @.str) // // CHECK: call void @llvm.lifetime.start.p0(ptr [[T3]]) +// CHECK: call void @llvm.lifetime.start.p0(ptr [[AGG]]) // CHECK: [[T5:%.*]] = call noundef ptr @_ZN1TC1E1S(ptr {{[^,]*}} [[T3]], [2 x i32] %{{.*}}) // // CHECK: call void @_ZNK1T6concatERKS_(ptr dead_on_unwind writable sret(%class.T) align 4 [[T1]], ptr {{[^,]*}} [[T2]], ptr noundef nonnull align 4 dereferenceable(16) [[T3]]) // CHECK: [[T6:%.*]] = call noundef ptr @_ZNK1T3strEv(ptr {{[^,]*}} [[T1]]) +// CHECK: call void @llvm.lifetime.end.p0(ptr [[AGG]]) // // CHECK: call void @llvm.lifetime.end.p0( // CHECK: call void @llvm.lifetime.end.p0( diff --git a/clang/test/CodeGenCoroutines/pr59181.cpp b/clang/test/CodeGenCoroutines/pr59181.cpp index 21e784e0031de..a68a61984f981 100644 --- a/clang/test/CodeGenCoroutines/pr59181.cpp +++ b/clang/test/CodeGenCoroutines/pr59181.cpp @@ -49,6 +49,7 @@ void foo() { } // CHECK: cleanup.cont:{{.*}} +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[AGG:%agg.tmp]]) // CHECK-NEXT: load i8 // CHECK-NEXT: trunc // CHECK-NEXT: store i1 false @@ -57,3 +58,6 @@ void foo() { // CHECK-NOT: call void @llvm.lifetime // CHECK: call void @llvm.coro.await.suspend.void( // CHECK-NEXT: %{{[0-9]+}} = call i8 @llvm.coro.suspend( + +// CHECK-LABEL: cond.end: +// check call @llvm.lifetime.end.p0(ptr [[AGG]]) >From 764a10cbe24ba75a5585fbc07b23e18e466fb66a Mon Sep 17 00:00:00 2001 From: Paul Kirth <[email protected]> Date: Wed, 3 Dec 2025 14:15:08 -0800 Subject: [PATCH 2/2] Use more accurate comment --- clang/lib/CodeGen/CGCall.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index ccccb6fa1ed82..bb81ddfc20fb4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4949,10 +4949,13 @@ 
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, } AggValueSlot ArgSlot = AggValueSlot::ignored(); - // If the callee returns a reference, skip this stack saving optimization; - // we don't want to prematurely end the lifetime of the temporary. It may be - // possible to still perform this optimization if the return type is a - // reference to a different type than the parameter. + // For arguments with aggregate type, create an alloca to store + // the value. If the argument's type has a destructor, that destructor + // will run at the end of the full-expression; emit matching lifetime + // markers. + // + // FIXME: For types which don't have a destructor, consider using a + // narrower lifetime bound. if (hasAggregateEvaluationKind(E->getType())) { RawAddress ArgSlotAlloca = Address::invalid(); ArgSlot = CreateAggTemp(E->getType(), "agg.tmp", &ArgSlotAlloca); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
