https://github.com/nikola-tesic-ns updated https://github.com/llvm/llvm-project/pull/96633
>From 41427a3de345517025477257bfd4f614f06cbcfe Mon Sep 17 00:00:00 2001 From: Nikola Tesic <nikola.te...@nextsilicon.com> Date: Tue, 25 Jun 2024 15:58:18 +0300 Subject: [PATCH 1/3] [Clang] Access tls_guard via llvm.threadlocal.address This patch fixes compiler generated code in `tls_init` function to access TLS variable (`tls_guard`) via llvm.threadlocal.address intrinsic. --- clang/lib/CodeGen/CGDeclCXX.cpp | 29 +++++++++++------- clang/test/CodeGenCXX/cxx11-thread-local.cpp | 9 ++++-- .../static-initializer-branch-weights.cpp | 3 +- clang/test/OpenMP/parallel_copyin_codegen.cpp | 6 ++-- .../OpenMP/target_has_device_addr_codegen.cpp | 6 ++-- clang/test/OpenMP/threadprivate_codegen.cpp | 30 ++++++++++++------- 6 files changed, 55 insertions(+), 28 deletions(-) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index e18b339b31d24..0663a083bf3e8 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -1059,9 +1059,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, if (Guard.isValid()) { // If we have a guard variable, check whether we've already performed // these initializations. This happens for TLS initialization functions. - llvm::Value *GuardVal = Builder.CreateLoad(Guard); - llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, - "guard.uninitialized"); + llvm::Value *GuardVal = EmitLoadOfScalar( + MakeAddrLValue(Guard, getContext().IntTy), SourceLocation()); + llvm::Value *Uninit = + Builder.CreateIsNull(GuardVal, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); ExitBlock = createBasicBlock("exit"); EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock, @@ -1070,13 +1071,21 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. - Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard); - - // The guard variable can't ever change again. - EmitInvariantStart( - Guard.getPointer(), - CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()))); + EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1), + MakeAddrLValue(Guard, getContext().IntTy)); + + // Emit invariant start for TLS guard address. + if (CGM.getCodeGenOpts().OptimizationLevel > 0) { + uint64_t Width = + CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()); + llvm::Value *TLSAddr = Guard.getPointer(); + // Get the thread-local address via intrinsic. + if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) + if (GV->isThreadLocal()) + TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer()); + Builder.CreateInvariantStart( + TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width)); + } } RunCleanupsScope Scope(*this); diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp index bcc490bc32e6e..e9a0799cf8d9a 100644 --- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -358,12 +358,15 @@ void set_anon_i() { // CHECK: define {{.*}}@__tls_init() -// CHECK: load i8, ptr @__tls_guard +// CHECK: [[TLS_GUARD_ADDR_1:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK: load i8, ptr [[TLS_GUARD_ADDR_1]] // CHECK: %[[NEED_TLS_INIT:.*]] = icmp eq i8 %{{.*}}, 0 // CHECK: br i1 %[[NEED_TLS_INIT]], // init: -// CHECK: store i8 1, ptr @__tls_guard -// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr @__tls_guard) +// CHECK: [[TLS_GUARD_ADDR_2:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK: store i8 1, ptr [[TLS_GUARD_ADDR_2]] +// CHECK-OPT: [[TLS_GUARD_ADDR_3:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr [[TLS_GUARD_ADDR_3]]) // CHECK-NOT: call void @[[V_M_INIT]]() // CHECK: call void @[[A_INIT]]() // CHECK-NOT: call void @[[V_M_INIT]]() diff --git a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp index 121b9b2029959..e855f54643eae 100644 --- a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp +++ b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp @@ -118,7 +118,8 @@ void use_b() { } // CHECK-LABEL: define {{.*}}tls_init() -// CHECK: load i8, ptr @__tls_guard, align 1 +// CHECK: [[TLS_GUARD_ADDR:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK: load i8, ptr [[TLS_GUARD_ADDR]], align 1 // CHECK: icmp eq i8 {{.*}}, 0 // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]] diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp index e653a7734161b..aa2ea78c4fe09 100644 --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -1760,11 +1760,13 @@ void foo() { // CHECK16-LABEL: define {{[^@]+}}@__tls_init // CHECK16-SAME: () #[[ATTR0]] { // CHECK16-NEXT: entry: -// CHECK16-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1 +// CHECK16-NEXT: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK16-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1 // CHECK16-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK16-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF5:![0-9]+]] // CHECK16: init: -// CHECK16-NEXT: store i8 1, ptr @__tls_guard, align 1 +// CHECK16-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK16-NEXT: store i8 1, ptr [[TLS_ADR_2]], align 1 // CHECK16-NEXT: call void @__cxx_global_var_init() // CHECK16-NEXT: br label [[EXIT]] // CHECK16: exit: diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp index ba1b618ed8bdd..71dd92eb9fe48 100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -1304,11 +1304,13 @@ void use_template() { // CHECK-LABEL: define {{[^@]+}}@__tls_init // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1 +// CHECK-NEXT: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1 // CHECK-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF18:![0-9]+]] // CHECK: init: -// CHECK-NEXT: store i8 1, ptr @__tls_guard, align 1 +// CHECK-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-NEXT: store i8 1, ptr [[TLS_ADR_2]], align 1 // CHECK-NEXT: call void @__cxx_global_var_init.4() // CHECK-NEXT: br label [[EXIT]] // CHECK: exit: diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp index b5eb4651d6c33..2dbdfc5eb6095 100644 --- a/clang/test/OpenMP/threadprivate_codegen.cpp +++ b/clang/test/OpenMP/threadprivate_codegen.cpp @@ -958,11 +958,13 @@ int foobar() { // OMP50-TLS: define {{.*}}void [[ST_S4_ST_DTOR2]](ptr {{.*}}) // CHECK-TLS: define internal void @__tls_init() -// CHECK-TLS: [[GRD:%.*]] = load i8, ptr @__tls_guard +// CHECK-TLS: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS-NEXT: [[GRD:%.*]] = load i8, ptr [[TLS_ADR_1]] // CHECK-TLS-NEXT: [[IS_INIT:%.*]] = icmp eq i8 [[GRD]], 0 // CHECK-TLS-NEXT: br i1 [[IS_INIT]], label %[[INIT_LABEL:[^,]+]], label %[[DONE_LABEL:[^,]+]]{{.*}} // CHECK-TLS: [[INIT_LABEL]] -// CHECK-TLS-NEXT: store i8 1, ptr @__tls_guard +// CHECK-TLS-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS-NEXT: store i8 1, ptr [[TLS_ADR_2]] // CHECK-TLS: call void [[GS1_CXX_INIT]] // CHECK-TLS-NOT: call void [[GS2_CXX_INIT]] // CHECK-TLS: call void [[ARR_X_CXX_INIT]] @@ -3829,11 +3831,13 @@ int foobar() { // CHECK-TLS1-LABEL: define {{[^@]+}}@__tls_init // CHECK-TLS1-SAME: () #[[ATTR0]] { // CHECK-TLS1-NEXT: entry: -// CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1 +// CHECK-TLS1-NEXT: [[TLS_ADR_3:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_3]], align 1 // CHECK-TLS1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]] // CHECK-TLS1: init: -// CHECK-TLS1-NEXT: store i8 1, ptr @__tls_guard, align 1 +// CHECK-TLS1-NEXT: [[TLS_ADR_4:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS1-NEXT: store i8 1, ptr [[TLS_ADR_4]], align 1 // CHECK-TLS1-NEXT: call void @__cxx_global_var_init() // CHECK-TLS1-NEXT: call void @__cxx_global_var_init.2() // CHECK-TLS1-NEXT: br label [[EXIT]] @@ -4366,11 +4370,13 @@ int foobar() { // CHECK-TLS2-LABEL: define {{[^@]+}}@__tls_init // CHECK-TLS2-SAME: () #[[ATTR6]] { // CHECK-TLS2-NEXT: entry: -// CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1 +// CHECK-TLS2-NEXT: [[TLS_ADR_5:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_5]], align 1 // CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]] // CHECK-TLS2: init: -// CHECK-TLS2-NEXT: store i8 1, ptr @__tls_guard, align 1 +// CHECK-TLS2-NEXT: [[TLS_ADR_6:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS2-NEXT: store i8 1, ptr [[TLS_ADR_6]], align 1 // CHECK-TLS2-NEXT: call void @__cxx_global_var_init() // CHECK-TLS2-NEXT: call void @__cxx_global_var_init.2() // CHECK-TLS2-NEXT: br label [[EXIT]] @@ -4918,11 +4924,13 @@ int foobar() { // CHECK-TLS3-LABEL: define {{[^@]+}}@__tls_init // CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG326:![0-9]+]] { // CHECK-TLS3-NEXT: entry: -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]] +// CHECK-TLS3-NEXT: [[TLS_ADR_7:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_7]], align 1, !dbg [[DBG327:![0-9]+]] // CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]] // CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF206]] // CHECK-TLS3: init: -// CHECK-TLS3-NEXT: store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]] +// CHECK-TLS3-NEXT: [[TLS_ADR_8:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS3-NEXT: store i8 1, ptr [[TLS_ADR_8]], align 1, !dbg [[DBG327]] // CHECK-TLS3-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]] // CHECK-TLS3-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]] // CHECK-TLS3-NEXT: br label [[EXIT]], !dbg [[DBG327]] @@ -5482,11 +5490,13 @@ int foobar() { // CHECK-TLS4-LABEL: define {{[^@]+}}@__tls_init // CHECK-TLS4-SAME: () #[[ATTR6]] !dbg [[DBG326:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]] +// CHECK-TLS4-NEXT: [[TLS_ADR_9:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_9]], align 1, !dbg [[DBG327:![0-9]+]] // CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]] // CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF119]] // CHECK-TLS4: init: -// CHECK-TLS4-NEXT: store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]] +// CHECK-TLS4-NEXT: [[TLS_ADR_10:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard) +// CHECK-TLS4-NEXT: store i8 1, ptr [[TLS_ADR_10]], align 1, !dbg [[DBG327]] // CHECK-TLS4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]] // CHECK-TLS4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]] // CHECK-TLS4-NEXT: br label [[EXIT]], !dbg [[DBG327]] >From 9989b88e0206f130a0870b490b890e1f3ddcdfbf Mon Sep 17 00:00:00 2001 From: Nikola Tesic <nikola.te...@nextsilicon.com> Date: Wed, 26 Jun 2024 14:09:02 +0300 Subject: [PATCH 2/3] Fixup1: Use CreateThreadLocalAddress API directly --- clang/lib/CodeGen/CGDeclCXX.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 0663a083bf3e8..54051f146e191 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -1059,8 +1059,13 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, if (Guard.isValid()) { // If we have a guard variable, check whether we've already performed // these initializations. This happens for TLS initialization functions. - llvm::Value *GuardVal = EmitLoadOfScalar( - MakeAddrLValue(Guard, getContext().IntTy), SourceLocation()); + Address GuardAddr = Guard; + if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) + // Get the thread-local address via intrinsic. + if (GV->isThreadLocal()) + GuardAddr = GuardAddr.withPointer( + Builder.CreateThreadLocalAddress(GV), NotKnownNonNull); + llvm::Value *GuardVal = Builder.CreateLoad(GuardAddr); llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); @@ -1071,18 +1076,23 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. - EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1), - MakeAddrLValue(Guard, getContext().IntTy)); + if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) + // Get the thread-local address via intrinsic. + if (GV->isThreadLocal()) + GuardAddr = GuardAddr.withPointer( + Builder.CreateThreadLocalAddress(GV), NotKnownNonNull); + Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1), + GuardAddr); // Emit invariant start for TLS guard address. if (CGM.getCodeGenOpts().OptimizationLevel > 0) { uint64_t Width = CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()); llvm::Value *TLSAddr = Guard.getPointer(); - // Get the thread-local address via intrinsic. if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) + // Get the thread-local address via intrinsic. if (GV->isThreadLocal()) - TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer()); + TLSAddr = Builder.CreateThreadLocalAddress(GV); Builder.CreateInvariantStart( TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width)); } >From b6802de028ccb46cfc8df859a06b35e220c5dc29 Mon Sep 17 00:00:00 2001 From: Nikola Tesic <nikola.te...@nextsilicon.com> Date: Tue, 2 Jul 2024 18:58:22 +0300 Subject: [PATCH 3/3] Fixup2: Pass IsTLS flag --- clang/lib/CodeGen/CGDeclCXX.cpp | 64 +++++++++++++++-------------- clang/lib/CodeGen/CodeGenFunction.h | 11 ++--- clang/lib/CodeGen/ItaniumCXXABI.cpp | 3 +- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 54051f146e191..b05ae470188fc 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -154,21 +154,28 @@ static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D, Addr, CGF.getContext().getTypeSizeInChars(D.getType())); } -void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { +void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size, + bool IsTLS) { // Do not emit the intrinsic if we're not optimizing. if (!CGM.getCodeGenOpts().OptimizationLevel) return; // Grab the llvm.invariant.start intrinsic. llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; + llvm::Value *AddrPtr = Addr; + // Get the thread-local address via intrinsic. + if (IsTLS) + AddrPtr = Builder.CreateThreadLocalAddress(AddrPtr); + // Overloaded address space type. - assert(Addr->getType()->isPointerTy() && "Address must be a pointer"); - llvm::Type *ObjectPtr[1] = {Addr->getType()}; + assert(AddrPtr->getType()->isPointerTy() && "Address must be a pointer"); + llvm::Type *ObjectPtr[1] = {AddrPtr->getType()}; llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); - llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), Addr}; + llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), + AddrPtr}; Builder.CreateCall(InvariantStart, Args); } @@ -760,6 +767,7 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { // If we have a completely empty initializer then we do not want to create // the guard variable. ConstantAddress GuardAddr = ConstantAddress::invalid(); + bool IsTLS = false; if (!ModuleInits.empty()) { // Create the guard var. llvm::GlobalVariable *Guard = new llvm::GlobalVariable( @@ -769,9 +777,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getAsAlign()); GuardAddr = ConstantAddress(Guard, Int8Ty, GuardAlign); + IsTLS = Guard->isThreadLocal(); } - CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits, - GuardAddr); + CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits, GuardAddr, + IsTLS); } // We allow for the case that a module object is added to a linked binary @@ -1044,10 +1053,9 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, FinishFunction(); } -void -CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, - ArrayRef<llvm::Function *> Decls, - ConstantAddress Guard) { +void CodeGenFunction::GenerateCXXGlobalInitFunc( + llvm::Function *Fn, ArrayRef<llvm::Function *> Decls, ConstantAddress Guard, + bool IsTLS) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -1060,11 +1068,11 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // If we have a guard variable, check whether we've already performed // these initializations. This happens for TLS initialization functions. Address GuardAddr = Guard; - if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) - // Get the thread-local address via intrinsic. - if (GV->isThreadLocal()) - GuardAddr = GuardAddr.withPointer( - Builder.CreateThreadLocalAddress(GV), NotKnownNonNull); + // Get the thread-local address via intrinsic. + if (IsTLS) + GuardAddr = GuardAddr.withPointer( + Builder.CreateThreadLocalAddress(Guard.getPointer()), + NotKnownNonNull); llvm::Value *GuardVal = Builder.CreateLoad(GuardAddr); llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, "guard.uninitialized"); @@ -1076,26 +1084,20 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. - if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) - // Get the thread-local address via intrinsic. - if (GV->isThreadLocal()) - GuardAddr = GuardAddr.withPointer( - Builder.CreateThreadLocalAddress(GV), NotKnownNonNull); + // Get the thread-local address via intrinsic. + if (IsTLS) + GuardAddr = GuardAddr.withPointer( + Builder.CreateThreadLocalAddress(Guard.getPointer()), + NotKnownNonNull); Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1), GuardAddr); // Emit invariant start for TLS guard address. - if (CGM.getCodeGenOpts().OptimizationLevel > 0) { - uint64_t Width = - CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()); - llvm::Value *TLSAddr = Guard.getPointer(); - if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer())) - // Get the thread-local address via intrinsic. - if (GV->isThreadLocal()) - TLSAddr = Builder.CreateThreadLocalAddress(GV); - Builder.CreateInvariantStart( - TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width)); - } + EmitInvariantStart( + Guard.getPointer(), + CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())), + IsTLS); } RunCleanupsScope Scope(*this); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a9c497bde6871..757217f8bf1e3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -39,6 +39,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -4832,7 +4833,8 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::GlobalVariable *GV); // Emit an @llvm.invariant.start call for the given memory region. - void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size); + void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size, + bool IsTLS = false); /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++ /// variable with global storage. @@ -4881,10 +4883,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. - void - GenerateCXXGlobalInitFunc(llvm::Function *Fn, - ArrayRef<llvm::Function *> CXXThreadLocals, - ConstantAddress Guard = ConstantAddress::invalid()); + void GenerateCXXGlobalInitFunc( + llvm::Function *Fn, ArrayRef<llvm::Function *> CXXThreadLocals, + ConstantAddress Guard = ConstantAddress::invalid(), bool IsTLS = false); /// GenerateCXXGlobalCleanUpFunc - Generates code for cleaning up global /// variables. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 01a735c1437e1..f1a1ffa07a164 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2933,7 +2933,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Guard->setAlignment(GuardAlign.getAsAlign()); CodeGenFunction(CGM).GenerateCXXGlobalInitFunc( - InitFunc, OrderedInits, ConstantAddress(Guard, CGM.Int8Ty, GuardAlign)); + InitFunc, OrderedInits, ConstantAddress(Guard, CGM.Int8Ty, GuardAlign), + Guard->isThreadLocal()); // On Darwin platforms, use CXX_FAST_TLS calling convention. if (CGM.getTarget().getTriple().isOSDarwin()) { InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits