ChuanqiXu updated this revision to Diff 435461.
ChuanqiXu added a comment.
Handle function local thread locals.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D125291/new/
https://reviews.llvm.org/D125291
Files:
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/ItaniumCXXABI.cpp
clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp
clang/test/CodeGenCXX/pr18635.cpp
clang/test/CodeGenCXX/threadlocal_address.cpp
llvm/docs/LangRef.rst
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/IR/IRBuilder.h
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
llvm/lib/IR/IRBuilder.cpp
llvm/test/Transforms/PreISelIntrinsicLowering/threadlocal_address.ll
Index: llvm/test/Transforms/PreISelIntrinsicLowering/threadlocal_address.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PreISelIntrinsicLowering/threadlocal_address.ll
@@ -0,0 +1,25 @@
+; RUN: opt -pre-isel-intrinsic-lowering -opaque-pointers -S -o - < %s | FileCheck %s
+
+@i = thread_local global i32 0, align 4
+
+define dso_local noundef i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store i32 [[INC]], ptr @i, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+; CHECK-NOT: call{{.*}}@llvm.threadlocal.address(
+entry:
+ %0 = call ptr @llvm.threadlocal.address(ptr @i)
+ %1 = load i32, ptr %0, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, ptr %0, align 4
+ %2 = call ptr @llvm.threadlocal.address(ptr @i)
+ %3 = load i32, ptr %2, align 4
+ ret i32 %3
+}
+
+declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -499,6 +499,13 @@
return createCallHelper(TheFn, Ops, this);
}
+CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) {
+ assert(isa<GlobalValue>(Ptr) && cast<GlobalValue>(Ptr)->isThreadLocal() &&
+ "threadlocal_address only applies to thread local variables.");
+ return CreateIntrinsic(llvm::Intrinsic::threadlocal_address, llvm::None,
+ {Ptr});
+}
+
CallInst *
IRBuilderBase::CreateAssumption(Value *Cond,
ArrayRef<OperandBundleDef> OpBundles) {
Index: llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
===================================================================
--- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
-// intrinsics.
+// This pass implements IR lowering for the llvm.threadlocal.address,
+// llvm.load.relative and llvm.objc.* intrinsics.
//
//===----------------------------------------------------------------------===//
@@ -128,6 +128,19 @@
return true;
}
+static bool lowerThreadLocalIntrinsics(Function &F) {
+ if (F.use_empty())
+ return false;
+
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto *CB = cast<CallBase>(U.getUser());
+ CB->replaceAllUsesWith(CB->getOperand(0));
+ CB->eraseFromParent();
+ }
+
+ return true;
+}
+
static bool lowerIntrinsics(Module &M) {
bool Changed = false;
for (Function &F : M) {
@@ -213,6 +226,9 @@
case Intrinsic::objc_sync_exit:
Changed |= lowerObjCCall(F, "objc_sync_exit");
break;
+ case Intrinsic::threadlocal_address:
+ Changed |= lowerThreadLocalIntrinsics(F);
+ break;
}
}
return Changed;
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1390,6 +1390,9 @@
def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_threadlocal_address : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrWillReturn]>;
+
def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[], [IntrNoMem]>;
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -743,6 +743,9 @@
/// If the pointer isn't i8* it will be converted.
CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
+ /// Create a call to llvm.threadlocal.address intrinsic.
+ CallInst *CreateThreadLocalAddress(Value *Ptr);
+
/// Create a call to Masked Load intrinsic
CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
Value *PassThru = nullptr, const Twine &Name = "");
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -642,6 +642,7 @@
case Intrinsic::coro_align:
case Intrinsic::coro_suspend:
case Intrinsic::coro_subfn_addr:
+ case Intrinsic::threadlocal_address:
// These intrinsics don't actually represent code after lowering.
return 0;
}
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -24402,6 +24402,37 @@
mask argument does not match the pointer size of the target, the mask is
zero-extended or truncated accordingly.
+.. _int_threadlocal_address:
+
+'``llvm.threadlocal.address``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn
+
+Arguments:
+""""""""""
+
+The first argument is a pointer, which refers to a thread local variable.
+
+Overview:
+""""""""""
+
+LLVM has traditionally treated the address of a thread local variable as a
+constant expression, which is not correct. The ``llvm.threadlocal.address``
+intrinsic explicitly represents the address of the thread local variable.
+
+Semantics:
+""""""""""
+
+The address of a thread local variable is not a constant, since it depends on
+the calling thread. The ``llvm.threadlocal.address`` intrinsic returns the
+address of the given thread local variable in the calling thread.
+
.. _int_vscale:
'``llvm.vscale``' Intrinsic
Index: clang/test/CodeGenCXX/threadlocal_address.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/threadlocal_address.cpp
@@ -0,0 +1,54 @@
+// Test that uses of thread local variables are wrapped by the @llvm.threadlocal.address intrinsic.
+// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - %s -disable-llvm-passes | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - -O1 %s | FileCheck %s -check-prefix=CHECK-O1
+thread_local int i;
+int g() {
+ i++;
+ return i;
+}
+// CHECK: @i = thread_local global i32 0
+// CHECK: @_ZZ1fvE1j = internal thread_local global i32 0
+//
+// CHECK: @_Z1gv()
+// CHECK-NEXT: entry
+// CHECK-NEXT: %[[IA:.+]] = call ptr @llvm.threadlocal.address(ptr @i)
+// CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[IA]]
+// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1
+// CHECK-NEXT: store i32 %[[INC]], ptr %[[IA]], align 4
+// CHECK-NEXT: %[[IA2:.+]] = call ptr @llvm.threadlocal.address(ptr @i)
+// CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[IA2]], align 4
+// CHECK-NEXT: ret i32 %[[RET]]
+//
+// CHECK: declare ptr @llvm.threadlocal.address(ptr) #[[ATTR_NUM:.+]]
+//
+// CHECK-O1-LABEL: @_Z1gv
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: %[[I_ADDR:.+]] = call ptr @llvm.threadlocal.address(ptr nonnull @i)
+// CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[I_ADDR]]
+// CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1
+// CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[I_ADDR]]
+// CHECK-O1-NEXT: ret i32 %[[INC]]
+int f() {
+ thread_local int j = 0;
+ j++;
+ return j;
+}
+// CHECK: @_Z1fv()
+// CHECK-NEXT: entry
+// CHECK-NEXT: %[[JA:.+]] = call ptr @llvm.threadlocal.address(ptr @_ZZ1fvE1j)
+// CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[JA]]
+// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1
+// CHECK-NEXT: store i32 %[[INC]], ptr %[[JA]], align 4
+// CHECK-NEXT: %[[JA2:.+]] = call ptr @llvm.threadlocal.address(ptr @_ZZ1fvE1j)
+// CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[JA2]], align 4
+// CHECK-NEXT: ret i32 %[[RET]]
+//
+// CHECK-O1-LABEL: @_Z1fv
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: %[[J_ADDR:.+]] = call ptr @llvm.threadlocal.address(ptr nonnull @_ZZ1fvE1j)
+// CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[J_ADDR]]
+// CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1
+// CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]]
+// CHECK-O1-NEXT: ret i32 %[[INC]]
+//
+// CHECK: attributes #[[ATTR_NUM]] = { nounwind readnone willreturn }
Index: clang/test/CodeGenCXX/pr18635.cpp
===================================================================
--- clang/test/CodeGenCXX/pr18635.cpp
+++ clang/test/CodeGenCXX/pr18635.cpp
@@ -4,7 +4,9 @@
// CHECK: [[X_GLOBAL:@[^ ]+]]{{.*}}thread_local global
// returned somewhere in TLS wrapper:
-// CHECK: ret{{.*}}[[X_GLOBAL]]
+// CHECK: define {{.+}} ptr @_ZTW1x(
+// CHECK: [[X_GLOBAL_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address(ptr [[X_GLOBAL]])
+// CHECK: ret{{.*}}[[X_GLOBAL_ADDR]]
template <typename T> class unique_ptr {
template <typename F, typename S> struct pair {
Index: clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp
+++ clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp
@@ -17,7 +17,8 @@
// CHECK-LABEL: define weak_odr hidden {{.*}} @_ZTWN3TLSI1SE5mDataE() {{.*}} comdat {
// CHECK: call void @_ZTHN3TLSI1SE5mDataE()
-// CHECK: ret {{.*}} @_ZN3TLSI1SE5mDataE
+// CHECK: [[TLSmData_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address(ptr @_ZN3TLSI1SE5mDataE)
+// CHECK: ret {{.*}} [[TLSmData_ADDR]]
// Unlike for a global, the global initialization function must not be in a
// COMDAT with the variable, because it is referenced from the _ZTH function
Index: clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2990,6 +2990,11 @@
if (Val->getType() != Wrapper->getReturnType())
Val = Builder.CreatePointerBitCastOrAddrSpaceCast(
Val, Wrapper->getReturnType(), "");
+
+ // TODO: Remove this condition once we support opaque pointers only.
+ if (CGM.getCodeGenOpts().OpaquePointers)
+ Val = Builder.CreateThreadLocalAddress(Val);
+
Builder.CreateRet(Val);
}
}
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2604,6 +2604,15 @@
}
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
+
+ if (VD->getTLSKind() != VarDecl::TLS_None &&
+ // We only use @llvm.threadlocal.address if opaque pointers are enabled.
+ // Otherwise, we need to pay for many unnecessary bitcasts.
+ //
+ // TODO: Remove this condition once we support opaque pointers only.
+ CGF.CGM.getCodeGenOpts().OpaquePointers)
+ V = CGF.Builder.CreateThreadLocalAddress(V);
+
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
@@ -2873,6 +2882,12 @@
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
}
+ // Handle threadlocal function locals.
+ if (VD->getTLSKind() != VarDecl::TLS_None &&
+ CGM.getCodeGenOpts().OpaquePointers) {
+ auto *var = Builder.CreateThreadLocalAddress(addr.getPointer());
+ addr = Address(var, addr.getElementType(), addr.getAlignment());
+ }
// Check for OpenMP threadprivate variables.
if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits