https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/107822
Closes #94949. >From 110eea45aaaca6508f41032641a083df1c43092f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Mon, 9 Sep 2024 15:53:05 +0800 Subject: [PATCH 1/2] [FMV][Clang][CodeGen] Add pre-commit tests. NFC. --- .../test/CodeGen/attr-target-clones-inline.c | 295 ++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 clang/test/CodeGen/attr-target-clones-inline.c diff --git a/clang/test/CodeGen/attr-target-clones-inline.c b/clang/test/CodeGen/attr-target-clones-inline.c new file mode 100644 index 00000000000000..cc9e8dc762e483 --- /dev/null +++ b/clang/test/CodeGen/attr-target-clones-inline.c @@ -0,0 +1,295 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --version 5 +// RUN: %clang_cc1 -O1 -disable-llvm-passes -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +__attribute__((target_clones("default,sse4.2,avx2"))) +int callee(void) { return 1; } + +__attribute__((target_clones("default,avx2,sse4.2"))) +int caller(void) { return callee(); } + +__attribute__((target_clones("default,sse4.2,avx2"))) +int callee_decl(void); + +__attribute__((target_clones("default,avx2,sse4.2"))) +int caller_decl(void) { return callee_decl(); } + +__attribute__((target_clones("default,sse4.2,avx2"))) +int callee_deferred_def(void); + +__attribute__((target_clones("default,avx2,sse4.2"))) +int caller_deferred_def(void) { return callee_deferred_def(); } + +__attribute__((target_clones("default,sse4.2,avx2"))) +int callee_deferred_def(void) { return 1; } +//. +// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } +// CHECK: @__cpu_features2 = external dso_local global [3 x i32] +// CHECK: @callee.ifunc = weak_odr alias i32 (), ptr @callee +// CHECK: @caller.ifunc = weak_odr alias i32 (), ptr @caller +// CHECK: @callee_decl.ifunc = weak_odr alias i32 (), ptr @callee_decl +// CHECK: @caller_decl.ifunc = weak_odr alias i32 (), ptr @caller_decl +// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def +// CHECK: @caller_deferred_def.ifunc = weak_odr alias i32 (), ptr @caller_deferred_def +// CHECK: @callee = weak_odr ifunc i32 (), ptr @callee.resolver +// CHECK: @caller = weak_odr ifunc i32 (), ptr @caller.resolver +// CHECK: @callee_decl = weak_odr ifunc i32 (), ptr @callee_decl.resolver +// CHECK: @caller_decl = weak_odr ifunc i32 (), ptr @caller_decl.resolver +// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver +// CHECK: @caller_deferred_def = weak_odr ifunc i32 (), ptr @caller_deferred_def.resolver +//. +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee.default.2( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee.sse4.2.0( +// CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee.avx2.1( +// CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define weak_odr ptr @callee.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @callee.avx2.1 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @callee.sse4.2.0 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @callee.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller.default.2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller.avx2.0( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller.sse4.2.1( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @caller.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @caller.avx2.0 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @caller.sse4.2.1 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @caller.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_decl.default.2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @callee_decl.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @callee_decl.avx2.1 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @callee_decl.sse4.2.0 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @callee_decl.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_decl.avx2.0( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_decl.sse4.2.1( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @caller_decl.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @caller_decl.avx2.0 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @caller_decl.sse4.2.1 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @caller_decl.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_deferred_def.default.2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.avx2.1 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.sse4.2.0 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_deferred_def.avx2.0( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @caller_deferred_def.sse4.2.1( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @caller_deferred_def.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @caller_deferred_def.avx2.0 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @caller_deferred_def.sse4.2.1 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @caller_deferred_def.default.2 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +//. +// CHECK: attributes #[[ATTR0]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #[[ATTR1]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" } +// CHECK: attributes #[[ATTR2]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +// CHECK: attributes #[[ATTR3:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" } +// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. >From e2edb0cb73a348959399987584837b1c3a6b4eae Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Mon, 9 Sep 2024 16:26:42 +0800 Subject: [PATCH 2/2] [FMV][Clang][CodeGen] Resolves corresponding callee for multi-versioning callers --- clang/lib/CodeGen/CGExpr.cpp | 29 ++++ .../test/CodeGen/attr-target-clones-inline.c | 128 +++++++----------- 2 files changed, 81 insertions(+), 76 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 99cd61b9e78953..714c4b8922094b 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5637,6 +5637,35 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { return CGCallee::forBuiltin(builtinID, FD); } + // Resolves corresponding callee for this version of multi-versioning caller + // if they share the same features. + if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0 && + FD->isTargetClonesMultiVersion()) { + if (auto *TC = CGF.CurFuncDecl->getAttr<TargetClonesAttr>()) { + llvm::Constant *CalleePtr = nullptr; + CGF.getContext().forEachMultiversionedFunctionVersion( + FD, [&](const FunctionDecl *CurFD) { + if (const auto *CalleeTC = FD->getAttr<TargetClonesAttr>()) { + StringRef FeatStr = + TC->getFeatureStr(CGF.CurGD.getMultiVersionIndex()); + auto It = llvm::find(CalleeTC->featuresStrs(), FeatStr); + if (It != CalleeTC->featuresStrs_end()) { + GD = GlobalDecl(CurFD, It - CalleeTC->featuresStrs_begin()); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeGlobalDeclaration(GD); + llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(FI); + CalleePtr = CGF.CGM.GetAddrOfFunction( + GD, Ty, /*ForVTable=*/false, + /*DontDefer=*/false, ForDefinition); + } + } + }); + + if (CalleePtr) + return CGCallee::forDirect(CalleePtr, GD); + } + } + llvm::Constant *CalleePtr = CGF.CGM.getRawFunctionPointer(GD); if (CGF.CGM.getLangOpts().CUDA && !CGF.CGM.getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>()) diff --git a/clang/test/CodeGen/attr-target-clones-inline.c b/clang/test/CodeGen/attr-target-clones-inline.c index cc9e8dc762e483..7dc3819b81e188 100644 --- a/clang/test/CodeGen/attr-target-clones-inline.c +++ b/clang/test/CodeGen/attr-target-clones-inline.c @@ -26,16 +26,14 @@ int callee_deferred_def(void) { return 1; } // CHECK: @__cpu_features2 = external dso_local global [3 x i32] // CHECK: @callee.ifunc = weak_odr alias i32 (), ptr @callee // CHECK: @caller.ifunc = weak_odr alias i32 (), ptr @caller -// CHECK: @callee_decl.ifunc = weak_odr alias i32 (), ptr @callee_decl // CHECK: @caller_decl.ifunc = weak_odr alias i32 (), ptr @caller_decl -// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def // CHECK: @caller_deferred_def.ifunc = weak_odr alias i32 (), ptr @caller_deferred_def +// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def // CHECK: @callee = weak_odr ifunc i32 (), ptr @callee.resolver // CHECK: @caller = weak_odr ifunc i32 (), ptr @caller.resolver -// CHECK: @callee_decl = weak_odr ifunc i32 (), ptr @callee_decl.resolver // CHECK: @caller_decl = weak_odr ifunc i32 (), ptr @caller_decl.resolver -// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver // CHECK: @caller_deferred_def = weak_odr ifunc i32 (), ptr @caller_deferred_def.resolver +// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver //. // CHECK: Function Attrs: nounwind // CHECK-LABEL: define dso_local i32 @callee.default.2( @@ -84,7 +82,7 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller.default.2( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.default.2() // CHECK-NEXT: ret i32 [[CALL]] // // @@ -92,7 +90,7 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller.avx2.0( // CHECK-SAME: ) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.avx2.1() // CHECK-NEXT: ret i32 [[CALL]] // // @@ -100,7 +98,7 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller.sse4.2.1( // CHECK-SAME: ) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.sse4.2.0() // CHECK-NEXT: ret i32 [[CALL]] // // @@ -130,37 +128,15 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller_decl.default.2( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl.default.2() // CHECK-NEXT: ret i32 [[CALL]] // // -// CHECK-LABEL: define weak_odr ptr @callee_decl.resolver() comdat { -// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] -// CHECK-NEXT: call void @__cpu_indicator_init() -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] -// CHECK: [[RESOLVER_RETURN]]: -// CHECK-NEXT: ret ptr @callee_decl.avx2.1 -// CHECK: [[RESOLVER_ELSE]]: -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 -// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] -// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] -// CHECK: [[RESOLVER_RETURN1]]: -// CHECK-NEXT: ret ptr @callee_decl.sse4.2.0 -// CHECK: [[RESOLVER_ELSE2]]: -// CHECK-NEXT: ret ptr @callee_decl.default.2 -// -// // CHECK: Function Attrs: nounwind // CHECK-LABEL: define dso_local i32 @caller_decl.avx2.0( // CHECK-SAME: ) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl.avx2.1() // CHECK-NEXT: ret i32 [[CALL]] // // @@ -168,7 +144,7 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller_decl.sse4.2.1( // CHECK-SAME: ) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_decl.sse4.2.0() // CHECK-NEXT: ret i32 [[CALL]] // // @@ -198,48 +174,47 @@ int callee_deferred_def(void) { return 1; } // CHECK-LABEL: define dso_local i32 @caller_deferred_def.default.2( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def.default.2() // CHECK-NEXT: ret i32 [[CALL]] // // -// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat { -// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] -// CHECK-NEXT: call void @__cpu_indicator_init() -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 -// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] -// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] -// CHECK: [[RESOLVER_RETURN]]: -// CHECK-NEXT: ret ptr @callee_deferred_def.avx2.1 -// CHECK: [[RESOLVER_ELSE]]: -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 -// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] -// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] -// CHECK: [[RESOLVER_RETURN1]]: -// CHECK-NEXT: ret ptr @callee_deferred_def.sse4.2.0 -// CHECK: [[RESOLVER_ELSE2]]: -// CHECK-NEXT: ret ptr @callee_deferred_def.default.2 +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: nounwind // CHECK-LABEL: define dso_local i32 @caller_deferred_def.avx2.0( // CHECK-SAME: ) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def.avx2.1() // CHECK-NEXT: ret i32 [[CALL]] // // // CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: nounwind // CHECK-LABEL: define dso_local i32 @caller_deferred_def.sse4.2.1( // CHECK-SAME: ) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @callee_deferred_def.sse4.2.0() // CHECK-NEXT: ret i32 [[CALL]] // // +// CHECK: Function Attrs: nounwind +// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +// // CHECK-LABEL: define weak_odr ptr @caller_deferred_def.resolver() comdat { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__cpu_indicator_init() @@ -262,33 +237,34 @@ int callee_deferred_def(void) { return 1; } // CHECK-NEXT: ret ptr @caller_deferred_def.default.2 // // -// CHECK: Function Attrs: nounwind -// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i32 1 -// -// -// CHECK: Function Attrs: nounwind -// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0( -// CHECK-SAME: ) #[[ATTR1]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i32 1 -// -// -// CHECK: Function Attrs: nounwind -// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1( -// CHECK-SAME: ) #[[ATTR2]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__cpu_indicator_init() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.avx2.1 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.sse4.2.0 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: ret ptr @callee_deferred_def.default.2 // //. // CHECK: attributes #[[ATTR0]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #[[ATTR1]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" } // CHECK: attributes #[[ATTR2]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } // CHECK: attributes #[[ATTR3:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" } -// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits