jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield. Herald added subscribers: asavonic, guansong, yaxunl, jvesely. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1. Herald added a project: clang.
Currently the device kernels all have weak linkage to prevent linkage errors on multiple defintions. However, this prevents some optimizations from adequately analyzing them because of the nature of weak linkage. This patch replaces the weak linkage with weak_odr linkage so we can statically assert that multiple declarations of the same kernel will have the same definition. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D122443 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/test/OpenMP/amdgcn_target_codegen.cpp clang/test/OpenMP/assumes_include_nvptx.cpp clang/test/OpenMP/declare_target_codegen.cpp clang/test/OpenMP/declare_target_link_codegen.cpp clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp clang/test/OpenMP/target_firstprivate_codegen.cpp clang/test/OpenMP/target_private_codegen.cpp clang/test/OpenMP/target_reduction_codegen.cpp
Index: clang/test/OpenMP/target_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_reduction_codegen.cpp +++ clang/test/OpenMP/target_reduction_codegen.cpp @@ -45,7 +45,7 @@ { } - // TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: store {{.+}}, {{.+}} [[A]], // TCHECK: load i32*, i32** [[A]], @@ -56,7 +56,7 @@ a = 1; } - // TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: store {{.+}}, {{.+}} [[A]], // TCHECK: [[REF:%.+]] = load i32*, i32** [[A]], @@ -69,7 +69,7 @@ aa = 1; } - // TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]]) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[AA:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: store {{.+}}, {{.+}} [[A]], @@ -118,7 +118,7 @@ return a; } -// TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}}) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[A3:%.+]] = alloca i{{[0-9]+}}*, @@ -154,7 +154,7 @@ return c[1][1] + (int)b; } - // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i16*{{.+}}) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i16*{{.+}}) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -206,7 +206,7 @@ } // template -// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, Index: clang/test/OpenMP/target_private_codegen.cpp =================================================================== --- clang/test/OpenMP/target_private_codegen.cpp +++ clang/test/OpenMP/target_private_codegen.cpp @@ -45,7 +45,7 @@ { } - // TCHECK: define weak void @__omp_offloading_{{.+}}() + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: store {{.+}}, {{.+}} [[A]], // TCHECK: ret void @@ -55,7 +55,7 @@ a = 1; } - // TCHECK: define weak void @__omp_offloading_{{.+}}() + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]], // TCHECK: ret void @@ -66,7 +66,7 @@ aa = 1; } - // TCHECK: define weak void @__omp_offloading_{{.+}}() + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]], @@ -85,7 +85,7 @@ } // make sure that private variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}} noundef [[VLA3:%.+]]) // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}}, @@ -179,7 +179,7 @@ return a; } -// TCHECK: define weak void @__omp_offloading_{{.+}}() +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3:%.+]] = alloca i{{[0-9]+}}, @@ -207,7 +207,7 @@ return c[1][1] + (int)b; } - // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, @@ -261,7 +261,7 @@ } // template -// TCHECK: define weak void @__omp_offloading_{{.+}}() +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}], Index: clang/test/OpenMP/target_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/target_firstprivate_codegen.cpp +++ clang/test/OpenMP/target_firstprivate_codegen.cpp @@ -143,7 +143,7 @@ // CHECK: [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: {{.+}} = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 3, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i8** null) - // TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[P_ADDR:%.+]] = alloca i32**, // TCHECK: [[GA_ADDR:%.+]] = alloca i{{64|32}}, @@ -352,7 +352,7 @@ // CHECK: [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: {{.+}} = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i8** null) - // TCHECK: define weak void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]]) // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, // TCHECK-NOT: alloca [[TTII]], // TCHECK-NOT: alloca double*, @@ -391,7 +391,7 @@ return a; } -// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, @@ -479,7 +479,7 @@ // only check that we use the map types stored in the global variable // CHECK: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i8** null) - // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -587,7 +587,7 @@ // CHECK: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i8** null) -// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define weak_odr void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, // TCHECK-NOT: alloca i{{[0-9]+}}, Index: clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -95,7 +95,7 @@ ptr[0]++; } - // TCHECK: define weak void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]]) + // TCHECK: define weak_odr void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]]) // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, // TCHECK-NOT: alloca double*, // TCHECK: store double* [[PTR_IN]], double** [[PTR_ADDR]], Index: clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp =================================================================== --- clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp +++ clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp @@ -22,7 +22,7 @@ return threadCount; } -// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected +// CHECK: define weak_odr amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected // CHECK: user_code.entry: // CHECK: call void @__kmpc_parallel_51 // CHECK-NOT: call i32 @__kmpc_single @@ -44,7 +44,7 @@ return threadCount; } -// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected +// CHECK: define weak_odr amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected // CHECK: user_code.entry: // CHECK: call i32 @__kmpc_single // CHECK-NOT: call void @__kmpc_parallel_51 Index: clang/test/OpenMP/declare_target_link_codegen.cpp =================================================================== --- clang/test/OpenMP/declare_target_link_codegen.cpp +++ clang/test/OpenMP/declare_target_link_codegen.cpp @@ -50,7 +50,7 @@ return 0; } -// DEVICE: define weak void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} +// DEVICE: define weak_odr void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} // DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_ref_ptr, // DEVICE: [[C:%.+]] = load i32, i32* [[C_REF]], // DEVICE: store i32 [[C]], i32* % Index: clang/test/OpenMP/declare_target_codegen.cpp =================================================================== --- clang/test/OpenMP/declare_target_codegen.cpp +++ clang/test/OpenMP/declare_target_codegen.cpp @@ -140,7 +140,7 @@ int maini1() { int a; static long aa = 32 + bbb + ccc + fff + ggg; -// CHECK-DAG: define weak void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](i32* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}}) +// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](i32* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}}) #pragma omp target map(tofrom \ : a, b) { @@ -153,7 +153,7 @@ int baz3() { return 2 + baz2(); } int baz2() { -// CHECK-DAG: define weak void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](i64 {{.*}}) +// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](i64 {{.*}}) #pragma omp target parallel ++c; return 2 + baz3(); @@ -165,7 +165,7 @@ int baz5() { bool a; -// CHECK-DAG: define weak void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](i64 {{.*}}) +// CHECK-DAG: define weak_odr void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](i64 {{.*}}) #pragma omp target a = __extension__(void *) & __t_create != 0; return a; Index: clang/test/OpenMP/assumes_include_nvptx.cpp =================================================================== --- clang/test/OpenMP/assumes_include_nvptx.cpp +++ clang/test/OpenMP/assumes_include_nvptx.cpp @@ -11,11 +11,11 @@ // TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated. -// CHECK: define weak void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}() [[attr0:#[0-9]]] +// CHECK: define weak_odr void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}() [[attr0:#[0-9]]] // CHECK: call i32 @__kmpc_target_init( // CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]] // CHECK: declare void @__kmpc_target_deinit( -// CHECK: define weak void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}() [[attr0]] +// CHECK: define weak_odr void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}() [[attr0]] // CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]] // CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]] Index: clang/test/OpenMP/amdgcn_target_codegen.cpp =================================================================== --- clang/test/OpenMP/amdgcn_target_codegen.cpp +++ clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -9,7 +9,7 @@ #define N 1000 int test_amdgcn_target_tid_threads() { -// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads +// CHECK-LABEL: define weak_odr amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads int arr[N]; @@ -23,7 +23,7 @@ } int test_amdgcn_target_tid_threads_simd() { -// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd +// CHECK-LABEL: define weak_odr amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd int arr[N]; Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6537,7 +6537,7 @@ if (CGM.getLangOpts().OpenMPIsDevice) { OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); - OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage); OutlinedFn->setDSOLocal(false); if (CGM.getTriple().isAMDGCN()) OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits