jdoerfert created this revision. jdoerfert added reviewers: jhuber6, fghanim, JonChesterfield, grokos, AndreyChurbanov, ye-luo, tianshilei1992, ggeorgakoudis. Herald added subscribers: llvm-commits, cfe-commits, sstefan1, guansong, bollu, yaxunl, jholewinski. Herald added projects: clang, OpenMP, LLVM.
There are various runtime calls in the device runtime with unused, or always fixed, arguments. This is bad for all sorts of reasons. Clean up two before as we match them in OpenMPOpt now. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D83268 Files: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/test/OpenMP/nvptx_data_sharing.cpp clang/test/OpenMP/nvptx_parallel_codegen.cpp clang/test/OpenMP/nvptx_target_codegen.cpp clang/test/OpenMP/nvptx_target_teams_codegen.cpp clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp llvm/include/llvm/Frontend/OpenMP/OMPKinds.def openmp/libomptarget/deviceRTLs/common/src/parallel.cu openmp/libomptarget/deviceRTLs/interface.h
Index: openmp/libomptarget/deviceRTLs/interface.h =================================================================== --- openmp/libomptarget/deviceRTLs/interface.h +++ openmp/libomptarget/deviceRTLs/interface.h @@ -475,10 +475,8 @@ int16_t RequiresDataSharing); EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit(); EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized); -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized); +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn); +EXTERN bool __kmpc_kernel_parallel(void **WorkFn); EXTERN void __kmpc_kernel_end_parallel(); EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, __kmpc_impl_lanemask_t Mask, Index: openmp/libomptarget/deviceRTLs/common/src/parallel.cu =================================================================== --- openmp/libomptarget/deviceRTLs/common/src/parallel.cu +++ openmp/libomptarget/deviceRTLs/common/src/parallel.cu @@ -227,10 +227,8 @@ } // This routine is always called by the team master.. -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn) { PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); omptarget_nvptx_workFn = WorkFn; @@ -275,12 +273,9 @@ // returns True if this thread is active, else False. // // Only the worker threads call this routine. -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN bool __kmpc_kernel_parallel(void **WorkFn) { PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); - // Work function and arguments for L1 parallel region. *WorkFn = omptarget_nvptx_workFn; Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -584,6 +584,11 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// Note that device runtime functions (in the following) do not necessarily +/// need attributes as we expect to see the definitions. +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL Index: clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -88,7 +88,7 @@ // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0 // // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1) // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]], // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0 Index: clang/test/OpenMP/nvptx_target_teams_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -68,7 +68,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -154,7 +154,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], Index: clang/test/OpenMP/nvptx_target_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_codegen.cpp +++ clang/test/OpenMP/nvptx_target_codegen.cpp @@ -612,7 +612,7 @@ // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: br label -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2) // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]], // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0 Index: clang/test/OpenMP/nvptx_parallel_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -92,7 +92,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]] +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -166,13 +166,13 @@ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_serialized_parallel( // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]]( // CHECK: call void @__kmpc_end_serialized_parallel( -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK-64-DAG: load i32, i32* [[REF_A]] @@ -211,7 +211,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -291,7 +291,7 @@ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: br label {{%?}}[[IF_END:.+]] Index: clang/test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- clang/test/OpenMP/nvptx_data_sharing.cpp +++ clang/test/OpenMP/nvptx_data_sharing.cpp @@ -55,7 +55,7 @@ // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 // CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1 // CK1: store i32 10, i32* [[A]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0 @@ -65,7 +65,7 @@ // CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CK1: call void @__kmpc_end_sharing_variables() // CK1: store i32 100, i32* [[B]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2) // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0 Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -38,11 +38,9 @@ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, int16_t - /// IsOMPRuntimeInitialized); + /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); + /// Call to bool __kmpc_kernel_parallel(void **outlined_function); OMPRTL_NVPTX__kmpc_kernel_parallel, /// Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -1466,8 +1464,7 @@ CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = {WorkFn.getPointer(), - /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {WorkFn.getPointer()}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -1595,17 +1592,16 @@ } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -2569,7 +2565,7 @@ llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); // Prepare for parallel region. Indicate the outlined function. - llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits