One comment, inline below (marked with ">>>>"). -----Original Message----- From: [email protected] [mailto:[email protected]] On Behalf Of Zhigang Gong Sent: Friday, December 13, 2013 2:47 PM To: [email protected] Cc: Gong, Zhigang Subject: [Beignet] [PATCH] GBE: fix clang's "incorrect" optimization for barrier call.
Clang may duplicate one barrier call to multiple branches which breaks opencl's spec and may cause gpu hang. To fix this issue, we have to implement the barrier in a llvm module file and specify the function attribute to noduplicate, and we have to link this pre-compiled module before we compile the user kernel, so we set it the pcm lib file to the LinkBitCodeFile field of the clang instance. Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/CMakeLists.txt | 29 ++++++++++++++++++++++++++++- backend/src/GBEConfig.h.in | 1 + backend/src/backend/program.cpp | 17 +++++++++++++++++ backend/src/ocl_barrier.ll | 39 +++++++++++++++++++++++++++++++++++++++ backend/src/ocl_stdlib.tmpl.h | 9 +-------- 5 files changed, 86 insertions(+), 9 deletions(-) create mode 100644 backend/src/ocl_barrier.ll diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 36bf688..fa69321 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -59,6 +59,26 @@ add_custom_command( add_custom_target(pch_object DEPENDS ${pch_object}) +macro(ll_add_library ll_lib ll_sources) + foreach (ll ${${ll_sources}}) + add_custom_command( + OUTPUT ${ll}.bc + COMMAND rm -f ${ll}.bc + COMMAND llvm-as -o ${ll}.bc ${GBE_SOURCE_DIR}/src/${ll} + DEPENDS ${ll} + ) + set (ll_objects ${ll_objects} ${ll}.bc) + endforeach (ll ${ll_sources}) + add_custom_command( + OUTPUT ${ll_lib} + COMMAND llvm-link -o ${ll_lib} ${ll_objects} + DEPENDS ${ll_objects} + ) + add_custom_target(${ll_lib} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ll_lib}) + message(${ll_lib}, ${ll_objects}) +endmacro(ll_add_library) + if (GBE_USE_BLOB) set (GBE_SRC blob.cpp @@ -144,7 +164,12 @@ link_directories (${LLVM_LIBRARY_DIRS}) include_directories(${LLVM_INCLUDE_DIRS}) add_library (gbe SHARED ${GBE_SRC}) -ADD_DEPENDENCIES (gbe pch_object) +# for pre compiled module library. 
+set (pcm_lib "beignet.bc") +set (pcm_sources ocl_barrier.ll) +ll_add_library (${pcm_lib} pcm_sources) + +ADD_DEPENDENCIES (gbe pch_object ${pcm_lib}) target_link_libraries( gbe ${DRM_INTEL_LIBRARY} @@ -161,9 +186,11 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe) install (TARGETS gbe LIBRARY DESTINATION lib) install (FILES ${pch_object} DESTINATION lib) +install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION lib) install (FILES backend/program.h DESTINATION include/gen) set (PCH_OBJECT_DIR "${pch_object};${CMAKE_INSTALL_PREFIX}/lib/ocl_stdlib.h.pch") +set (PCM_LIB_DIR +"${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib};${CMAKE_INSTALL_PREFIX}/lib/${p +cm_lib}") configure_file ( "GBEConfig.h.in" "GBEConfig.h" diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in index 74bef3f..9920d25 100644 --- a/backend/src/GBEConfig.h.in +++ b/backend/src/GBEConfig.h.in @@ -2,3 +2,4 @@ #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@ #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@ #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@" +#define PCM_LIB_DIR "@PCM_LIB_DIR@" diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index fc9b03c..46ec04f 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -587,6 +587,21 @@ namespace gbe { // Create an action and make the compiler instance carry it out llvm::OwningPtr<clang::CodeGenAction> Act(new clang::EmitLLVMOnlyAction()); + + std::string dirs = PCM_LIB_DIR, pcmLib; + std::istringstream idirs(dirs); + bool findPcm = false; + + while (getline(idirs, pcmLib, ';')) { + if(access(pcmLib.c_str(), R_OK) == 0) { + findPcm = true; + break; + } + } + + GBE_ASSERT(findPcm && "Could not find pre compiled module + library.\n"); + + Clang.getCodeGenOpts().LinkBitcodeFile = pcmLib; auto retVal = Clang.ExecuteAction(*Act); if (err != NULL) { @@ -755,6 +770,8 @@ namespace gbe { if (err != NULL) *errSize += clangErrSize; gbe_mutex.unlock(); + if (OCL_OUTPUT_BUILD_LOG && options) + 
llvm::errs() << options; remove(llName.c_str()); } else p = NULL; diff --git a/backend/src/ocl_barrier.ll b/backend/src/ocl_barrier.ll new file mode 100644 index 0000000..0f5f104 --- /dev/null +++ b/backend/src/ocl_barrier.ll @@ -0,0 +1,39 @@ +;XXX FIXME as llvm can't use macros, we hardcoded 3, 1, 2 ;here, we may +need to use a more grace way to handle this type ;of values latter. +;#define CLK_LOCAL_MEM_FENCE (1 << 0) +;#define CLK_GLOBAL_MEM_FENCE (1 << 1) + +declare i32 @_get_local_mem_fence() nounwind alwaysinline declare i32 +@_get_global_mem_fence() nounwind alwaysinline declare void +@__gen_ocl_barrier_local() nounwind noduplicate alwaysinline declare +void @__gen_ocl_barrier_global() nounwind noduplicate alwaysinline +declare void @__gen_ocl_barrier_local_and_global() nounwind noduplicate +alwaysinline + +define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { + %1 = icmp eq i32 %flags, 3 + br i1 %1, label %barrier_local_global, label %barrier_local_check + +barrier_local_global: + call void @__gen_ocl_barrier_local_and_global() noduplicate + br label %done + +barrier_local_check: + %2 = icmp eq i32 %flags, 1 + br i1 %2, label %barrier_local, label %barrier_global_check + +barrier_local: + call void @__gen_ocl_barrier_local() noduplicate + br label %done + +barrier_global_check: + %3 = icmp eq i32 %flags, 2 + br i1 %3, label %barrier_global, label %done + +barrier_global: + call void @__gen_ocl_barrier_local_and_global() noduplicate >>>> Should this call @__gen_ocl_barrier_global() instead? This is the global-only branch (flags == 2), and the C version being removed called __gen_ocl_barrier_global() for CLK_GLOBAL_MEM_FENCE.
+ br label %done + +done: + ret void +} diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index e5f356e..a4989ed 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -2685,14 +2685,7 @@ void __gen_ocl_barrier_global(void); void __gen_ocl_barrier_local_and_global(void); typedef uint cl_mem_fence_flags; -INLINE void barrier(cl_mem_fence_flags flags) { - if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)) - __gen_ocl_barrier_local_and_global(); - else if (flags == CLK_LOCAL_MEM_FENCE) - __gen_ocl_barrier_local(); - else if (flags == CLK_GLOBAL_MEM_FENCE) - __gen_ocl_barrier_global(); -} +void barrier(cl_mem_fence_flags flags); INLINE void mem_fence(cl_mem_fence_flags flags) { } -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
