One comment.

-----Original Message-----
From: [email protected] 
[mailto:[email protected]] On Behalf Of Zhigang Gong
Sent: Friday, December 13, 2013 2:47 PM
To: [email protected]
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH] GBE: fix clang's "incorrect" optimization for 
barrier call.

Clang may duplicate one barrier call into multiple branches, which violates the 
OpenCL spec and may cause a GPU hang. To fix this issue, we have to implement 
the barrier in an LLVM module file and mark the function with the noduplicate 
attribute. We also have to link this pre-compiled module before we compile the 
user kernel, so we assign the pcm lib file to the LinkBitcodeFile field of the 
clang instance's code-gen options.

Signed-off-by: Zhigang Gong <[email protected]>
---
 backend/src/CMakeLists.txt      |   29 ++++++++++++++++++++++++++++-
 backend/src/GBEConfig.h.in      |    1 +
 backend/src/backend/program.cpp |   17 +++++++++++++++++
 backend/src/ocl_barrier.ll      |   39 +++++++++++++++++++++++++++++++++++++++
 backend/src/ocl_stdlib.tmpl.h   |    9 +--------
 5 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100644 backend/src/ocl_barrier.ll

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 
36bf688..fa69321 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -59,6 +59,26 @@ add_custom_command(
 add_custom_target(pch_object
                   DEPENDS ${pch_object})
 
+macro(ll_add_library ll_lib ll_sources)
+  foreach (ll ${${ll_sources}})
+  add_custom_command(
+       OUTPUT  ${ll}.bc
+       COMMAND rm -f ${ll}.bc
+       COMMAND llvm-as -o ${ll}.bc ${GBE_SOURCE_DIR}/src/${ll}
+       DEPENDS ${ll}
+       )
+  set (ll_objects ${ll_objects} ${ll}.bc)
+  endforeach (ll ${ll_sources})
+  add_custom_command(
+       OUTPUT ${ll_lib}
+       COMMAND llvm-link -o ${ll_lib} ${ll_objects}
+       DEPENDS ${ll_objects}
+       )
+  add_custom_target(${ll_lib}
+                    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ll_lib})
+  message(${ll_lib}, ${ll_objects})
+endmacro(ll_add_library)
+
 if (GBE_USE_BLOB)
   set (GBE_SRC
        blob.cpp
@@ -144,7 +164,12 @@ link_directories (${LLVM_LIBRARY_DIRS})
 include_directories(${LLVM_INCLUDE_DIRS})
 add_library (gbe SHARED ${GBE_SRC})
 
-ADD_DEPENDENCIES (gbe pch_object)
+# for pre compiled module library.
+set (pcm_lib "beignet.bc")
+set (pcm_sources ocl_barrier.ll)
+ll_add_library (${pcm_lib} pcm_sources)
+
+ADD_DEPENDENCIES (gbe pch_object ${pcm_lib})
 target_link_libraries(
                       gbe
                       ${DRM_INTEL_LIBRARY}
@@ -161,9 +186,11 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
 
 install (TARGETS gbe LIBRARY DESTINATION lib)
 install (FILES ${pch_object} DESTINATION lib)
+install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION lib)
 install (FILES backend/program.h DESTINATION include/gen)
 
 set (PCH_OBJECT_DIR 
"${pch_object};${CMAKE_INSTALL_PREFIX}/lib/ocl_stdlib.h.pch")
+set (PCM_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib};${CMAKE_INSTALL_PREFIX}/lib/${pcm_lib}")
 configure_file (
   "GBEConfig.h.in"
   "GBEConfig.h"
diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in index 
74bef3f..9920d25 100644
--- a/backend/src/GBEConfig.h.in
+++ b/backend/src/GBEConfig.h.in
@@ -2,3 +2,4 @@
 #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
 #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
 #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
+#define PCM_LIB_DIR "@PCM_LIB_DIR@"
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp 
index fc9b03c..46ec04f 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -587,6 +587,21 @@ namespace gbe {
 
     // Create an action and make the compiler instance carry it out
     llvm::OwningPtr<clang::CodeGenAction> Act(new clang::EmitLLVMOnlyAction());
+
+    std::string dirs = PCM_LIB_DIR, pcmLib;
+    std::istringstream idirs(dirs);
+    bool findPcm = false;
+
+    while (getline(idirs, pcmLib, ';')) {
+      if(access(pcmLib.c_str(), R_OK) == 0) {
+        findPcm = true;
+        break;
+      }
+    }
+
+    GBE_ASSERT(findPcm && "Could not find pre compiled module library.\n");
+
+    Clang.getCodeGenOpts().LinkBitcodeFile = pcmLib;
     auto retVal = Clang.ExecuteAction(*Act);
 
     if (err != NULL) {
@@ -755,6 +770,8 @@ namespace gbe {
       if (err != NULL)
         *errSize += clangErrSize;
       gbe_mutex.unlock();
+      if (OCL_OUTPUT_BUILD_LOG && options)
+        llvm::errs() << options;
       remove(llName.c_str());
     } else
       p = NULL;
diff --git a/backend/src/ocl_barrier.ll b/backend/src/ocl_barrier.ll new file 
mode 100644 index 0000000..0f5f104
--- /dev/null
+++ b/backend/src/ocl_barrier.ll
@@ -0,0 +1,39 @@
+;XXX FIXME as llvm can't use macros, we hardcoded 3, 1, 2
+;here, we may need to use a more graceful way to handle this type
+;of values later.
+;#define CLK_LOCAL_MEM_FENCE  (1 << 0)
+;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+declare i32 @_get_local_mem_fence() nounwind alwaysinline
+declare i32 @_get_global_mem_fence() nounwind alwaysinline
+declare void @__gen_ocl_barrier_local() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_global() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_local_and_global() nounwind noduplicate alwaysinline
+
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+  %1 = icmp eq i32 %flags, 3
+  br i1 %1, label %barrier_local_global, label %barrier_local_check
+
+barrier_local_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate
+  br label %done
+
+barrier_local_check:
+  %2 = icmp eq i32 %flags, 1
+  br i1 %2, label %barrier_local, label %barrier_global_check
+
+barrier_local:
+  call void @__gen_ocl_barrier_local() noduplicate
+  br label %done
+
+barrier_global_check:
+  %3 = icmp eq i32 %flags, 2
+  br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate

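>>>> Should this be @__gen_ocl_barrier_global()? The barrier_global branch (flags == 2) calls @__gen_ocl_barrier_local_and_global() here, whereas the C version removed from ocl_stdlib.tmpl.h called __gen_ocl_barrier_global() for CLK_GLOBAL_MEM_FENCE.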

+  br label %done
+
+done:
+  ret void
+}
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h 
index e5f356e..a4989ed 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2685,14 +2685,7 @@ void __gen_ocl_barrier_global(void);  void 
__gen_ocl_barrier_local_and_global(void);
 
 typedef uint cl_mem_fence_flags;
-INLINE void barrier(cl_mem_fence_flags flags) {
-  if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
-    __gen_ocl_barrier_local_and_global();
-  else if (flags == CLK_LOCAL_MEM_FENCE)
-    __gen_ocl_barrier_local();
-  else if (flags == CLK_GLOBAL_MEM_FENCE)
-    __gen_ocl_barrier_global();
-}
+void barrier(cl_mem_fence_flags flags);
 
 INLINE void mem_fence(cl_mem_fence_flags flags) {  }
--
1.7.9.5

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to