ABataev updated this revision to Diff 268592.
ABataev added a comment.

Rebase and fixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81192/new/

https://reviews.llvm.org/D81192

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/critical_codegen.cpp
  clang/test/OpenMP/openmp_win_codegen.cpp
  clang/test/OpenMP/simd_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -814,13 +814,14 @@
   Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
 
   return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
-                              /*Conditional*/ false, /*hasFinalize*/ true);
+                              /*Conditional*/ false, /*hasFinalize*/ true,
+                              /*WithMemBarrier=*/true);
 }
 
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
     Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
-    bool HasFinalize) {
+    bool HasFinalize, bool WithMemBarrier) {
 
   if (HasFinalize)
     FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});
@@ -837,6 +838,8 @@
 
   Builder.SetInsertPoint(EntryBB->getTerminator());
   emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
+  if (WithMemBarrier)
+    Builder.CreateFence(AtomicOrdering::Acquire);
 
   // generate body
   BodyGenCB(/* AllocaIP */ InsertPointTy(),
@@ -861,7 +864,7 @@
     assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
            FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
            "Unexpected control flow graph state!!");
-    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
+    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize, WithMemBarrier);
     assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
            "Unexpected Control Flow State!");
     MergeBlockIntoPredecessor(FiniBB);
@@ -919,7 +922,7 @@
 
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
     omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
-    bool HasFinalize) {
+    bool HasFinalize, bool WithMemBarrier) {
 
   Builder.restoreIP(FinIP);
 
@@ -940,6 +943,8 @@
     Builder.SetInsertPoint(FiniBBTI);
   }
 
+  if (WithMemBarrier)
+    Builder.CreateFence(AtomicOrdering::Release);
   // place the Exitcall as last instruction before Finalization block terminator
   ExitCall->removeFromParent();
   Builder.Insert(ExitCall);
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -354,12 +354,15 @@
   /// \param HasFinalize indicate if the directive will require finalization
   ///         and has a finalization callback in the stack that
   ///        should be called.
+  /// \param WithMemBarrier true if the memory fence instructions are required
+  /// to prevent optimizations.
   ///
   /// \return The insertion position in exit block
   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                         InsertPointTy FinIP,
                                         Instruction *ExitCall,
-                                        bool HasFinalize = true);
+                                        bool HasFinalize = true,
+                                        bool WithMemBarrier = false);
 
   /// Common Interface to generate OMP inlined regions
   ///
@@ -374,6 +377,8 @@
   /// \param HasFinalize indicate if the directive will require finalization
   ///         and has a finalization callback in the stack that
   /// should        be called.
+  /// \param WithMemBarrier true if the memory fence instructions are required
+  /// to prevent optimizations.
   ///
   /// \return The insertion point after the region
 
@@ -381,7 +386,7 @@
   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                        FinalizeCallbackTy FiniCB, bool Conditional = false,
-                       bool HasFinalize = true);
+                       bool HasFinalize = true, bool WithMemBarrier = false);
 
   /// Get the platform-specific name separator.
   /// \param Parts different parts of the final name that needs separation
Index: clang/test/OpenMP/simd_codegen.cpp
===================================================================
--- clang/test/OpenMP/simd_codegen.cpp
+++ clang/test/OpenMP/simd_codegen.cpp
@@ -243,6 +243,7 @@
 // CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV]],{{.+}}!llvm.access.group
 // OMP50-NEXT: [[IV:%.+]] = load i64, i64* [[OMP_IV7]],{{.+}}!llvm.access.group
 // OMP50RT:    call void @__kmpc_critical(%struct.ident_t* {{.+}}, i32 [[GTID:%.+]], [8 x i32]* [[A_REGION:@.+]]),{{.+}}!llvm.access.group
+// OMP50RT-NEXT: fence acquire
 // OMP50-NEXT: [[LAST_IV_VAL:%.+]] = load i64, i64* [[LAST_IV]],{{.+}}!llvm.access.group
 // OMP50-NEXT: [[CMP:%.+]] = icmp sle i64 [[LAST_IV_VAL]], [[IV]]
 // OMP50-NEXT: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
@@ -252,6 +253,7 @@
 // OMP50-NEXT: store i32 [[A_VAL]], i32* [[LAST_A]],{{.+}}!llvm.access.group
 // OMP50-NEXT: br label %[[LP_DONE]]
 // OMP50:      [[LP_DONE]]:
+// OMP50RT-NEXT: fence release
 // OMP50RT-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.+}}, i32 [[GTID]], [8 x i32]* [[A_REGION]]),{{.+}}!llvm.access.group
     A += i;
 // CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group
Index: clang/test/OpenMP/openmp_win_codegen.cpp
===================================================================
--- clang/test/OpenMP/openmp_win_codegen.cpp
+++ clang/test/OpenMP/openmp_win_codegen.cpp
@@ -54,10 +54,12 @@
 // CHECK: [[CATCHSWITCH:%.+]] = catchswitch within none
 // CHECK: [[CATCHPAD:%.+]] = catchpad within [[CATCHSWITCH]]
 // CHECK: call void @__kmpc_critical(%struct.ident_t* {{.*}}@0, i32 [[GID:%.+]],
+// CHECK-NEXT: fence acquire
 // CHECK: invoke void @{{.+}}bar
 // CHECK: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]],
 // CHECK: catchret from [[CATCHPAD]] to
 // CHECK:      cleanuppad within [[CATCHPAD]] []
+// CHECK-NEXT: fence release
 // CHECK-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]],
 // CHECK-NEXT: cleanupret from {{.*}} unwind label %[[CATCHTERM:[^ ]+]]
 // CHECK:      cleanuppad within none []
Index: clang/test/OpenMP/critical_codegen.cpp
===================================================================
--- clang/test/OpenMP/critical_codegen.cpp
+++ clang/test/OpenMP/critical_codegen.cpp
@@ -30,28 +30,35 @@
   // ALL:       [[A_ADDR:%.+]] = alloca i8
   char a;
 
-// ALL:       			[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL:       			call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
-// ALL-NEXT:  			store i8 2, i8* [[A_ADDR]]
-// ALL-NEXT:  			call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// ALL:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// ALL-NEXT:  fence acquire
+// ALL-NEXT:  store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT:  fence release
+// ALL-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
 #pragma omp critical
   a = 2;
 // IRBUILDER:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL:       			call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
-// IRBUILDER-NEXT:	call {{.*}}void [[FOO]]()
-// NORMAL-NEXT:  		invoke {{.*}}void [[FOO]]()
-// ALL:      				call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// ALL:             call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// ALL-NEXT:        fence acquire
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:     invoke {{.*}}void [[FOO]]()
+// ALL:             fence release
+// ALL-NEXT:        call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
 #pragma omp critical(the_name)
   foo();
-// IRBUILDER:   		[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL: 	      		call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23)
-// IRBUILDER-NEXT:	call {{.*}}void [[FOO]]()
-// NORMAL-NEXT:		  invoke {{.*}}void [[FOO]]()
-// ALL:		       		call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]])
+// IRBUILDER:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:             call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23)
+// ALL-NEXT:        fence acquire
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:     invoke {{.*}}void [[FOO]]()
+// ALL:             fence release
+// ALL-NEXT:        call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]])
 #pragma omp critical(the_name1) hint(23)
   foo();
-  // IRBUILDER:   		[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+  // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
   // ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+  // ALL-NEXT:  fence acquire
   // ALL:       br label
   // ALL-NOT:   call {{.*}}void @__kmpc_end_critical(
   // ALL:       br label
@@ -80,10 +87,12 @@
   // NORMAL: [[S_REF:%.+]] = load %struct.S*, %struct.S** [[S_ADDR]],
   // NORMAL: store %struct.S* [[S_REF]], %struct.S** [[S_ADDR:%.+]],
   // ALL: call void @__kmpc_critical(
+  // ALL: fence acquire
 #pragma omp critical
   // ALL: [[S_REF:%.+]] = load %struct.S*, %struct.S** [[S_ADDR]],
   // ALL: [[S_A_REF:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[S_REF]], i32 0, i32 0
   ++s.a;
+  // ALL: fence release
   // ALL: call void @__kmpc_end_critical(
 }
 
@@ -94,15 +103,17 @@
 #pragma omp critical
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_critical({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+  // TERM_DEBUG:     fence acquire
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     fence release
   // TERM_DEBUG:     call void @__kmpc_end_critical({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
   // TERM_DEBUG:     [[TERM_LPAD]]
   // TERM_DEBUG:     call void @__clang_call_terminate
   // TERM_DEBUG:     unreachable
   foo();
 }
-// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-12]],
+// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-14]],
 // TERM_DEBUG-DAG: [[DBG_LOC_END]] = !DILocation(line: [[@LINE-3]],
 #endif
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2234,15 +2234,18 @@
   llvm::FunctionCallee ExitCallee;
   ArrayRef<llvm::Value *> ExitArgs;
   bool Conditional;
+  bool WithMemBarrier = false;
   llvm::BasicBlock *ContBlock = nullptr;
 
 public:
   CommonActionTy(llvm::FunctionCallee EnterCallee,
                  ArrayRef<llvm::Value *> EnterArgs,
                  llvm::FunctionCallee ExitCallee,
-                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
+                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false,
+                 bool WithMemBarrier = false)
       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
-        ExitArgs(ExitArgs), Conditional(Conditional) {}
+        ExitArgs(ExitArgs), Conditional(Conditional),
+        WithMemBarrier(WithMemBarrier) {}
   void Enter(CodeGenFunction &CGF) override {
     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
     if (Conditional) {
@@ -2253,6 +2256,8 @@
       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
       CGF.EmitBlock(ThenBlock);
     }
+    if (WithMemBarrier)
+      CGF.Builder.CreateFence(llvm::AtomicOrdering::Acquire);
   }
   void Done(CodeGenFunction &CGF) {
     // Emit the rest of blocks/branches
@@ -2260,6 +2265,8 @@
     CGF.EmitBlock(ContBlock, true);
   }
   void Exit(CodeGenFunction &CGF) override {
+    if (WithMemBarrier)
+      CGF.Builder.CreateFence(llvm::AtomicOrdering::Release);
     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
   }
 };
@@ -2290,7 +2297,7 @@
       EnterArgs,
       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
           CGM.getModule(), OMPRTL___kmpc_end_critical),
-      Args);
+      Args, /*Conditional=*/false, /*WithMemBarrier=*/true);
   CriticalOpGen.setAction(Action);
   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to