Author: Shilei Tian Date: 2021-07-26T11:00:44-04:00 New Revision: 3274cdc83ecdf2af569ad4f564d55d0e43b1072e
URL: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e DIFF: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e.diff LOG: [Clang][OpenMP] Remove the mandatory flush for capture for OpenMP 5.1 In OpenMP 5.1: > If the `write` or `update` clause is specified, the atomic operation is not > an atomic conditional update for which the comparison fails, and the > effective memory ordering is `release`, `acq_rel`, or `seq_cst`, the strong > flush on entry to the atomic operation is also a release flush. If the `read` > or `update` clause is specified and the effective memory ordering is > `acquire`, `acq_rel`, or `seq_cst` then the strong flush on exit from the > atomic operation is also an acquire flush. In OpenMP 5.0: > If the `write`, `update`, or **`capture`** clause is specified and the > `release`, `acq_rel`, or `seq_cst` clause is specified then the strong flush > on entry to the atomic operation is also a release flush. If the `read` or > `capture` clause is specified and the `acquire`, `acq_rel`, or `seq_cst` > clause is specified then the strong flush on exit from the atomic operation > is also an acquire flush. From my understanding, in OpenMP 5.1, `capture` is removed from the requirement for flush, therefore we don't have to enforce it. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D100768 Added: Modified: clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/atomic_capture_codegen.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 486b48bca0a62..1f913590339f8 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5725,32 +5725,35 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, // Emit post-update store to 'v' of old/new 'x' value. 
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); - // OpenMP, 2.17.7, atomic Construct - // If the write, update, or capture clause is specified and the release, - // acq_rel, or seq_cst clause is specified then the strong flush on entry to - // the atomic operation is also a release flush. - // If the read or capture clause is specified and the acquire, acq_rel, or - // seq_cst clause is specified then the strong flush on exit from the atomic - // operation is also an acquire flush. - switch (AO) { - case llvm::AtomicOrdering::Release: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::Release); - break; - case llvm::AtomicOrdering::Acquire: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::Acquire); - break; - case llvm::AtomicOrdering::AcquireRelease: - case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::AcquireRelease); - break; - case llvm::AtomicOrdering::Monotonic: - break; - case llvm::AtomicOrdering::NotAtomic: - case llvm::AtomicOrdering::Unordered: - llvm_unreachable("Unexpected ordering."); + // OpenMP 5.1 removes the required flush for capture clause. + if (CGF.CGM.getLangOpts().OpenMP < 51) { + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. 
+ switch (AO) { + case llvm::AtomicOrdering::Release: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush( + CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } } diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp index 62e41c8f26934..1cdd5056397b6 100644 --- a/clang/test/OpenMP/atomic_capture_codegen.cpp +++ b/clang/test/OpenMP/atomic_capture_codegen.cpp @@ -1,11 +1,19 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-50 %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 
-target-cpu core2 -fopenmp -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -343,7 +351,7 @@ int main() { // CHECK: [[IM_CAST:%.+]] = fptrunc double [[NEW_IM]] to float // CHECK: store float [[RE_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 0), // CHECK: store float [[IM_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 1), -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture seq_cst {cdx = cdx - cdv; cfv = cdx;} // CHECK: [[BV:%.+]] = load i8, i8* @{{.+}} @@ -393,7 +401,7 @@ int main() { // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] // CHECK: store i8 [[NEW]], i8* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture, seq_cst {cx = cx >> ucv; cv = cx;} // CHECK: [[SV:%.+]] = load i16, i16* @{{.+}}, @@ -436,7 +444,7 @@ int main() { // CHECK: [[OLD:%.+]] = atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst, align 4 // CHECK: [[DESIRED:%.+]] = or i32 [[EXPR]], [[OLD]] // CHECK: 
store i32 [[DESIRED]], i32* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic seq_cst, capture {uix = iv | uix; uiv = uix;} // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}} @@ -904,7 +912,7 @@ int main() { // CHECK: [[EXIT]] // CHECK: [[NEW_VAL:%.+]] = trunc i64 [[CONV]] to i32 // CHECK: store i32 [[NEW_VAL]], i32* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture release {bfx4.b /= ldv; iv = bfx4.b;} // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}} @@ -937,7 +945,7 @@ int main() { // CHECK: [[EXIT]] // CHECK: [[NEW_VAL_I32:%.+]] = trunc i64 [[NEW_VAL]] to i32 // CHECK: store i32 [[NEW_VAL_I32]], i32* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture acquire iv = bfx4_packed.b += ldv; // CHECK: load i64, i64* @@ -963,7 +971,7 @@ int main() { // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] // CHECK: store float [[X]], float* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture acq_rel {fv = float2x.x; float2x.x = ulv - float2x.x;} // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}}, @@ -973,7 +981,7 @@ int main() { // CHECK: [[NEW_VAL:%.+]] = fptosi double [[DIV]] to i32 // CHECK: call void @llvm.write_register.i32([[REG]], i32 [[NEW_VAL]]) // CHECK: store i32 [[NEW_VAL]], i32* @{{.+}}, -// CHECK: call{{.*}} @__kmpc_flush( +// CHECK-50: call{{.*}} @__kmpc_flush( #pragma omp atomic capture seq_cst {rix = dv / rix; iv = rix;} // CHECK: [[OLD_VAL:%.+]] = atomicrmw xchg i32* @{{.+}}, i32 5 monotonic, align 4 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits