sfantao created this revision.
sfantao added reviewers: ABataev, hfinkel, rjmccall.
sfantao added a subscriber: cfe-commits.
This patch aims at fixing the regression reported in
https://llvm.org/bugs/show_bug.cgi?id=24120. The problem is exposed when TLS is
used to implement OpenMP `threadprivate` directive. This patch is not final,
but I'd like to gather some feedback.
The problem is described in this code snippet:
```
int a = 0;
#pragma omp threadprivate(a)
foo() {
a = 1;
#pragma omp parallel copyin(a)
{
// a lookup is made by each thread to obtain the value of `a` in the master
thread.
// in the TLS implementation this maps to taking the value of the global,
so each thread will get its own private copy ,and not the master's, hence the
error.
// will print 0 for all threads other than master
printf("%d\n",a);
}
}
```
This patch aims at fixing the problem by doing:
```
int a = 0;
#pragma omp threadprivate(a)
foo() {
a = 1;
// capture a reference to `a` here and pass it to the outlined function
#pragma omp parallel copyin(a)
{
// use the reference passed to the outlined function to load the master
value and copy it to the private copy of `a`, local to the thread.
// will print 1 for all threads
printf("%d\n",a);
}
}
```
Notwithstanding this fixes https://llvm.org/bugs/show_bug.cgi?id=24120, this
change seems profitable also if no TLS is used: only the master will do the
(expensive) lookup at the cost of passing an extra reference to the outlined
function, instead of having all the threads doing the exact same look-up.
This patch would fix the problem for the snippet above, but not if the use of
the privatized global is not closely nested in the parallel region that has the
`copyin` clause. So if we have:
```
int a = 0;
#pragma omp threadprivate(a)
foo() {
a = 1;
#pragma omp parallel copyin(a) // <-- Capture 'a' here
{
#pragma omp critical // <-- do NOT capture 'a' here
{
printf("%d\n",a);
}
}
}
```
We want to capture `a` only for the scope that is closely nested by the
`parallel` directive. For the other innermost scopes, using the global is just
fine, and we don't want to pass another reference to the outlined function for
nothing.
The natural place to handle the capture seems to be `IsOpenMPCapturedVar`, but
in my understanding (please correct me if I'm wrong) there is no logic to
selectively capture something at a given level without capturing it for all
the scopes until the use is reached.
Should this logic be added? Do you have other ideas on how to tackle this?
Let me know your thoughts.
Thanks!
Samuel
http://reviews.llvm.org/D11395
Files:
lib/CodeGen/CGStmtOpenMP.cpp
lib/Sema/SemaOpenMP.cpp
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -157,6 +157,10 @@
/// current region.
bool isLoopControlVariable(VarDecl *D);
+ /// \brief Check if the specified variable is a 'copyin' variable for current
+ /// region.
+ bool isCopyinVariable(VarDecl *D);
+
/// \brief Adds explicit data sharing attribute to the specified declaration.
void addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A);
@@ -412,6 +416,12 @@
return Stack.back().LCVSet.count(D) > 0;
}
+bool DSAStackTy::isCopyinVariable(VarDecl *D){
+ assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
+ D = D->getCanonicalDecl();
+ DSAInfo I = Stack.back().SharingMap.lookup(D);
+ return I.Attributes == OMPC_copyin;
+}
void DSAStackTy::addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A) {
D = D->getCanonicalDecl();
if (A == OMPC_threadprivate) {
@@ -653,6 +663,8 @@
assert(LangOpts.OpenMP && "OpenMP is not allowed");
VD = VD->getCanonicalDecl();
if (DSAStack->getCurrentDirective() != OMPD_unknown) {
+ if (DSAStack->isCopyinVariable(VD) && !DSAStack->isClauseParsingMode())
+ return true;
if (DSAStack->isLoopControlVariable(VD) ||
(VD->hasLocalStorage() &&
isParallelOrTaskRegion(DSAStack->getCurrentDirective())))
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -227,10 +227,15 @@
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
- // Get the address of the master variable.
- auto *MasterAddr = VD->isStaticLocal()
- ? CGM.getStaticLocalDeclAddress(VD)
- : CGM.GetAddrOfGlobal(VD);
+
+ // Get the address of the master variable. It is passed from the master
+ // as field in the captured declaration.
+ auto *FD = CapturedStmtInfo->lookup(VD);
+ assert( FD && "Copyin master value should be available in some
field!");
+ QualType TagType = getContext().getTagDeclType(FD->getParent());
+ LValue LV =
MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(), TagType);
+ auto *MasterAddr = EmitLValueForField(LV, FD).getAddress();
+
// Get the address of the threadprivate variable.
auto *PrivateAddr = EmitLValue(*IRef).getAddress();
if (CopiedVars.size() == 1) {
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -157,6 +157,10 @@
/// current region.
bool isLoopControlVariable(VarDecl *D);
+ /// \brief Check if the specified variable is a 'copyin' variable for current
+ /// region.
+ bool isCopyinVariable(VarDecl *D);
+
/// \brief Adds explicit data sharing attribute to the specified declaration.
void addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A);
@@ -412,6 +416,12 @@
return Stack.back().LCVSet.count(D) > 0;
}
+bool DSAStackTy::isCopyinVariable(VarDecl *D){
+ assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
+ D = D->getCanonicalDecl();
+ DSAInfo I = Stack.back().SharingMap.lookup(D);
+ return I.Attributes == OMPC_copyin;
+}
void DSAStackTy::addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A) {
D = D->getCanonicalDecl();
if (A == OMPC_threadprivate) {
@@ -653,6 +663,8 @@
assert(LangOpts.OpenMP && "OpenMP is not allowed");
VD = VD->getCanonicalDecl();
if (DSAStack->getCurrentDirective() != OMPD_unknown) {
+ if (DSAStack->isCopyinVariable(VD) && !DSAStack->isClauseParsingMode())
+ return true;
if (DSAStack->isLoopControlVariable(VD) ||
(VD->hasLocalStorage() &&
isParallelOrTaskRegion(DSAStack->getCurrentDirective())))
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -227,10 +227,15 @@
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
- // Get the address of the master variable.
- auto *MasterAddr = VD->isStaticLocal()
- ? CGM.getStaticLocalDeclAddress(VD)
- : CGM.GetAddrOfGlobal(VD);
+
+ // Get the address of the master variable. It is passed from the master
+ // as field in the captured declaration.
+ auto *FD = CapturedStmtInfo->lookup(VD);
+ assert( FD && "Copyin master value should be available in some field!");
+ QualType TagType = getContext().getTagDeclType(FD->getParent());
+ LValue LV = MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(), TagType);
+ auto *MasterAddr = EmitLValueForField(LV, FD).getAddress();
+
// Get the address of the threadprivate variable.
auto *PrivateAddr = EmitLValue(*IRef).getAddress();
if (CopiedVars.size() == 1) {
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits