================
@@ -5674,6 +5674,130 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
}
+static std::pair<Expr *, int>
+makeClusterDimsArgExpr(Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
+ const unsigned Idx) {
+ if (S.DiagnoseUnexpandedParameterPack(E))
+ return {};
+
+ // Accept template arguments for now as they depend on something else.
+ // We'll get to check them when they eventually get instantiated.
+ if (E->isInstantiationDependent())
+ return {E, 1};
+
+ std::optional<llvm::APSInt> I = E->getIntegerConstantExpr(S.Context);
+ if (!I) {
+ S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
+ << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+ return {};
+ }
+ // Make sure we can fit it in 4 bits.
+ if (!I->isIntN(4)) {
+ S.Diag(E->getExprLoc(), diag::err_ice_too_large)
+ << toString(*I, 10, false) << 4 << /*Unsigned=*/1;
+ return {};
+ }
+ if (*I < 0) {
+ S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
+ << &AL << Idx << E->getSourceRange();
+ }
+
+ return {ConstantExpr::Create(S.getASTContext(), E, APValue(*I)),
+ I->getZExtValue()};
+}
+
+CUDAClusterDimsAttr *Sema::createClusterDimsAttr(const AttributeCommonInfo &CI,
+ Expr *X, Expr *Y, Expr *Z) {
+ CUDAClusterDimsAttr TmpAttr(Context, CI, X, Y, Z);
+
+ int ValX = 1;
+ int ValY = 1;
+ int ValZ = 1;
+
+ std::tie(X, ValX) = makeClusterDimsArgExpr(*this, X, TmpAttr, /*Idx=*/0);
+ if (!X)
+ return nullptr;
+
+ if (Y) {
+ std::tie(Y, ValY) = makeClusterDimsArgExpr(*this, Y, TmpAttr, /*Idx=*/1);
+ if (!Y)
+ return nullptr;
+ }
+
+ if (Z) {
+ std::tie(Z, ValZ) = makeClusterDimsArgExpr(*this, Z, TmpAttr, /*Idx=*/2);
+ if (!Z)
+ return nullptr;
+ }
+
+ int FlatDim = ValX * ValY * ValZ;
+ const llvm::Triple TT =
+ (!Context.getLangOpts().CUDAIsDevice && Context.getAuxTargetInfo())
+ ? Context.getAuxTargetInfo()->getTriple()
+ : Context.getTargetInfo().getTriple();
+ int MaxDim = 1;
+ if (TT.isNVPTX())
+ MaxDim = 8;
+ else if (TT.isAMDGPU())
+ MaxDim = 16;
+ else
+ return nullptr;
+
+ // A maximum of 8 thread blocks in a cluster is supported as a portable
+ // cluster size in CUDA. The number is 16 for AMDGPU.
+ if (FlatDim > MaxDim) {
+ Diag(CI.getLoc(), diag::err_cuda_cluster_dims_too_large)
+ << MaxDim << FlatDim;
+ return nullptr;
+ }
+
+ return CUDAClusterDimsAttr::Create(Context, X, Y, Z, CI);
+}
+
+void Sema::addClusterDimsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *X,
+ Expr *Y, Expr *Z) {
+ if (auto *Attr = createClusterDimsAttr(CI, X, Y, Z))
+ D->addAttr(Attr);
+}
+
+void Sema::addNoClusterAttr(Decl *D, const AttributeCommonInfo &CI) {
+ D->addAttr(CUDANoClusterAttr::Create(Context, CI));
+}
+
+static void handleClusterDimsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ const TargetInfo &TTI = S.Context.getTargetInfo();
+ OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+ if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+ (TTI.getTriple().isAMDGPU() &&
+ !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) {
+ S.Diag(AL.getLoc(), diag::err_cuda_cluster_attr_not_supported)
+ << "__cluster_dims__";
----------------
erichkeane wrote:
No, you've said a number of times that the underscore spelling is that of the
macro in the header. As that is the case, then however the ATTRIBUTE
diagnostic is, will just be surrounded with a note that gives the identify of
the macro. So there is no need to try to match them.
https://github.com/llvm/llvm-project/pull/156686
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits