david-arm created this revision. david-arm added reviewers: sdesmalen, ctetreau, fhahn, c-rhodes. Herald added subscribers: cfe-commits, psnobl, tschuett. Herald added a reviewer: efriedma. Herald added a reviewer: aaron.ballman. Herald added a project: clang. david-arm requested review of this revision.
This patch adds support for an optional second parameter to the vectorize_width pragma, which indicates whether the user wishes to use fixed-width or scalable vectorization. For example, the user can now write either: #pragma clang loop vectorize_width(4, fixed) or: #pragma clang loop vectorize_width(4, scalable). I have added a new 'scalable_numeric' state to the LoopHintAttr class to indicate whether the numeric vectorization width is scalable or not. When generating IR we make use of the new format for the llvm.loop.vectorize.width metadata, which allows us to effectively pass an ElementCount that contains the vectorization factor and a scalable flag. Tests were added to clang/test/CodeGenCXX/pragma-loop.cpp for both the 'fixed' and 'scalable' values of the optional parameter. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D89031 Files: clang/docs/LanguageExtensions.rst clang/include/clang/Basic/Attr.td clang/include/clang/Basic/DiagnosticParseKinds.td clang/lib/CodeGen/CGLoopInfo.cpp clang/lib/CodeGen/CGLoopInfo.h clang/lib/Parse/ParsePragma.cpp clang/lib/Sema/SemaStmtAttr.cpp clang/test/CodeGenCXX/pragma-loop.cpp
Index: clang/test/CodeGenCXX/pragma-loop.cpp =================================================================== --- clang/test/CodeGenCXX/pragma-loop.cpp +++ clang/test/CodeGenCXX/pragma-loop.cpp @@ -158,6 +158,30 @@ for_template_constant_expression_test<double, 2, 4, 8>(List, Length); } +// Verify do loop is performing fixed width vectorization +void do_test_fixed(int *List, int Length) { + int i = 0; + +#pragma clang loop vectorize_width(16, fixed) interleave_count(4) unroll(disable) distribute(disable) + do { + // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_15:.*]] + List[i] = i * 2; + i++; + } while (i < Length); +} + +// Verify do loop is performing scalable vectorization +void do_test_scalable(int *List, int Length) { + int i = 0; + +#pragma clang loop vectorize_width(16, scalable) interleave_count(4) unroll(disable) distribute(disable) + do { + // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_16:.*]] + List[i] = i * 2; + i++; + } while (i < Length); +} + // CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[UNROLL_FULL:.*]]} // CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} @@ -215,3 +239,10 @@ // CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], ![[WIDTH_10:.*]], ![[VECTORIZE_ENABLE]]} // CHECK: ![[WIDTH_10]] = !{!"llvm.loop.vectorize.width", i32 10} + +// CHECK: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16_FIXED:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]} +// CHECK: ![[WIDTH_16_FIXED]] = !{!"llvm.loop.vectorize.width", i32 16} + +// CHECK: ![[LOOP_16]] = distinct !{![[LOOP_16]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16_SCALABLE:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]} +// CHECK: ![[WIDTH_16_SCALABLE]] = !{!"llvm.loop.vectorize.width", ![[ELEMENT_COUNT_16_SCALABLE:.*]]} +// CHECK: ![[ELEMENT_COUNT_16_SCALABLE]] = !{i32 16, i32 1} Index: clang/lib/Sema/SemaStmtAttr.cpp 
=================================================================== --- clang/lib/Sema/SemaStmtAttr.cpp +++ clang/lib/Sema/SemaStmtAttr.cpp @@ -139,10 +139,17 @@ LoopHintAttr::PipelineInitiationInterval) .Case("distribute", LoopHintAttr::Distribute) .Default(LoopHintAttr::Vectorize); - if (Option == LoopHintAttr::VectorizeWidth || - Option == LoopHintAttr::InterleaveCount || - Option == LoopHintAttr::UnrollCount || - Option == LoopHintAttr::PipelineInitiationInterval) { + if (Option == LoopHintAttr::VectorizeWidth) { + assert(ValueExpr && "Attribute must have a valid value expression."); + if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) + return nullptr; + if (StateLoc && StateLoc->Ident && StateLoc->Ident->isStr("scalable")) + State = LoopHintAttr::ScalableNumeric; + else + State = LoopHintAttr::Numeric; + } else if (Option == LoopHintAttr::InterleaveCount || + Option == LoopHintAttr::UnrollCount || + Option == LoopHintAttr::PipelineInitiationInterval) { assert(ValueExpr && "Attribute must have a valid value expression."); if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) return nullptr; Index: clang/lib/Parse/ParsePragma.cpp =================================================================== --- clang/lib/Parse/ParsePragma.cpp +++ clang/lib/Parse/ParsePragma.cpp @@ -1093,7 +1093,6 @@ assert(Tok.is(tok::annot_pragma_loop_hint)); PragmaLoopHintInfo *Info = static_cast<PragmaLoopHintInfo *>(Tok.getAnnotationValue()); - IdentifierInfo *PragmaNameInfo = Info->PragmaName.getIdentifierInfo(); Hint.PragmaNameLoc = IdentifierLoc::create( Actions.Context, Info->PragmaName.getLocation(), PragmaNameInfo); @@ -1193,6 +1192,24 @@ ExprResult R = ParseConstantExpression(); + if (OptionInfo && OptionInfo->getName() == "vectorize_width" && + Tok.is(tok::comma)) { + PP.Lex(Tok); // , + + SourceLocation StateLoc = Tok.getLocation(); + IdentifierInfo *StateInfo = Tok.getIdentifierInfo(); + StringRef IsScalableStr = StateInfo->getName(); + + if (IsScalableStr != "scalable" 
&& IsScalableStr != "fixed") { + Diag(Tok.getLocation(), diag::err_pragma_loop_invalid_vectorize_option); + return false; + } + PP.Lex(Tok); // Identifier + + Hint.StateLoc = + IdentifierLoc::create(Actions.Context, StateLoc, StateInfo); + } + // Tokens following an error in an ill-formed constant expression will // remain in the token stream and must be removed. if (Tok.isNot(tok::eof)) { Index: clang/lib/CodeGen/CGLoopInfo.h =================================================================== --- clang/lib/CodeGen/CGLoopInfo.h +++ clang/lib/CodeGen/CGLoopInfo.h @@ -19,6 +19,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TypeSize.h" namespace llvm { class BasicBlock; @@ -56,7 +57,7 @@ LVEnableState VectorizePredicateEnable; /// Value for llvm.loop.vectorize.width metadata. - unsigned VectorizeWidth; + llvm::ElementCount VectorizeWidth; /// Value for llvm.loop.interleave.count metadata. unsigned InterleaveCount; @@ -253,7 +254,9 @@ } /// Set the vectorize width for the next loop pushed. - void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; } + void setVectorizeWidth(unsigned W, bool IsScalable = false) { + StagedAttrs.VectorizeWidth = llvm::ElementCount::get(W, IsScalable); + } /// Set the interleave count for the next loop pushed. 
void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; } Index: clang/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- clang/lib/CodeGen/CGLoopInfo.cpp +++ clang/lib/CodeGen/CGLoopInfo.cpp @@ -258,7 +258,7 @@ bool IsVectorPredicateEnabled = false; if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified && Attrs.VectorizeEnable != LoopAttributes::Disable && - Attrs.VectorizeWidth < 1) { + Attrs.VectorizeWidth.getKnownMinValue() < 1) { IsVectorPredicateEnabled = (Attrs.VectorizePredicateEnable == LoopAttributes::Enable); @@ -271,12 +271,25 @@ } // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), - Attrs.VectorizeWidth))}; - Args.push_back(MDNode::get(Ctx, Vals)); + if (Attrs.VectorizeWidth.getKnownMinValue() > 0) { + if (Attrs.VectorizeWidth.isScalable()) { + Metadata *ECArgs[] = { + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth.getKnownMinValue())), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.VectorizeWidth.isScalable()))}; + MDNode *EC = MDNode::get(Ctx, ECArgs); + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), EC}; + Args.push_back(MDNode::get(Ctx, Vals)); + } else { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth.getKnownMinValue()))}; + + Args.push_back(MDNode::get(Ctx, Vals)); + } } // Setting interleave.count @@ -293,8 +306,9 @@ // 2) it is implied when vectorize.predicate is set, or // 3) it is implied when vectorize.width is set. 
if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || - IsVectorPredicateEnabled || - Attrs.VectorizeWidth > 1 ) { + IsVectorPredicateEnabled || Attrs.VectorizeWidth.getKnownMinValue() > 1 || + (Attrs.VectorizeWidth.getKnownMinValue() == 1 && + Attrs.VectorizeWidth.isScalable())) { bool AttrVal = Attrs.VectorizeEnable != LoopAttributes::Disable; Args.push_back( MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), @@ -435,14 +449,15 @@ : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), UnrollAndJamEnable(LoopAttributes::Unspecified), - VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), + VectorizePredicateEnable(LoopAttributes::Unspecified), + VectorizeWidth(ElementCount::getFixed(0)), InterleaveCount(0), + UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} void LoopAttributes::clear() { IsParallel = false; - VectorizeWidth = 0; + VectorizeWidth = ElementCount::getFixed(0); InterleaveCount = 0; UnrollCount = 0; UnrollAndJamCount = 0; @@ -724,6 +739,16 @@ break; } break; + case LoopHintAttr::ScalableNumeric: + switch (Option) { + case LoopHintAttr::VectorizeWidth: + setVectorizeWidth(ValueInt, true); + break; + default: + llvm_unreachable("Options cannot be used with 'scalable' hint."); + break; + } + break; case LoopHintAttr::Numeric: switch (Option) { case LoopHintAttr::VectorizeWidth: Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1380,6 +1380,9 @@ "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " "pipeline, 
pipeline_initiation_interval, vectorize_predicate, or distribute">; +def err_pragma_loop_invalid_vectorize_option : Error< + "vectorize_width loop hint malformed; use 'vectorize_width(X, scalable)' or " + "'vectorize_width(X, fixed)' where X is an integer">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract' or 'reassociate'">; Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -3293,8 +3293,8 @@ "PipelineDisabled", "PipelineInitiationInterval", "Distribute", "VectorizePredicate"]>, EnumArgument<"State", "LoopHintState", - ["enable", "disable", "numeric", "assume_safety", "full"], - ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, + ["enable", "disable", "numeric", "scalable_numeric", "assume_safety", "full"], + ["Enable", "Disable", "Numeric", "ScalableNumeric", "AssumeSafety", "Full"]>, ExprArgument<"Value">]; let AdditionalMembers = [{ Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -3052,10 +3052,15 @@ ... } -The vector width is specified by ``vectorize_width(_value_)`` and the interleave -count is specified by ``interleave_count(_value_)``, where -_value_ is a positive integer. This is useful for specifying the optimal -width/count of the set of target architectures supported by your application. +The vector width is specified by +``vectorize_width(_value_[, fixed|scalable])``, where __value__ is a positive +integer and the type of vectorization can be specified with an optional +second parameter. In this case 'fixed' is the default and refers to fixed width +vectorization, whereas 'scalable' indicates the compiler should use scalable +vectors instead. 
The interleave count is specified by +``interleave_count(_value_)``, where _value_ is a positive integer. This is +useful for specifying the optimal width/count of the set of target +architectures supported by your application. .. code-block:: c++
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits