[llvm-branch-commits] [clang] release/19.x: [clang-format] Fix a regression in parsing `switch` in macro call (#114506) (PR #114640)
https://github.com/HazardyKnusperkeks approved this pull request. https://github.com/llvm/llvm-project/pull/114640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114438 >From 1aaf29fc290cca84843f0ed1d4b9b9258b8daa36 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 31 Oct 2024 12:49:07 -0400 Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 79 +++ .../annotate-kernel-features-hsa-call.ll | 46 +-- .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 59 +++--- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 ++- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 8 files changed, 111 insertions(+), 93 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 7878e13cfd9bf3..5a84a874fcb811 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -198,6 +198,17 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { +auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); +if (Val && Val->second == 0) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); +} +return Val; + } + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -768,22 +779,6 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { -// Don't add the attribute if it's the implied default. 
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) - return ChangeStatus::UNCHANGED; - -Function *F = getAssociatedFunction(); -LLVMContext &Ctx = F->getContext(); -SmallString<10> Buffer; -raw_svector_ostream OS(Buffer); -OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; -return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, AttrName, OS.str())}, - /*ForceReplace=*/true); - } - const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -880,29 +875,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { -return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); -if (const auto *AssumedGroupSize = A.getAAFor( -*this, IRPosition::function(*F), DepClassTy::REQUIRED); -AssumedGroupSize->isValidState()) { +auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); +}; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); +std::pair MaxWavesPerEURange{ +1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); +// If the attribute exists, we will honor it if it is not the default. 
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { +TakeRange(*Attr); +return; + } } -if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); +// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the +// calculation of waves per EU involves flat work group size, we can't +// simply use an assumed flat work group size as a start point, because the +// update of flat work group size is in an inverse direction of waves per +// EU. However, we can still do something if it is an entry function. Since +// an entry function is a terminal node, and flat work group size either +// from attribute or default will be used anyway, we can take that value and +// calculate the waves per EU based on it. This result can't be updated by +// no means, but that could still allow us
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114438 >From 013d7aeb4698b74af0b48ce74512d770d46cd81e Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 31 Oct 2024 12:49:07 -0400 Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 81 +++ .../annotate-kernel-features-hsa-call.ll | 46 ++- .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 59 +++--- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 ++- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 8 files changed, 113 insertions(+), 93 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 376f2067582f32..7d51412730d4d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -201,6 +201,19 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { +auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); +if (!Val) + return std::nullopt; +if (!Val->second) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); +} +return std::make_pair(Val->first, *(Val->second)); + } + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -771,22 +784,6 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { -// Don't add the attribute if it's the implied default. 
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) - return ChangeStatus::UNCHANGED; - -Function *F = getAssociatedFunction(); -LLVMContext &Ctx = F->getContext(); -SmallString<10> Buffer; -raw_svector_ostream OS(Buffer); -OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; -return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, AttrName, OS.str())}, - /*ForceReplace=*/true); - } - const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -883,29 +880,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { -return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); -if (const auto *AssumedGroupSize = A.getAAFor( -*this, IRPosition::function(*F), DepClassTy::REQUIRED); -AssumedGroupSize->isValidState()) { +auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); +}; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); +std::pair MaxWavesPerEURange{ +1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); +// If the attribute exists, we will honor it if it is not the default. 
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { +TakeRange(*Attr); +return; + } } -if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); +// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the +// calculation of waves per EU involves flat work group size, we can't +// simply use an assumed flat work group size as a start point, because the +// update of flat work group size is in an inverse direction of waves per +// EU. However, we can still do something if it is an entry function. Since +// an entry function is a terminal node, and flat work group size either +// from attribute or default will be used anyway, we can take that value and +// calculate the waves per EU based on it. This resu
[llvm-branch-commits] [llvm] [CodeExtractor] Optionally keep code in original function. (PR #114669)
https://github.com/Meinersbur created https://github.com/llvm/llvm-project/pull/114669 When extracting a region into a new function, optionally allow cloning basic blocks and instructions into the extracted function instead of moving them. This keeps the original code in the original function so that its blocks can still be referenced -- and branched to -- in the original function. The motivation is the use of CodeExtractor in the OpenMPIRBuilder. The implementation of createParallel first emits the parallel region into the lexical function, then uses CodeExtractor to outline that region into a new function. The problem here is that Clang's code generator will reference some basic blocks from code inside as well as outside the region. This includes some special-purpose blocks (EHResumeBlock, TerminateLandingPad, TerminateHandler, UnreachableBlock, ...) and cleanup/dtor code that is re-used from multiple scopes (see test case extract-block-cleanup.ll). Moving these blocks into a different function will result in malformed IR. The KeepOldBlocks option will instead clone the outlined code into a new function, keeping the auxiliary code intact and relying on later DCE to remove code that has indeed become unreachable. Additionally, this code could also be used as a fallback when threading/offloading is disabled via an environment option. Use of KeepOldBlocks by OpenMPIRBuilder is not part of this patch. For testing, we extend llvm-extract to allow the use of this option, thus making it more powerful. 
Originally submitted as https://reviews.llvm.org/D115216 >From eb8c6a3ba6e3bc9ddd1ac9579ff665f74034051f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Dec 2021 22:36:52 -0600 Subject: [PATCH 1/5] KeepOldBlocks --- llvm/include/llvm/Transforms/IPO.h| 4 +- llvm/include/llvm/Transforms/Utils/Cloning.h | 14 +- .../llvm/Transforms/Utils/CodeExtractor.h | 15 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp| 34 +- llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 326 ++ .../llvm-extract/extract-block-cleanup.ll | 116 +++ .../extract-block-multiple-exits.ll | 200 +++ .../tools/llvm-extract/extract-block-sink.ll | 67 llvm/test/tools/llvm-extract/extract-block.ll | 10 +- .../extract-blocks-with-groups.ll | 24 +- llvm/tools/llvm-extract/llvm-extract.cpp | 20 +- 12 files changed, 737 insertions(+), 105 deletions(-) create mode 100644 llvm/test/tools/llvm-extract/extract-block-cleanup.ll create mode 100644 llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll create mode 100644 llvm/test/tools/llvm-extract/extract-block-sink.ll diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 67b9a93c47b215..d11c27304815db 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass(); ModulePass *createBlockExtractorPass(); ModulePass * createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks = false); ModulePass * createBlockExtractorPass(const SmallVectorImpl> &GroupsOfBlocksToExtract, - bool EraseFunctions); + bool EraseFunctions, bool KeepOldBlocks = false); /// createStripDeadPrototypesPass - This pass removes any function declarations /// (prototypes) that are not used. 
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 5a1f322b20544c..cff5e6bc8daeaf 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -114,10 +114,16 @@ struct ClonedCodeInfo { /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. -BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, -const Twine &NameSuffix = "", Function *F = nullptr, -ClonedCodeInfo *CodeInfo = nullptr, -DebugInfoFinder *DIFinder = nullptr); +/// +/// If you would like to clone only a subset of instructions in the basic block, +/// you can specify a callback returning true only for those instructions that +/// are to be cloned. +BasicBlock * +CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, +const Twine &NameSuffix = "", Function *F = nullptr, +ClonedCodeInfo *CodeInfo = nullptr, +DebugInfoFinder *DIFinder = nullptr, +function_ref InstSelect = {}); /// Return a copy of the specified function and add it to that /// function's module. Also, any references speci