[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DOACROSS clause (PR #115397)
@@ -541,6 +541,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPConstruct &) { } void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { + loopStack_.push_back(&x); kparzysz wrote: The loopStack was added to keep track of the induction variables in the loop nest being visited. This is used to verify that the variables in the SINK iteration vector are actually induction variables. See flang/lib/Semantics/check-omp-structure.cpp, lines 3610-3633. https://github.com/llvm/llvm-project/pull/115397 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DOACROSS clause (PR #115397)
https://github.com/kparzysz edited https://github.com/llvm/llvm-project/pull/115397 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineSink to NPM (PR #115434)
cjappl wrote: (sorry I don't know anything about this part of the codebase, so I am removing myself from the reviewers) https://github.com/llvm/llvm-project/pull/115434 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891 >From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 28 Aug 2024 21:10:25 +0200 Subject: [PATCH 1/3] Reworked block probe matching Use new probe ifaces Get all function probes at once Drop ProfileUsePseudoProbes Unify matchWithBlockPseudoProbes Distinguish exact and loose probe match --- bolt/include/bolt/Core/BinaryContext.h| 20 +- bolt/lib/Passes/BinaryPasses.cpp | 40 ++- bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++ bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 8 +- 4 files changed, 237 insertions(+), 235 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3e20cb607e657b..3f7b2ac0bc6cf9 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -724,14 +724,26 @@ class BinaryContext { uint32_t NumStaleBlocks{0}; /// the number of exactly matched basic blocks uint32_t NumExactMatchedBlocks{0}; -/// the number of pseudo probe matched basic blocks -uint32_t NumPseudoProbeMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; /// the count of exactly matched samples uint64_t ExactMatchedSampleCount{0}; -/// the count of pseudo probe matched samples -uint64_t PseudoProbeMatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t LooseMatchedSampleCount{0}; +/// the count of exactly pseudo probe matched samples +uint64_t PseudoProbeExactMatchedSampleCount{0}; +/// the count of loosely pseudo probe matched samples 
+uint64_t PseudoProbeLooseMatchedSampleCount{0}; +/// the count of call matched samples +uint64_t CallMatchedSampleCount{0}; /// the number of stale functions that have matching number of blocks in /// the profile uint64_t NumStaleFuncsWithEqualBlockCount{0}; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index b786f07a6a6651..8edbd58c3ed3de 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( -"BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic " +"BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" " (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", -100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / 
BC.Stats.NumStaleBlocks, -BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, -100.0 * BC.Stats.PseudoProbeMatchedSampleCount / -BC.Stats.StaleSampleCount, -BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); +100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks, +BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount, +BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount); +BC
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
aaupov wrote: > > > Ping @wlei-llvm > > > > > > Sorry for the delay. The new version addressed my last comment (with just > > minor nits). However, I didn't fully follow the new features related to > > `ProbeMatchSpecs` stuffs. Could you add more descriptions to the diff > > summary? Or if it’s not a lot of work, could we split it into two patches? > > We could commit the first part, and I will review the second part > > separately. > > NVM, I think now I get what `ProbeMatchSpecs` does, it's a vector because a > function can have multiple sections(function split) Thank you for reviewing and sorry for the delay from my end; I was busy with profile quality work. ProbeMatchSpecs is a mechanism to match probes belonging to another binary function. I'm going to utilize it in probe-based function matching (#100446). For example: source function: ``` void foo() { bar(); } ``` profiled binary: bar is not inlined => there is a top-level function bar; new binary (where the profile is applied to): bar is inlined into foo. Right now, BOLT does 1:1 matching between profile functions and binary functions based on the name. #100446 will extend this to N:M where multiple profiles can be matched to one binary function (as in the example above where binary function foo would use profiles for foo and bar), and one profile can be matched to multiple binary functions (e.g., if bar was inlined into multiple functions). https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891 >From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 28 Aug 2024 21:10:25 +0200 Subject: [PATCH 1/3] Reworked block probe matching Use new probe ifaces Get all function probes at once Drop ProfileUsePseudoProbes Unify matchWithBlockPseudoProbes Distinguish exact and loose probe match --- bolt/include/bolt/Core/BinaryContext.h| 20 +- bolt/lib/Passes/BinaryPasses.cpp | 40 ++- bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++ bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 8 +- 4 files changed, 237 insertions(+), 235 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3e20cb607e657b..3f7b2ac0bc6cf9 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -724,14 +724,26 @@ class BinaryContext { uint32_t NumStaleBlocks{0}; /// the number of exactly matched basic blocks uint32_t NumExactMatchedBlocks{0}; -/// the number of pseudo probe matched basic blocks -uint32_t NumPseudoProbeMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; /// the count of exactly matched samples uint64_t ExactMatchedSampleCount{0}; -/// the count of pseudo probe matched samples -uint64_t PseudoProbeMatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t LooseMatchedSampleCount{0}; +/// the count of exactly pseudo probe matched samples +uint64_t PseudoProbeExactMatchedSampleCount{0}; +/// the count of loosely pseudo probe matched samples 
+uint64_t PseudoProbeLooseMatchedSampleCount{0}; +/// the count of call matched samples +uint64_t CallMatchedSampleCount{0}; /// the number of stale functions that have matching number of blocks in /// the profile uint64_t NumStaleFuncsWithEqualBlockCount{0}; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index b786f07a6a6651..8edbd58c3ed3de 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( -"BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic " +"BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" " (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", -100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / 
BC.Stats.NumStaleBlocks, -BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, -100.0 * BC.Stats.PseudoProbeMatchedSampleCount / -BC.Stats.StaleSampleCount, -BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); +100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks, +BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount, +BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount); +BC
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/100446 >From 56b45b104a2ab2dbc4ab8e9643c90092894b579e Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:29:22 -0700 Subject: [PATCH 1/5] Comment Created using spr 1.3.4 --- bolt/include/bolt/Profile/YAMLProfileReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 6c00f82302fb92..bc09751fcae75e 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -108,7 +108,7 @@ class YAMLProfileReader : public ProfileReaderBase { std::vector YamlProfileToFunction; using FunctionSet = std::unordered_set; - /// To keep track of functions that have a matched profile before the profilez + /// To keep track of functions that have a matched profile before the profile /// is attributed. FunctionSet ProfiledFunctions; >From b851ca65c2bf2a9569315d62722b60a04c8102ee Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:39:48 -0700 Subject: [PATCH 2/5] Was accessing wrong YamlBF Hash, fixed Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 68af95a1cd043e..f5ac0b8e2c56a2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,7 +614,7 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { -auto It = PseudoProbeDescHashToBF.find(YamlBF.Hash); +auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; BinaryFunction *BF = It->second; >From 39ba7175c9224c3584db7f5f8ca8fbed14da41e5 Mon Sep 17 00:00:00 2001 From: 
shawbyoung Date: Wed, 24 Jul 2024 11:49:54 -0700 Subject: [PATCH 3/5] Changed ordering of matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f5ac0b8e2c56a2..75ec4465856a15 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -770,8 +770,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithHash = matchWithHash(BC); const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); - const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); + const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) @@ -792,10 +792,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { << " functions with matching LTO common names\n"; outs() << "BOLT-INFO: matched " << MatchedWithCallGraph << " functions with call graph\n"; -outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity - << " functions with similar names\n"; outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes << " functions with pseudo probes\n"; +outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity + << " functions with similar names\n"; } // Set for parseFunctionProfile(). 
>From 11af7f19953da7c5ad4eb263be3d38a70b2518e0 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 12:56:56 -0700 Subject: [PATCH 4/5] Added check for YamlBF.Used in pseudo probe function matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 75ec4465856a15..8dfdf1fb30eb36 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,6 +614,8 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; >From ad4d98fc4bf3f16d119ddbc5abad11b93641bf99 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 11 Sep 2024 15:49:03 -0700 Subject: [PATCH 5/5] Debug logging Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 31 +++--- 1 file changed, 23 ins
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/100446 >From 56b45b104a2ab2dbc4ab8e9643c90092894b579e Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:29:22 -0700 Subject: [PATCH 1/5] Comment Created using spr 1.3.4 --- bolt/include/bolt/Profile/YAMLProfileReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 6c00f82302fb92..bc09751fcae75e 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -108,7 +108,7 @@ class YAMLProfileReader : public ProfileReaderBase { std::vector YamlProfileToFunction; using FunctionSet = std::unordered_set; - /// To keep track of functions that have a matched profile before the profilez + /// To keep track of functions that have a matched profile before the profile /// is attributed. FunctionSet ProfiledFunctions; >From b851ca65c2bf2a9569315d62722b60a04c8102ee Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:39:48 -0700 Subject: [PATCH 2/5] Was accessing wrong YamlBF Hash, fixed Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 68af95a1cd043e..f5ac0b8e2c56a2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,7 +614,7 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { -auto It = PseudoProbeDescHashToBF.find(YamlBF.Hash); +auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; BinaryFunction *BF = It->second; >From 39ba7175c9224c3584db7f5f8ca8fbed14da41e5 Mon Sep 17 00:00:00 2001 From: 
shawbyoung Date: Wed, 24 Jul 2024 11:49:54 -0700 Subject: [PATCH 3/5] Changed ordering of matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f5ac0b8e2c56a2..75ec4465856a15 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -770,8 +770,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithHash = matchWithHash(BC); const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); - const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); + const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) @@ -792,10 +792,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { << " functions with matching LTO common names\n"; outs() << "BOLT-INFO: matched " << MatchedWithCallGraph << " functions with call graph\n"; -outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity - << " functions with similar names\n"; outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes << " functions with pseudo probes\n"; +outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity + << " functions with similar names\n"; } // Set for parseFunctionProfile(). 
>From 11af7f19953da7c5ad4eb263be3d38a70b2518e0 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 12:56:56 -0700 Subject: [PATCH 4/5] Added check for YamlBF.Used in pseudo probe function matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 75ec4465856a15..8dfdf1fb30eb36 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,6 +614,8 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; >From ad4d98fc4bf3f16d119ddbc5abad11b93641bf99 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 11 Sep 2024 15:49:03 -0700 Subject: [PATCH 5/5] Debug logging Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 31 +++--- 1 file changed, 23 ins
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 7be0b9e05cd110082fdba1f58f6b900756985981 3bd1ec25c2a99a23ac07b3e39e2d5d99a7a1dd2b --extensions h,cpp -- bolt/include/bolt/Profile/YAMLProfileReader.h bolt/lib/Profile/YAMLProfileReader.cpp `` View the diff from clang-format here. ``diff diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 07bc890926..2c0c3cc137 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -51,9 +51,10 @@ llvm::cl::opt cl::desc("Match functions with call graph"), cl::Hidden, cl::cat(BoltOptCategory)); -llvm::cl::opt MatchWithPseudoProbes( -"match-with-pseudo-probes", cl::desc("Match functions with pseudo probes"), -cl::Hidden, cl::cat(BoltOptCategory)); +llvm::cl::opt +MatchWithPseudoProbes("match-with-pseudo-probes", + cl::desc("Match functions with pseudo probes"), + cl::Hidden, cl::cat(BoltOptCategory)); llvm::cl::opt ProfileUseDFS("profile-use-dfs", cl::desc("use DFS order for YAML profile"), @@ -809,12 +810,11 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { const auto ProbeIt = Node->getProbes().begin(); const auto *Probe = (ProbeIt != Node->getProbes().end()) ? &*ProbeIt : nullptr; - LLVM_DEBUG(dbgs() - << MatchedNodes << "/" << YamlBF.InlineTree.size() - << " match with " << *BF << " at " - << (Probe ? Probe->getInlineContextStr(GUID2FuncDescMap) - : "(none)") - << '\n'); + LLVM_DEBUG(dbgs() << MatchedNodes << "/" << YamlBF.InlineTree.size() +<< " match with " << *BF << " at " +<< (Probe ? 
Probe->getInlineContextStr(GUID2FuncDescMap) + : "(none)") +<< '\n'); } MatchedWithPseudoProbes += !!Matched; YamlBF.Used |= !!Matched; `` https://github.com/llvm/llvm-project/pull/100446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/100446 >From 56b45b104a2ab2dbc4ab8e9643c90092894b579e Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:29:22 -0700 Subject: [PATCH 1/6] Comment Created using spr 1.3.4 --- bolt/include/bolt/Profile/YAMLProfileReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 6c00f82302fb92..bc09751fcae75e 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -108,7 +108,7 @@ class YAMLProfileReader : public ProfileReaderBase { std::vector YamlProfileToFunction; using FunctionSet = std::unordered_set; - /// To keep track of functions that have a matched profile before the profilez + /// To keep track of functions that have a matched profile before the profile /// is attributed. FunctionSet ProfiledFunctions; >From b851ca65c2bf2a9569315d62722b60a04c8102ee Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 11:39:48 -0700 Subject: [PATCH 2/6] Was accessing wrong YamlBF Hash, fixed Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 68af95a1cd043e..f5ac0b8e2c56a2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,7 +614,7 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { -auto It = PseudoProbeDescHashToBF.find(YamlBF.Hash); +auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; BinaryFunction *BF = It->second; >From 39ba7175c9224c3584db7f5f8ca8fbed14da41e5 Mon Sep 17 00:00:00 2001 From: 
shawbyoung Date: Wed, 24 Jul 2024 11:49:54 -0700 Subject: [PATCH 3/6] Changed ordering of matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f5ac0b8e2c56a2..75ec4465856a15 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -770,8 +770,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithHash = matchWithHash(BC); const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); - const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); + const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) @@ -792,10 +792,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { << " functions with matching LTO common names\n"; outs() << "BOLT-INFO: matched " << MatchedWithCallGraph << " functions with call graph\n"; -outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity - << " functions with similar names\n"; outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes << " functions with pseudo probes\n"; +outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity + << " functions with similar names\n"; } // Set for parseFunctionProfile(). 
>From 11af7f19953da7c5ad4eb263be3d38a70b2518e0 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 24 Jul 2024 12:56:56 -0700 Subject: [PATCH 4/6] Added check for YamlBF.Used in pseudo probe function matching Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 75ec4465856a15..8dfdf1fb30eb36 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -614,6 +614,8 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t MatchedWithPseudoProbes = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { +if (YamlBF.Used) + continue; auto It = PseudoProbeDescHashToBF.find(YamlBF.PseudoProbeDescHash); if (It == PseudoProbeDescHashToBF.end()) continue; >From ad4d98fc4bf3f16d119ddbc5abad11b93641bf99 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Wed, 11 Sep 2024 15:49:03 -0700 Subject: [PATCH 5/6] Debug logging Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 31 +++--- 1 file changed, 23 ins
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineSink to NPM (PR #115434)
@@ -0,0 +1,26 @@ +//===- MachineSink.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_MACHINESINK_H +#define LLVM_CODEGEN_MACHINESINK_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class MachineSinkingPass : public PassInfoMixin { + bool EnableSinkAndFold; + +public: + MachineSinkingPass(bool EnableSinkAndFold = false) + : EnableSinkAndFold(EnableSinkAndFold) {} optimisan wrote: Right https://github.com/llvm/llvm-project/pull/115434 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
@@ -69,3 +72,37 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, return std::pair(Reg, 0); } + +IntrinsicLaneMaskAnalyzer::IntrinsicLaneMaskAnalyzer(MachineFunction &MF) +: MRI(MF.getRegInfo()) { + initLaneMaskIntrinsics(MF); +} + +bool IntrinsicLaneMaskAnalyzer::isS32S64LaneMask(Register Reg) const { + return S32S64LaneMask.contains(Reg); +} + +void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) { + for (auto &MBB : MF) { +for (auto &MI : MBB) { + GIntrinsic *GI = dyn_cast(&MI); + if (GI && GI->is(Intrinsic::amdgcn_if_break)) { +S32S64LaneMask.insert(MI.getOperand(3).getReg()); +findLCSSAPhi(MI.getOperand(0).getReg()); + } + + if (MI.getOpcode() == AMDGPU::SI_IF || + MI.getOpcode() == AMDGPU::SI_ELSE) { +findLCSSAPhi(MI.getOperand(0).getReg()); + } arsenm wrote: I don't understand why you are mixing matching the intrinsic form of if.break above, but the selected pseudos for if and else https://github.com/llvm/llvm-project/pull/112863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
@@ -66,9 +81,208 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() { return new AMDGPURegBankSelect(); } +class RegBankSelectHelper { + MachineIRBuilder &B; + MachineRegisterInfo &MRI; + AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA; + const MachineUniformityInfo &MUI; + const SIRegisterInfo &TRI; + const RegisterBank *SgprRB; + const RegisterBank *VgprRB; + const RegisterBank *VccRB; + +public: + RegBankSelectHelper(MachineIRBuilder &B, + AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA, + const MachineUniformityInfo &MUI, + const SIRegisterInfo &TRI, const RegisterBankInfo &RBI) + : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI), +SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)), +VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)), +VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {} + + // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of + // the cycle + // Note: uniformity analysis does not consider that registers with vgpr def + // are divergent (you can have uniform value in vgpr). + // - TODO: implicit use of $exec could be implemented as indicator that + // instruction is divergent + bool isTemporalDivergenceCopy(Register Reg) { +MachineInstr *MI = MRI.getVRegDef(Reg); +if (!MI->isCopy() || MI->getNumImplicitOperands() != 1) + return false; + +return MI->implicit_operands().begin()->getReg() == TRI.getExec(); arsenm wrote: Just hardcode the operand index https://github.com/llvm/llvm-project/pull/112863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
@@ -66,9 +81,208 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() { return new AMDGPURegBankSelect(); } +class RegBankSelectHelper { + MachineIRBuilder &B; + MachineRegisterInfo &MRI; + AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA; + const MachineUniformityInfo &MUI; + const SIRegisterInfo &TRI; + const RegisterBank *SgprRB; + const RegisterBank *VgprRB; + const RegisterBank *VccRB; + +public: + RegBankSelectHelper(MachineIRBuilder &B, + AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA, + const MachineUniformityInfo &MUI, + const SIRegisterInfo &TRI, const RegisterBankInfo &RBI) + : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI), +SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)), +VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)), +VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {} + + // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of + // the cycle + // Note: uniformity analysis does not consider that registers with vgpr def + // are divergent (you can have uniform value in vgpr). + // - TODO: implicit use of $exec could be implemented as indicator that + // instruction is divergent + bool isTemporalDivergenceCopy(Register Reg) { arsenm wrote: Special casing a copy with an implicit exec operand doesn't feel hermetic. We already have ugly hacks to insert these copies at one point, and it's not a holistic strategy. There are ample opportunities to lose the operand, such as introducing new derived operations which do not have the use. Where are these exec operands getting inserted? Should we have a different pseudo instead? https://github.com/llvm/llvm-project/pull/112863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
@@ -1352,6 +1352,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + auto fatalBothAuthAndNonAuth = [&sym]() { +fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() + MaskRay wrote: We now use `Fatal(ctx) << ...`. Unlike fatal, `Fatal` executes `exit` but is not `noreturn`. `Fatal` should generally be avoided in favor of `Err`. https://github.com/llvm/llvm-project/pull/113817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
https://github.com/MaskRay edited https://github.com/llvm/llvm-project/pull/113817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR][NFC] `DominanceInfo`: Share same impl for block/op dominance (PR #115587)
https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/115587 The `properlyDominates` implementations for blocks and ops are very similar. This commit replaces them with a single implementation that operates on block iterators. That implementation can be used to implement both `properlyDominates` variants. Note: A subsequent commit will add a new public `properlyDominates` overload that accepts block iterators. That functionality can then be used to find a valid insertion point at which a range of values is defined (by utilizing post dominance). Depends on #115433. >From 3911ab137f9ed6643e6b1a609841bd060d9b7343 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 9 Nov 2024 07:13:07 +0100 Subject: [PATCH] [mlir][IR][NFC] `DominanceInfo`: Share same impl for block/op dominance The `properlyDominates` implementations for blocks and ops are very similar. This commit replaces them with a single implementation that operates on block iterators. That implementation can be used to implement both `properlyDominates` variants. Note: A subsequent commit will add a new public `properlyDominates` overload that accepts block iterators. That functionality can then be used to find a valid insertion point at which a range of values is defined (by utilizing post dominance). Depends on #115433. --- mlir/include/mlir/IR/Dominance.h | 28 +++ mlir/lib/IR/Dominance.cpp| 124 --- 2 files changed, 92 insertions(+), 60 deletions(-) diff --git a/mlir/include/mlir/IR/Dominance.h b/mlir/include/mlir/IR/Dominance.h index 933ec09c5ede29..a6b2475e12b1c6 100644 --- a/mlir/include/mlir/IR/Dominance.h +++ b/mlir/include/mlir/IR/Dominance.h @@ -113,12 +113,12 @@ class DominanceInfoBase { llvm::PointerIntPair getDominanceInfo(Region *region, bool needsDomTree) const; - /// Return "true" if the specified block A properly (post)dominates block B. 
- bool properlyDominatesImpl(Block *a, Block *b) const; - - /// Return "true" if the specified op A properly (post)dominates op B. - bool properlyDominatesImpl(Operation *a, Operation *b, - bool enclosingOpOk = true) const; + /// Return "true" if block iterator A properly (post)dominates block iterator + /// B. If `enclosingOk` is set, A is considered to (post)dominate B if A + /// encloses B. + bool properlyDominatesImpl(Block *aBlock, Block::iterator aIt, Block *bBlock, + Block::iterator bIt, + bool enclosingOk = true) const; /// A mapping of regions to their base dominator tree and a cached /// "hasSSADominance" bit. This map does not contain dominator trees for @@ -151,9 +151,7 @@ class DominanceInfo : public detail::DominanceInfoBase { /// The `enclosingOpOk` flag says whether we should return true if the B op /// is enclosed by a region on A. bool properlyDominates(Operation *a, Operation *b, - bool enclosingOpOk = true) const { -return super::properlyDominatesImpl(a, b, enclosingOpOk); - } + bool enclosingOpOk = true) const; /// Return true if operation A dominates operation B, i.e. if A and B are the /// same operation or A properly dominates B. @@ -188,9 +186,7 @@ class DominanceInfo : public detail::DominanceInfoBase { /// Graph regions have only a single block. To be consistent with "proper /// dominance" of ops, the single block is considered to properly dominate /// itself in a graph region. - bool properlyDominates(Block *a, Block *b) const { -return super::properlyDominatesImpl(a, b); - } + bool properlyDominates(Block *a, Block *b) const; }; /// A class for computing basic postdominance information. @@ -200,9 +196,7 @@ class PostDominanceInfo : public detail::DominanceInfoBase { /// Return true if operation A properly postdominates operation B. 
bool properlyPostDominates(Operation *a, Operation *b, - bool enclosingOpOk = true) { -return super::properlyDominatesImpl(a, b, enclosingOpOk); - } + bool enclosingOpOk = true); /// Return true if operation A postdominates operation B. bool postDominates(Operation *a, Operation *b) { @@ -210,9 +204,7 @@ class PostDominanceInfo : public detail::DominanceInfoBase { } /// Return true if the specified block A properly postdominates block B. - bool properlyPostDominates(Block *a, Block *b) { -return super::properlyDominatesImpl(a, b); - } + bool properlyPostDominates(Block *a, Block *b); /// Return true if the specified block A postdominates block B. bool postDominates(Block *a, Block *b) { diff --git a/mlir/lib/IR/Dominance.cpp b/mlir/lib/IR/Dominance.cpp index 406e0f2d62d640..337a1b7af9d40f 100644 --- a/mlir/lib/IR/Dominance.cpp +++ b/mlir/lib/IR/Dominance.cpp @@ -213,61 +213,73 @@ DominanceInfoBase::findNearestCommonDominator(Block *a, return
[llvm-branch-commits] [mlir] [mlir][IR][NFC] `DominanceInfo`: Share same impl for block/op dominance (PR #115587)
llvmbot wrote: @llvm/pr-subscribers-mlir-core Author: Matthias Springer (matthias-springer) Changes The `properlyDominates` implementations for blocks and ops are very similar. This commit replaces them with a single implementation that operates on block iterators. That implementation can be used to implement both `properlyDominates` variants. Note: A subsequent commit will add a new public `properlyDominates` overload that accepts block iterators. That functionality can then be used to find a valid insertion point at which a range of values is defined (by utilizing post dominance). Depends on #115433. --- Full diff: https://github.com/llvm/llvm-project/pull/115587.diff 2 Files Affected: - (modified) mlir/include/mlir/IR/Dominance.h (+10-18) - (modified) mlir/lib/IR/Dominance.cpp (+82-42) ``diff diff --git a/mlir/include/mlir/IR/Dominance.h b/mlir/include/mlir/IR/Dominance.h index 933ec09c5ede29..a6b2475e12b1c6 100644 --- a/mlir/include/mlir/IR/Dominance.h +++ b/mlir/include/mlir/IR/Dominance.h @@ -113,12 +113,12 @@ class DominanceInfoBase { llvm::PointerIntPair getDominanceInfo(Region *region, bool needsDomTree) const; - /// Return "true" if the specified block A properly (post)dominates block B. - bool properlyDominatesImpl(Block *a, Block *b) const; - - /// Return "true" if the specified op A properly (post)dominates op B. - bool properlyDominatesImpl(Operation *a, Operation *b, - bool enclosingOpOk = true) const; + /// Return "true" if block iterator A properly (post)dominates block iterator + /// B. If `enclosingOk` is set, A is considered to (post)dominate B if A + /// encloses B. + bool properlyDominatesImpl(Block *aBlock, Block::iterator aIt, Block *bBlock, + Block::iterator bIt, + bool enclosingOk = true) const; /// A mapping of regions to their base dominator tree and a cached /// "hasSSADominance" bit. 
This map does not contain dominator trees for @@ -151,9 +151,7 @@ class DominanceInfo : public detail::DominanceInfoBase { /// The `enclosingOpOk` flag says whether we should return true if the B op /// is enclosed by a region on A. bool properlyDominates(Operation *a, Operation *b, - bool enclosingOpOk = true) const { -return super::properlyDominatesImpl(a, b, enclosingOpOk); - } + bool enclosingOpOk = true) const; /// Return true if operation A dominates operation B, i.e. if A and B are the /// same operation or A properly dominates B. @@ -188,9 +186,7 @@ class DominanceInfo : public detail::DominanceInfoBase { /// Graph regions have only a single block. To be consistent with "proper /// dominance" of ops, the single block is considered to properly dominate /// itself in a graph region. - bool properlyDominates(Block *a, Block *b) const { -return super::properlyDominatesImpl(a, b); - } + bool properlyDominates(Block *a, Block *b) const; }; /// A class for computing basic postdominance information. @@ -200,9 +196,7 @@ class PostDominanceInfo : public detail::DominanceInfoBase { /// Return true if operation A properly postdominates operation B. bool properlyPostDominates(Operation *a, Operation *b, - bool enclosingOpOk = true) { -return super::properlyDominatesImpl(a, b, enclosingOpOk); - } + bool enclosingOpOk = true); /// Return true if operation A postdominates operation B. bool postDominates(Operation *a, Operation *b) { @@ -210,9 +204,7 @@ class PostDominanceInfo : public detail::DominanceInfoBase { } /// Return true if the specified block A properly postdominates block B. - bool properlyPostDominates(Block *a, Block *b) { -return super::properlyDominatesImpl(a, b); - } + bool properlyPostDominates(Block *a, Block *b); /// Return true if the specified block A postdominates block B. 
bool postDominates(Block *a, Block *b) { diff --git a/mlir/lib/IR/Dominance.cpp b/mlir/lib/IR/Dominance.cpp index 406e0f2d62d640..337a1b7af9d40f 100644 --- a/mlir/lib/IR/Dominance.cpp +++ b/mlir/lib/IR/Dominance.cpp @@ -213,61 +213,73 @@ DominanceInfoBase::findNearestCommonDominator(Block *a, return getDomTree(a->getParent()).findNearestCommonDominator(a, b); } -/// Return true if the specified block A properly dominates block B. -template -bool DominanceInfoBase::properlyDominatesImpl(Block *a, - Block *b) const { - assert(a && b && "null blocks not allowed"); +/// Returns the given block iterator if it lies within the region region. +/// Otherwise, otherwise finds the ancestor of the given block iterator that +/// lies within the given region. Returns and "empty" iterator if the latter +/// fails. +/// +/// Note: This is a variant of Region::findAncestorOpInRegion that operates on +/// block iterators instead of ops. +st
[llvm-branch-commits] [mlir] [mlir][IR] Add helper functions to compute insertion point (PR #114940)
@@ -641,3 +641,76 @@ void OpBuilder::cloneRegionBefore(Region &region, Region &parent, void OpBuilder::cloneRegionBefore(Region &region, Block *before) { cloneRegionBefore(region, *before->getParent(), before->getIterator()); } + +//===--===// +// InsertPoint +//===--===// + +OpBuilder::InsertPoint OpBuilder::InsertPoint::after(Value value) { + if (auto blockArg = dyn_cast(value)) +return InsertPoint(blockArg.getOwner(), blockArg.getOwner()->begin()); + Operation *op = value.getDefiningOp(); + return InsertPoint(op->getBlock(), ++op->getIterator()); +} + +/// Helper function that returns "true" if: +/// - `a` is a proper ancestor of `b` +/// - or: there is a path from `a` to `b` +static bool isAncestorOrBefore(Block *a, Block *b) { + if (a->getParentOp()->isProperAncestor(b->getParentOp())) +return true; + if (a->getParent() != b->getParent()) +return false; + return a->isReachable(b); +} + +OpBuilder::InsertPoint +OpBuilder::InsertPoint::findClosest(ArrayRef values) { + // Compute the insertion point after the first value. + assert(!values.empty() && "expected at least one value"); + InsertPoint result = InsertPoint::after(values.front()); + + // Check all other values and update the insertion point as needed. + for (Value v : values.drop_front()) { +InsertPoint pt = InsertPoint::after(v); + +if (pt.getBlock() == result.getBlock()) { + // Both values belong to the same block. Modify the iterator (but keep + // the block) if needed: take the later one of the two insertion points. + Block *block = pt.getBlock(); + if (pt.getPoint() == block->end()) { +// `pt` points to the end of the block: take `pt`. +result = pt; +continue; + } else if (result.getPoint() == block->end()) { +// `result` points to the end of the block: nothing to do. +continue; + } + // Neither `pt` nor `result` point to the end of the block, so both + // iterators point to an operation. Set `result` to the later one of the + // two insertion point.
+ if (result.getPoint()->isBeforeInBlock(&*pt.getPoint())) +result = pt; + continue; +} + +if (isAncestorOrBefore(result.getBlock(), pt.getBlock())) { + // `result` is an ancestor of `pt`. Therefore, `pt` is a valid insertion + // point for `v` and all previous values. + result = pt; + continue; +} + +if (isAncestorOrBefore(pt.getBlock(), result.getBlock())) { + // `pt` is an ancestor of `result`. Therefore, `result` is a valid + // insertion point for `v` and all previous values. + continue; +} + +// `pt` and `result` are in different subtrees: neither is an ancestor of +// the other. In that case, there is no suitable insertion point. matthias-springer wrote: Yes. Post-dominator `Block::iterator` to be precise. (Adding support for that in #115587.) https://github.com/llvm/llvm-project/pull/114940 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Add helper functions to compute insertion point (PR #114940)
@@ -334,6 +334,18 @@ class OpBuilder : public Builder { /// This class represents a saved insertion point. class InsertPoint { public: +/// Finds the closest insertion point where all given values are defined matthias-springer wrote: Possible candidates are the points (`Block::iterator`) right after the definition of each value. I will take a look at all of these and find the one that post-dominates all the others. I think there should be only one such point. Will update this PR accordingly soon... https://github.com/llvm/llvm-project/pull/114940 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/115380 >From e26c0f6fd46cdc74eb96e342ff14acc5b8d64a84 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 7 Nov 2024 14:20:52 -0800 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- libcxx/CMakeLists.txt | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index d699135774ee0b..04596fccdfc923 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -414,15 +414,16 @@ set(LIBCXX_INSTALL_MODULES_DIR "share/libc++/v1" CACHE STRING set(LIBCXX_SHARED_OUTPUT_NAME "c++" CACHE STRING "Output name for the shared libc++ runtime library.") set(LIBCXX_STATIC_OUTPUT_NAME "c++" CACHE STRING "Output name for the static libc++ runtime library.") +set(LIBCXX_GENERATED_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++/v1") +set(LIBCXX_GENERATED_MODULE_DIR "${LIBCXX_BINARY_DIR}/modules/c++/v1") + if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(LIBCXX_TARGET_SUBDIR ${LLVM_DEFAULT_TARGET_TRIPLE}) if(LIBCXX_LIBDIR_SUBDIR) string(APPEND LIBCXX_TARGET_SUBDIR /${LIBCXX_LIBDIR_SUBDIR}) endif() set(LIBCXX_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LIBCXX_TARGET_SUBDIR}) - set(LIBCXX_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") - set(LIBCXX_GENERATED_MODULE_DIR "${LLVM_BINARY_DIR}/modules/c++/v1") - set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LLVM_BINARY_DIR}/include/${LIBCXX_TARGET_SUBDIR}/c++/v1") + set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LIBCXX_BINARY_DIR}/include/${LIBCXX_TARGET_SUBDIR}/c++/v1") set(LIBCXX_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LIBCXX_TARGET_SUBDIR} CACHE STRING "Path where built libc++ libraries should be installed.") set(LIBCXX_INSTALL_INCLUDE_TARGET_DIR 
"${CMAKE_INSTALL_INCLUDEDIR}/${LIBCXX_TARGET_SUBDIR}/c++/v1" CACHE STRING @@ -431,12 +432,8 @@ if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) else() if(LLVM_LIBRARY_OUTPUT_INTDIR) set(LIBCXX_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) -set(LIBCXX_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") -set(LIBCXX_GENERATED_MODULE_DIR "${LLVM_BINARY_DIR}/modules/c++/v1") else() set(LIBCXX_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LIBCXX_LIBDIR_SUFFIX}) -set(LIBCXX_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1") -set(LIBCXX_GENERATED_MODULE_DIR "${CMAKE_BINARY_DIR}/modules/c++/v1") endif() set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LIBCXX_GENERATED_INCLUDE_DIR}") set(LIBCXX_INSTALL_LIBRARY_DIR lib${LIBCXX_LIBDIR_SUFFIX} CACHE STRING ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/115380 >From e26c0f6fd46cdc74eb96e342ff14acc5b8d64a84 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 7 Nov 2024 14:20:52 -0800 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- libcxx/CMakeLists.txt | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index d699135774ee0b..04596fccdfc923 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -414,15 +414,16 @@ set(LIBCXX_INSTALL_MODULES_DIR "share/libc++/v1" CACHE STRING set(LIBCXX_SHARED_OUTPUT_NAME "c++" CACHE STRING "Output name for the shared libc++ runtime library.") set(LIBCXX_STATIC_OUTPUT_NAME "c++" CACHE STRING "Output name for the static libc++ runtime library.") +set(LIBCXX_GENERATED_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++/v1") +set(LIBCXX_GENERATED_MODULE_DIR "${LIBCXX_BINARY_DIR}/modules/c++/v1") + if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(LIBCXX_TARGET_SUBDIR ${LLVM_DEFAULT_TARGET_TRIPLE}) if(LIBCXX_LIBDIR_SUBDIR) string(APPEND LIBCXX_TARGET_SUBDIR /${LIBCXX_LIBDIR_SUBDIR}) endif() set(LIBCXX_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LIBCXX_TARGET_SUBDIR}) - set(LIBCXX_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") - set(LIBCXX_GENERATED_MODULE_DIR "${LLVM_BINARY_DIR}/modules/c++/v1") - set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LLVM_BINARY_DIR}/include/${LIBCXX_TARGET_SUBDIR}/c++/v1") + set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LIBCXX_BINARY_DIR}/include/${LIBCXX_TARGET_SUBDIR}/c++/v1") set(LIBCXX_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LIBCXX_TARGET_SUBDIR} CACHE STRING "Path where built libc++ libraries should be installed.") set(LIBCXX_INSTALL_INCLUDE_TARGET_DIR 
"${CMAKE_INSTALL_INCLUDEDIR}/${LIBCXX_TARGET_SUBDIR}/c++/v1" CACHE STRING @@ -431,12 +432,8 @@ if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) else() if(LLVM_LIBRARY_OUTPUT_INTDIR) set(LIBCXX_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) -set(LIBCXX_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") -set(LIBCXX_GENERATED_MODULE_DIR "${LLVM_BINARY_DIR}/modules/c++/v1") else() set(LIBCXX_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LIBCXX_LIBDIR_SUFFIX}) -set(LIBCXX_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1") -set(LIBCXX_GENERATED_MODULE_DIR "${CMAKE_BINARY_DIR}/modules/c++/v1") endif() set(LIBCXX_GENERATED_INCLUDE_TARGET_DIR "${LIBCXX_GENERATED_INCLUDE_DIR}") set(LIBCXX_INSTALL_LIBRARY_DIR lib${LIBCXX_LIBDIR_SUFFIX} CACHE STRING ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
arichardson wrote: Thanks for the review - will merge once https://github.com/llvm/llvm-project/pull/115387 has landed and CI is happy. https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
@@ -1021,17 +1021,8 @@ set(files configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" @ONLY) philnik777 wrote: I think this change is fine. I'll have to modify this anyways and removing the copying now means I won't introduce it by accident again. https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineSink to NPM (PR #115434)
@@ -0,0 +1,26 @@ +//===- MachineSink.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_MACHINESINK_H +#define LLVM_CODEGEN_MACHINESINK_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class MachineSinkingPass : public PassInfoMixin { + bool EnableSinkAndFold; + +public: + MachineSinkingPass(bool EnableSinkAndFold = false) + : EnableSinkAndFold(EnableSinkAndFold) {} paperchalice wrote: Could override `printPipeline` in `PassInfoMixin` because this pass has a parameter. https://github.com/llvm/llvm-project/pull/115434 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineSink to NPM (PR #115434)
@@ -127,6 +130,8 @@ class MachineSinking : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI = nullptr; AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + Pass *LegacyPass; + MachineFunctionAnalysisManager *MFAM; arsenm wrote: Keep pass manager details out of the implementation https://github.com/llvm/llvm-project/pull/115434 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Still set up the two SGPRs for queue ptr even it is COV5 (PR #112403)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/112403 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Fix (PR #115505)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/115505.diff 1 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+10-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 13a2db7a87b437..8e3208abaec9a5 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -59,9 +59,18 @@ static MCRegister findScratchNonCalleeSaveRegister( if (Unused) return findUnusedRegister(MRI, LiveUnits, RC); + dbgs() << "\n"; + for (MCRegister Reg : RC) { -if (LiveUnits.available(Reg) && !MRI.isReserved(Reg)) +dbgs() << "reg " << Reg << " avail? " << LiveUnits.available(Reg) + << ", reserved? " << MRI.isReserved(Reg) << "\n"; + } + + for (MCRegister Reg : RC) { +if (LiveUnits.available(Reg) && !MRI.isReserved(Reg)) { + dbgs() << "choose reg " << Reg << '\n'; return Reg; +} } return MCRegister(); `` https://github.com/llvm/llvm-project/pull/115505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Fix (PR #115505)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/115505?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#115505** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/115505?utm_source=stack-comment-icon) 👈 * **#115503** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/115503?utm_source=stack-comment-icon): 1 other dependent PR ([#112403](https://github.com/llvm/llvm-project/pull/112403) [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/112403?utm_source=stack-comment-icon)) * `main` This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment). Join @shiltian and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment) https://github.com/llvm/llvm-project/pull/115505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Fix (PR #115505)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/115505 None >From 8162e61b66ee56e476cf4987a9faff89dfdfc3d0 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 29 Oct 2024 01:29:40 -0400 Subject: [PATCH] [AMDGPU] Fix --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 13a2db7a87b437..8e3208abaec9a5 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -59,9 +59,18 @@ static MCRegister findScratchNonCalleeSaveRegister( if (Unused) return findUnusedRegister(MRI, LiveUnits, RC); + dbgs() << "\n"; + for (MCRegister Reg : RC) { -if (LiveUnits.available(Reg) && !MRI.isReserved(Reg)) +dbgs() << "reg " << Reg << " avail? " << LiveUnits.available(Reg) + << ", reserved? " << MRI.isReserved(Reg) << "\n"; + } + + for (MCRegister Reg : RC) { +if (LiveUnits.available(Reg) && !MRI.isReserved(Reg)) { + dbgs() << "choose reg " << Reg << '\n'; return Reg; +} } return MCRegister(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Fix (PR #115505)
https://github.com/shiltian closed https://github.com/llvm/llvm-project/pull/115505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 4e1351c - Revert "[Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (#112849)"
Author: Fabian Ritter Date: 2024-11-08T16:19:59+01:00 New Revision: 4e1351cf2eb08cc4d8ba1ee5538d3a8e7cfa5aa8 URL: https://github.com/llvm/llvm-project/commit/4e1351cf2eb08cc4d8ba1ee5538d3a8e7cfa5aa8 DIFF: https://github.com/llvm/llvm-project/commit/4e1351cf2eb08cc4d8ba1ee5538d3a8e7cfa5aa8.diff LOG: Revert "[Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (#112849)" This reverts commit e5c6d1f4e6d6c8709f92b47717cffc486947ff1b. Added: Modified: clang/docs/AMDGPUSupport.rst clang/docs/HIPSupport.rst clang/include/clang/Basic/MacroBuilder.h clang/lib/Basic/Targets/AMDGPU.cpp Removed: clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst index 3eada5f900613a..e63c0e1ba7d67b 100644 --- a/clang/docs/AMDGPUSupport.rst +++ b/clang/docs/AMDGPUSupport.rst @@ -50,9 +50,9 @@ Predefined Macros * - ``__AMDGCN_UNSAFE_FP_ATOMICS__`` - Defined if unsafe floating-point atomics are allowed. * - ``__AMDGCN_WAVEFRONT_SIZE__`` - - Defines the wavefront size. Allowed values are 32 and 64 (deprecated). + - Defines the wavefront size. Allowed values are 32 and 64. * - ``__AMDGCN_WAVEFRONT_SIZE`` - - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated). + - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated. * - ``__HAS_FMAF__`` - Defined if FMAF instruction is available (deprecated). * - ``__HAS_LDEXPF__`` diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index e830acd8dd85c0..e26297c7af97ac 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -178,7 +178,7 @@ Predefined Macros Note that some architecture specific AMDGPU macros will have default values when used from the HIP host compilation. Other :doc:`AMDGPU macros ` -like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example. +like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example. 
Compilation Modes = diff --git a/clang/include/clang/Basic/MacroBuilder.h b/clang/include/clang/Basic/MacroBuilder.h index d83f27c236e3d8..96e67cbbfa3f21 100644 --- a/clang/include/clang/Basic/MacroBuilder.h +++ b/clang/include/clang/Basic/MacroBuilder.h @@ -26,14 +26,8 @@ class MacroBuilder { MacroBuilder(raw_ostream &Output) : Out(Output) {} /// Append a \#define line for macro of the form "\#define Name Value\n". - /// If DeprecationMsg is provided, also append a pragma to deprecate the - /// defined macro. - void defineMacro(const Twine &Name, const Twine &Value = "1", - Twine DeprecationMsg = "") { + void defineMacro(const Twine &Name, const Twine &Value = "1") { Out << "#define " << Name << ' ' << Value << '\n'; -if (!DeprecationMsg.isTriviallyEmpty()) - Out << "#pragma clang deprecated(" << Name << ", \"" << DeprecationMsg - << "\")\n"; } /// Append a \#undef line for Name. Name should be of the form XXX diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 99f8f2944e2796..078819183afdac 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -337,12 +337,9 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); + Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize)); + // ToDo: deprecate this macro for naming consistency. 
+ Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); } diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip deleted file mode 100644 index aca591536a76c0..00 --- a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip +++ /dev/null @@ -1,111 +0,0 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s - -// Test that deprecation warnings for the wavefront size macro are emitted properly. - -#include - -#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__ - -#define DOUBLE_WRAPPED (WRAPPED) - -__attribute__((host
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891 >From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 28 Aug 2024 21:10:25 +0200 Subject: [PATCH 1/3] Reworked block probe matching Use new probe ifaces Get all function probes at once Drop ProfileUsePseudoProbes Unify matchWithBlockPseudoProbes Distinguish exact and loose probe match --- bolt/include/bolt/Core/BinaryContext.h| 20 +- bolt/lib/Passes/BinaryPasses.cpp | 40 ++- bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++ bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 8 +- 4 files changed, 237 insertions(+), 235 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3e20cb607e657b..3f7b2ac0bc6cf9 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -724,14 +724,26 @@ class BinaryContext { uint32_t NumStaleBlocks{0}; /// the number of exactly matched basic blocks uint32_t NumExactMatchedBlocks{0}; -/// the number of pseudo probe matched basic blocks -uint32_t NumPseudoProbeMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; /// the count of exactly matched samples uint64_t ExactMatchedSampleCount{0}; -/// the count of pseudo probe matched samples -uint64_t PseudoProbeMatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t LooseMatchedSampleCount{0}; +/// the count of exactly pseudo probe matched samples +uint64_t PseudoProbeExactMatchedSampleCount{0}; +/// the count of loosely pseudo probe matched samples 
+uint64_t PseudoProbeLooseMatchedSampleCount{0}; +/// the count of call matched samples +uint64_t CallMatchedSampleCount{0}; /// the number of stale functions that have matching number of blocks in /// the profile uint64_t NumStaleFuncsWithEqualBlockCount{0}; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index b786f07a6a6651..8edbd58c3ed3de 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( -"BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic " +"BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" " (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", -100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / 
BC.Stats.NumStaleBlocks, -BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, -100.0 * BC.Stats.PseudoProbeMatchedSampleCount / -BC.Stats.StaleSampleCount, -BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); +100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks, +BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount, +BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount); +BC
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891 >From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 28 Aug 2024 21:10:25 +0200 Subject: [PATCH 1/3] Reworked block probe matching Use new probe ifaces Get all function probes at once Drop ProfileUsePseudoProbes Unify matchWithBlockPseudoProbes Distinguish exact and loose probe match --- bolt/include/bolt/Core/BinaryContext.h| 20 +- bolt/lib/Passes/BinaryPasses.cpp | 40 ++- bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++ bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 8 +- 4 files changed, 237 insertions(+), 235 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3e20cb607e657b..3f7b2ac0bc6cf9 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -724,14 +724,26 @@ class BinaryContext { uint32_t NumStaleBlocks{0}; /// the number of exactly matched basic blocks uint32_t NumExactMatchedBlocks{0}; -/// the number of pseudo probe matched basic blocks -uint32_t NumPseudoProbeMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; /// the count of exactly matched samples uint64_t ExactMatchedSampleCount{0}; -/// the count of pseudo probe matched samples -uint64_t PseudoProbeMatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t LooseMatchedSampleCount{0}; +/// the count of exactly pseudo probe matched samples +uint64_t PseudoProbeExactMatchedSampleCount{0}; +/// the count of loosely pseudo probe matched samples 
+uint64_t PseudoProbeLooseMatchedSampleCount{0}; +/// the count of call matched samples +uint64_t CallMatchedSampleCount{0}; /// the number of stale functions that have matching number of blocks in /// the profile uint64_t NumStaleFuncsWithEqualBlockCount{0}; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index b786f07a6a6651..8edbd58c3ed3de 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( -"BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic " +"BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " +"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" +" (%zu out of %zu stale)\n", +100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / +BC.Stats.NumStaleBlocks, +BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / +BC.Stats.StaleSampleCount, +BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); +BC.outs() << format( +"BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" " (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", -100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / 
BC.Stats.NumStaleBlocks, -BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, -100.0 * BC.Stats.PseudoProbeMatchedSampleCount / -BC.Stats.StaleSampleCount, -BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); +100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks, +BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks, +100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount, +BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount); +BC
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ChuanqiXu9 wrote: @vgvassilev @ilya-biryukov @alexfh If I read correctly, the only blocking issue is the performance issue reported above. I tried to split the partial specializations from the full specialization table to avoid merging tables again and again. Could you please do another round of testing? Thanks in advance. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ChuanqiXu9 wrote: Sorry for the poorly handled stacked PR. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/83237 >From 493cb7cdafeb22efa9e9f3c1954b6de1e2665365 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 28 Feb 2024 11:41:53 +0800 Subject: [PATCH 1/2] [Serialization] Code cleanups and polish 83233 --- clang/include/clang/AST/DeclTemplate.h| 32 +- clang/include/clang/AST/ExternalASTSource.h | 8 +- .../clang/Sema/MultiplexExternalSemaSource.h | 4 +- .../include/clang/Serialization/ASTBitCodes.h | 2 + clang/include/clang/Serialization/ASTReader.h | 4 +- clang/lib/AST/DeclTemplate.cpp| 85 ++-- clang/lib/AST/ExternalASTSource.cpp | 10 +- clang/lib/AST/ODRHash.cpp | 10 - .../lib/Sema/MultiplexExternalSemaSource.cpp | 13 +- clang/lib/Serialization/ASTCommon.h | 1 - clang/lib/Serialization/ASTReader.cpp | 41 +- clang/lib/Serialization/ASTReaderDecl.cpp | 80 +--- clang/lib/Serialization/ASTReaderInternals.h | 3 +- clang/lib/Serialization/ASTWriter.cpp | 49 +-- clang/lib/Serialization/ASTWriterDecl.cpp | 53 +-- clang/lib/Serialization/CMakeLists.txt| 1 + .../Serialization/TemplateArgumentHasher.cpp | 409 ++ .../Serialization/TemplateArgumentHasher.h| 34 ++ clang/test/Modules/cxx-templates.cpp | 8 +- .../Modules/recursive-instantiations.cppm | 40 ++ .../test/OpenMP/target_parallel_ast_print.cpp | 4 - clang/test/OpenMP/target_teams_ast_print.cpp | 4 - clang/test/OpenMP/task_ast_print.cpp | 4 - clang/test/OpenMP/teams_ast_print.cpp | 4 - 24 files changed, 616 insertions(+), 287 deletions(-) create mode 100644 clang/lib/Serialization/TemplateArgumentHasher.cpp create mode 100644 clang/lib/Serialization/TemplateArgumentHasher.h create mode 100644 clang/test/Modules/recursive-instantiations.cppm diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 7d707ed45cf5f6..336340e6dc6641 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -262,9 +262,6 @@ class TemplateArgumentList final TemplateArgumentList(const 
TemplateArgumentList &) = delete; TemplateArgumentList &operator=(const TemplateArgumentList &) = delete; - /// Create hash for the given arguments. - static unsigned ComputeODRHash(ArrayRef Args); - /// Create a new template argument list that copies the given set of /// template arguments. static TemplateArgumentList *CreateCopy(ASTContext &Context, @@ -738,25 +735,6 @@ class RedeclarableTemplateDecl : public TemplateDecl, } void anchor() override; - struct LazySpecializationInfo { -GlobalDeclID DeclID = GlobalDeclID(); -unsigned ODRHash = ~0U; -bool IsPartial = false; -LazySpecializationInfo(GlobalDeclID ID, unsigned Hash = ~0U, - bool Partial = false) -: DeclID(ID), ODRHash(Hash), IsPartial(Partial) {} -LazySpecializationInfo() {} -bool operator<(const LazySpecializationInfo &Other) const { - return DeclID < Other.DeclID; -} -bool operator==(const LazySpecializationInfo &Other) const { - assert((DeclID != Other.DeclID || ODRHash == Other.ODRHash) && - "Hashes differ!"); - assert((DeclID != Other.DeclID || IsPartial == Other.IsPartial) && - "Both must be the same kinds!"); - return DeclID == Other.DeclID; -} - }; protected: template struct SpecEntryTraits { @@ -800,16 +778,20 @@ class RedeclarableTemplateDecl : public TemplateDecl, void loadLazySpecializationsImpl(bool OnlyPartial = false) const; - void loadLazySpecializationsImpl(llvm::ArrayRef Args, + bool loadLazySpecializationsImpl(llvm::ArrayRef Args, TemplateParameterList *TPL = nullptr) const; - Decl *loadLazySpecializationImpl(LazySpecializationInfo &LazySpecInfo) const; - template typename SpecEntryTraits::DeclType* findSpecializationImpl(llvm::FoldingSetVector &Specs, void *&InsertPos, ProfileArguments &&...ProfileArgs); + template + typename SpecEntryTraits::DeclType * + findSpecializationLocally(llvm::FoldingSetVector &Specs, +void *&InsertPos, +ProfileArguments &&...ProfileArgs); + template void addSpecializationImpl(llvm::FoldingSetVector &Specs, EntryType *Entry, void *InsertPos); diff --git 
a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h index 5f4f9a9a8d681e..9f968ba05b4466 100644 --- a/clang/include/clang/AST/ExternalASTSource.h +++ b/clang/include/clang/AST/ExternalASTSource.h @@ -155,11 +155,15 @@ class ExternalASTSource : public RefCountedBase { /// Load all the external specializations for the Decl \param D if \param /// OnlyPartial is false. Otherwise, load all the external **
[llvm-branch-commits] [mlir] [mlir][IR] Add helper functions to compute insertion point (PR #114940)
@@ -641,3 +641,76 @@ void OpBuilder::cloneRegionBefore(Region ®ion, Region &parent, void OpBuilder::cloneRegionBefore(Region ®ion, Block *before) { cloneRegionBefore(region, *before->getParent(), before->getIterator()); } + +//===--===// +// InsertPoint +//===--===// + +OpBuilder::InsertPoint OpBuilder::InsertPoint::after(Value value) { + if (auto blockArg = dyn_cast(value)) +return InsertPoint(blockArg.getOwner(), blockArg.getOwner()->begin()); + Operation *op = value.getDefiningOp(); + return InsertPoint(op->getBlock(), ++op->getIterator()); +} + +/// Helper function that returns "true" if: +/// - `a` is a proper ancestor of `b` +/// - or: there is a path from `a` to `b` +static bool isAncestorOrBefore(Block *a, Block *b) { + if (a->getParentOp()->isProperAncestor(b->getParentOp())) +return true; + if (a->getParent() != b->getParent()) +return false; + return a->isReachable(b); +} + +OpBuilder::InsertPoint +OpBuilder::InsertPoint::findClosest(ArrayRef values) { + // Compute the insertion point after the first value. + assert(!values.empty() && "expected at least one value"); + InsertPoint result = InsertPoint::after(values.front()); + + // Check all other values and update the insertion point as needed. + for (Value v : values.drop_front()) { +InsertPoint pt = InsertPoint::after(v); + +if (pt.getBlock() == result.getBlock()) { + // Both values belong to the same block. Modify the iterator (but keep + // the block) if needed: take the later one of the two insertion points. + Block *block = pt.getBlock(); + if (pt.getPoint() == block->end()) { +// `pt` points to the end of the block: take `pt`. +result = pt; +continue; + } else if (result.getPoint() == block->end()) { +// `result` points to the end of the block: nothing to do. +continue; + } + // Neither `pt` nor `result` point to the end of the block, so both + // iterators point to an operation. Set `result` to the later one of the + // two insertion point. 
+ if (result.getPoint()->isBeforeInBlock(&*pt.getPoint())) +result = pt; + continue; +} + +if (isAncestorOrBefore(result.getBlock(), pt.getBlock())) { + // `result` is an ancestor of `pt`. Therefore, `pt` is a valid insertion + // point for `v` and all previous values. + result = pt; + continue; +} + +if (isAncestorOrBefore(pt.getBlock(), result.getBlock())) { + // `pt` is an ancestor of `result`. Therefore, `result` is a valid + // insertion point for `v` and all previous values. + continue; +} + +// `pt` and `result` are in different subtrees: neither is an ancestor of +// the other. In that case, there is no suitable insertion point. joker-eph wrote: Wouldn't the right insertion point be the post-dominator block? https://github.com/llvm/llvm-project/pull/114940 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][CIR] Merge the mlir::cir namespace into cir (PR #115386)
https://github.com/AaronBallman approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/115386 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][CIR] Merge the mlir::cir namespace into cir (PR #115386)
https://github.com/erichkeane approved this pull request. https://github.com/llvm/llvm-project/pull/115386 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Add helper functions to compute insertion point (PR #114940)
@@ -334,6 +334,18 @@ class OpBuilder : public Builder { /// This class represents a saved insertion point. class InsertPoint { public: +/// Finds the closest insertion point where all given values are defined joker-eph wrote: It's not clear to me that this is a well-defined problem: what does "closest" mean here? https://github.com/llvm/llvm-project/pull/114940 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Introduce OnDiskHashTable for specializations (PR #83233)
https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/83233 >From f565dd3f156bbdf608be6643208c40f02b4f0e83 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 28 Feb 2024 11:41:53 +0800 Subject: [PATCH] [Serialization] Introduce OnDiskHashTable for specializations Following up for https://github.com/llvm/llvm-project/pull/83108 This follows the suggestion literally from https://github.com/llvm/llvm-project/pull/76774#issuecomment-1951172457 which introduces OnDiskHashTable for specializations based on D41416. Note that I didn't polish this patch to reduce the diff from D41416 to it easier to review. I'll make the polishing patch later. So that we can focus what we're doing in this patch and focus on the style in the next patch. --- clang/include/clang/AST/ExternalASTSource.h | 11 + .../clang/Sema/MultiplexExternalSemaSource.h | 6 + .../include/clang/Serialization/ASTBitCodes.h | 6 + clang/include/clang/Serialization/ASTReader.h | 34 ++- clang/include/clang/Serialization/ASTWriter.h | 15 + clang/lib/AST/DeclTemplate.cpp| 17 ++ clang/lib/AST/ExternalASTSource.cpp | 5 + .../lib/Sema/MultiplexExternalSemaSource.cpp | 12 + clang/lib/Serialization/ASTReader.cpp | 145 +- clang/lib/Serialization/ASTReaderDecl.cpp | 27 ++ clang/lib/Serialization/ASTReaderInternals.h | 124 + clang/lib/Serialization/ASTWriter.cpp | 174 +++- clang/lib/Serialization/ASTWriterDecl.cpp | 32 ++- clang/unittests/Serialization/CMakeLists.txt | 1 + .../Serialization/LoadSpecLazilyTest.cpp | 260 ++ 15 files changed, 856 insertions(+), 13 deletions(-) create mode 100644 clang/unittests/Serialization/LoadSpecLazilyTest.cpp diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h index 582ed7c65f58ca..5f4f9a9a8d681e 100644 --- a/clang/include/clang/AST/ExternalASTSource.h +++ b/clang/include/clang/AST/ExternalASTSource.h @@ -152,6 +152,17 @@ class ExternalASTSource : public RefCountedBase { virtual bool 
FindExternalVisibleDeclsByName(const DeclContext *DC, DeclarationName Name); + /// Load all the external specializations for the Decl \param D if \param + /// OnlyPartial is false. Otherwise, load all the external **partial** + /// specializations for the \param D. + virtual void LoadExternalSpecializations(const Decl *D, bool OnlyPartial); + + /// Load all the specializations for the Decl \param D with the same template + /// args specified by \param TemplateArgs. + virtual void + LoadExternalSpecializations(const Decl *D, + ArrayRef TemplateArgs); + /// Ensures that the table of all visible declarations inside this /// context is up to date. /// diff --git a/clang/include/clang/Sema/MultiplexExternalSemaSource.h b/clang/include/clang/Sema/MultiplexExternalSemaSource.h index 3d1906d8699265..78bbbaf2d7b5c6 100644 --- a/clang/include/clang/Sema/MultiplexExternalSemaSource.h +++ b/clang/include/clang/Sema/MultiplexExternalSemaSource.h @@ -97,6 +97,12 @@ class MultiplexExternalSemaSource : public ExternalSemaSource { bool FindExternalVisibleDeclsByName(const DeclContext *DC, DeclarationName Name) override; + void LoadExternalSpecializations(const Decl *D, bool OnlyPartial) override; + + void + LoadExternalSpecializations(const Decl *D, + ArrayRef TemplateArgs) override; + /// Ensures that the table of all visible declarations inside this /// context is up to date. void completeVisibleDeclsMap(const DeclContext *DC) override; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index e397dff097652b..405954ba922b5e 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -734,6 +734,9 @@ enum ASTRecordTypes { /// Record code for Sema's vector of functions/blocks with effects to /// be verified. 
DECLS_WITH_EFFECTS_TO_VERIFY = 72, + + /// Record code for updated specialization + UPDATE_SPECIALIZATION = 73, }; /// Record types used within a source manager block. @@ -1500,6 +1503,9 @@ enum DeclCode { /// A HLSLBufferDecl record. DECL_HLSL_BUFFER, + // A decls specilization record. + DECL_SPECIALIZATIONS, + /// An ImplicitConceptSpecializationDecl record. DECL_IMPLICIT_CONCEPT_SPECIALIZATION, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b476a40ebd2c8c..84bb25754f526f 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -353,6 +353,9 @@ class ASTIdentifierLookupTrait; /// The on-disk hash table(s) used for DeclContext name lookup. struct DeclContextLookupTable; +/// The on-disk hash table(s) used for specialization decls. +
[llvm-branch-commits] [clang] [Serialization] Introduce OnDiskHashTable for specializations (PR #83233)
https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/83233 >From 11726437efb760c9f2aba9b2258337b2b8eb4bb6 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 8 Nov 2024 17:19:33 +0800 Subject: [PATCH] [Serialization] Introduce OnDiskHashTable for specializations --- clang/include/clang/AST/ExternalASTSource.h | 11 + .../clang/Sema/MultiplexExternalSemaSource.h | 6 + .../include/clang/Serialization/ASTBitCodes.h | 6 + clang/include/clang/Serialization/ASTReader.h | 34 ++- clang/include/clang/Serialization/ASTWriter.h | 14 + clang/lib/AST/DeclTemplate.cpp| 17 ++ clang/lib/AST/ExternalASTSource.cpp | 5 + .../lib/Sema/MultiplexExternalSemaSource.cpp | 12 + clang/lib/Serialization/ASTReader.cpp | 145 +- clang/lib/Serialization/ASTReaderDecl.cpp | 27 ++ clang/lib/Serialization/ASTReaderInternals.h | 124 + clang/lib/Serialization/ASTWriter.cpp | 174 +++- clang/lib/Serialization/ASTWriterDecl.cpp | 32 ++- clang/unittests/Serialization/CMakeLists.txt | 1 + .../Serialization/LoadSpecLazilyTest.cpp | 260 ++ 15 files changed, 855 insertions(+), 13 deletions(-) create mode 100644 clang/unittests/Serialization/LoadSpecLazilyTest.cpp diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h index 582ed7c65f58ca..5f4f9a9a8d681e 100644 --- a/clang/include/clang/AST/ExternalASTSource.h +++ b/clang/include/clang/AST/ExternalASTSource.h @@ -152,6 +152,17 @@ class ExternalASTSource : public RefCountedBase { virtual bool FindExternalVisibleDeclsByName(const DeclContext *DC, DeclarationName Name); + /// Load all the external specializations for the Decl \param D if \param + /// OnlyPartial is false. Otherwise, load all the external **partial** + /// specializations for the \param D. + virtual void LoadExternalSpecializations(const Decl *D, bool OnlyPartial); + + /// Load all the specializations for the Decl \param D with the same template + /// args specified by \param TemplateArgs. 
+ virtual void + LoadExternalSpecializations(const Decl *D, + ArrayRef TemplateArgs); + /// Ensures that the table of all visible declarations inside this /// context is up to date. /// diff --git a/clang/include/clang/Sema/MultiplexExternalSemaSource.h b/clang/include/clang/Sema/MultiplexExternalSemaSource.h index 3d1906d8699265..78bbbaf2d7b5c6 100644 --- a/clang/include/clang/Sema/MultiplexExternalSemaSource.h +++ b/clang/include/clang/Sema/MultiplexExternalSemaSource.h @@ -97,6 +97,12 @@ class MultiplexExternalSemaSource : public ExternalSemaSource { bool FindExternalVisibleDeclsByName(const DeclContext *DC, DeclarationName Name) override; + void LoadExternalSpecializations(const Decl *D, bool OnlyPartial) override; + + void + LoadExternalSpecializations(const Decl *D, + ArrayRef TemplateArgs) override; + /// Ensures that the table of all visible declarations inside this /// context is up to date. void completeVisibleDeclsMap(const DeclContext *DC) override; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 3b14a0b8203315..cb3ed6c1ecbb7c 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -734,6 +734,9 @@ enum ASTRecordTypes { /// Record code for Sema's vector of functions/blocks with effects to /// be verified. DECLS_WITH_EFFECTS_TO_VERIFY = 72, + + /// Record code for updated specialization + UPDATE_SPECIALIZATION = 73, }; /// Record types used within a source manager block. @@ -1500,6 +1503,9 @@ enum DeclCode { /// A HLSLBufferDecl record. DECL_HLSL_BUFFER, + // A decls specilization record. + DECL_SPECIALIZATIONS, + /// An ImplicitConceptSpecializationDecl record. 
DECL_IMPLICIT_CONCEPT_SPECIALIZATION, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 9c274adc59a207..6306d4f08e81fa 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -354,6 +354,9 @@ class ASTIdentifierLookupTrait; /// The on-disk hash table(s) used for DeclContext name lookup. struct DeclContextLookupTable; +/// The on-disk hash table(s) used for specialization decls. +struct LazySpecializationInfoLookupTable; + } // namespace reader } // namespace serialization @@ -632,20 +635,29 @@ class ASTReader llvm::DenseMap Lookups; + /// Map from decls to specialized decls. + llvm::DenseMap + SpecializationsLookups; + // Updates for visible decls can occur for other contexts than just the // TU, and when we read those update records, the actual context may not // be available yet, so have this pending map using the ID as
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
https://github.com/ldionne edited https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
https://github.com/ldionne approved this pull request. This looks great, this is a really nice cleanup. Thank you! Let's just wait until we know what to do with the C++03 CMakeLists but I'm OK with merging this once that's settled, if CI is green. https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
@@ -1043,17 +1043,8 @@ set(files configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" @ONLY) configure_file("${LIBCXX_ASSERTION_HANDLER_FILE}" "${LIBCXX_GENERATED_INCLUDE_DIR}/__assertion_handler" COPYONLY) -set(_all_includes "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" +set(_generated_includes "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" "${LIBCXX_GENERATED_INCLUDE_DIR}/__assertion_handler") ldionne wrote: ```suggestion "${LIBCXX_GENERATED_INCLUDE_DIR}/__assertion_handler") ``` Just to align with the line above. https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Stop copying headers to the build directory (PR #115380)
@@ -1021,17 +1021,8 @@ set(files configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" @ONLY) ldionne wrote: @philnik777 Should we be modifying the C++03 `CMakeLists.txt` at all? https://github.com/llvm/llvm-project/pull/115380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DOACROSS clause (PR #115397)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/115397 >From 4165254e8c4a7b572e741cb62d632b462537dc0f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 5 Nov 2024 12:01:43 -0600 Subject: [PATCH 1/4] [flang][OpenMP] Semantic checks for DOACROSS clause Keep track of loop constructs and OpenMP loop constructs that have been entered. Use the information to validate the variables in the SINK loop iteration vector. --- flang/lib/Lower/OpenMP/Clauses.cpp | 28 ++-- flang/lib/Semantics/check-omp-structure.cpp | 141 ++-- flang/lib/Semantics/check-omp-structure.h | 25 +++- flang/lib/Semantics/resolve-directives.cpp | 12 +- flang/test/Lower/OpenMP/Todo/ordered.f90| 20 +++ flang/test/Semantics/OpenMP/doacross.f90| 28 flang/test/Semantics/OpenMP/ordered01.f90 | 4 +- flang/test/Semantics/OpenMP/ordered03.f90 | 2 + 8 files changed, 230 insertions(+), 30 deletions(-) create mode 100644 flang/test/Lower/OpenMP/Todo/ordered.f90 create mode 100644 flang/test/Semantics/OpenMP/doacross.f90 diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index f6633dd53f6f23..1764b3b79b4e34 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -574,20 +574,17 @@ Defaultmap make(const parser::OmpClause::Defaultmap &inp, /*VariableCategory=*/maybeApply(convert2, t1)}}; } -Depend make(const parser::OmpClause::Depend &inp, -semantics::SemanticsContext &semaCtx) { - // inp.v -> parser::OmpDependClause - using wrapped = parser::OmpDependClause; - using Variant = decltype(Depend::u); +Doacross makeDoacross(const parser::OmpDoacross &doa, + semantics::SemanticsContext &semaCtx) { // Iteration is the equivalent of parser::OmpIteration using Iteration = Doacross::Vector::value_type; // LoopIterationT - auto visitSource = [&](const parser::OmpDoacross::Source &) -> Variant { + auto visitSource = [&](const parser::OmpDoacross::Source &) { return 
Doacross{{/*DependenceType=*/Doacross::DependenceType::Source, /*Vector=*/{}}}; }; - auto visitSink = [&](const parser::OmpDoacross::Sink &s) -> Variant { + auto visitSink = [&](const parser::OmpDoacross::Sink &s) { using IterOffset = parser::OmpIterationOffset; auto convert2 = [&](const parser::OmpIteration &v) { auto &t0 = std::get(v.t); @@ -605,6 +602,15 @@ Depend make(const parser::OmpClause::Depend &inp, /*Vector=*/makeList(s.v.v, convert2)}}; }; + return common::visit(common::visitors{visitSink, visitSource}, doa.u); +} + +Depend make(const parser::OmpClause::Depend &inp, +semantics::SemanticsContext &semaCtx) { + // inp.v -> parser::OmpDependClause + using wrapped = parser::OmpDependClause; + using Variant = decltype(Depend::u); + auto visitTaskDep = [&](const wrapped::TaskDep &s) -> Variant { auto &t0 = std::get>(s.t); auto &t1 = std::get(s.t); @@ -617,11 +623,11 @@ Depend make(const parser::OmpClause::Depend &inp, /*LocatorList=*/makeObjects(t2, semaCtx)}}; }; - return Depend{Fortran::common::visit( // + return Depend{common::visit( // common::visitors{ // Doacross [&](const parser::OmpDoacross &s) -> Variant { -return common::visit(common::visitors{visitSink, visitSource}, s.u); +return makeDoacross(s, semaCtx); }, // Depend::TaskDep visitTaskDep, @@ -692,8 +698,8 @@ DistSchedule make(const parser::OmpClause::DistSchedule &inp, Doacross make(const parser::OmpClause::Doacross &inp, semantics::SemanticsContext &semaCtx) { - // inp -> empty - llvm_unreachable("Empty: doacross"); + // inp.v -> OmpDoacrossClause + return makeDoacross(inp.v.v, semaCtx); } // DynamicAllocators: empty diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 132fb6484bcfc5..67360b983a7d19 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -541,6 +541,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPConstruct &) { } void OmpStructureChecker::Enter(const 
parser::OpenMPLoopConstruct &x) { + loopStack_.push_back(&x); const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; @@ -933,11 +934,19 @@ void OmpStructureChecker::CheckDistLinear( } } -void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &) { +void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { if (llvm::omp::allSimdSet.test(GetContext().directive)) { ExitDirectiveNest(SIMDNest); } dirContext_.pop_back(); + + assert(!loopStack_.empty() && "Expecting non-empty loop stack"); + const LoopConstruct &top = loopStack_.back(); +#ifndef NDEBUG + auto *loopc = std
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882 >From b7366209b93a07f286842f31bb625ca321b47df4 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Wed, 30 Oct 2024 15:37:59 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit width that cover multiple LLTs: B32 B64 etc. "Predicate" wrapper class for bool predicate functions used to write pretty rules. Predicates can be combined using &&, || and !. Lowering for splitting and widening loads. Write rules for loads to not change existing mir tests from old regbankselect. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 284 +++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 309 - .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 927 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 916140e2bbcd68..5c4195cb15fb2c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -38,6 +38,83 @@ void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { lower(MI, Mapping, WaterfallSgprs); } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(Base); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { 
+Register BasePlusOffset; +if (ByteOffset == 0) { + BasePlusOffset = Base; +} else { + auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset); + BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0); +} +auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); +auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO); +LoadPartRegs.push_back(LoadPart.getReg(0)); +ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { +// Loads are of same size, concat or merge them together. +B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { +// Loads are not all of same size, need to unmerge them to smaller pieces +// of MergeTy type, then merge pieces to Dst. +SmallVector MergeTyParts; +for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { +MergeTyParts.push_back(Reg); + } else { +auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) + MergeTyParts.push_back(Unmerge.getReg(i)); + } +} +B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + + MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO); + + if (WideTy.isScalar()) { +B.buildTrunc(Dst, WideLoad); + } else { +SmallVector MergeTyParts; +auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad); + +LLT DstTy = MRI.getType(Dst); +unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits(); +for (unsigned i = 0; i < NumElts; ++i) { + MergeTyParts.push_back(Unmerge.getReg(i)); +} +B.buildMergeLikeInstr(Dst, 
MergeTyParts); + } + MI.eraseFromParent(); +} + void RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet &WaterfallSgprs) { @@ -116,6 +193,50 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, MI.eraseFromParent(); break; } + case SplitLoad: { +LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); +unsigned Size = DstTy.getSizeInBits(); +// Even split to 128-bit loads +if (Size > 128) { + LLT B128; + if (DstTy.isVector()) { +LLT EltTy = DstTy.getElementType(); +B128 = LLT:
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From 686c0699e6653c1a11e7e911ccf4de107d390066 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 31 Oct 2024 14:10:57 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr register bank in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 27 +- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 69 --- .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 51 ++- 9 files changed, 191 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..8e13c0916dd9e1 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -54,9 +54,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// 
MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# 
CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 +# CHECK: DIVERGENT: G_BR %bb.2 # CHECK-LABEL: BLOCK bb.1 # CHECK-LABEL: BLOCK bb.2 -# CHECK: D
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
jeanPerier wrote: When building the runtime out of tree and running check-flang-rt, I am seeing an error about LLVM header not found: ``` make -j check-flang-rt [ 1%] Building CXX object flang-rt/unittests/third-party/unittest/CMakeFiles/llvm_gtest.dir/googletest/src/gtest-all.cc.o [ 1%] Building CXX object flang-rt/lib/Testing/CMakeFiles/NonGTestTesting.dir/local/home/jperier/f18-llvm-project/flang/lib/Testing/testing.cpp.o [ 4%] Building CXX object flang-rt/lib/Testing/CMakeFiles/NonGTestTesting.dir/local/home/jperier/f18-llvm-project/flang/lib/Testing/fp-testing.cpp.o [ 4%] Building CXX object flang-rt/unittests/third-party/unittest/CMakeFiles/llvm_gtest.dir/googlemock/src/gmock-all.cc.o llvm-project/flang/lib/Testing/testing.cpp:10:10: fatal error: llvm/Support/raw_ostream.h: No such file or directory #include "llvm/Support/raw_ostream.h" ^~~~ compilation terminated. make[3]: *** [flang-rt/lib/Testing/CMakeFiles/NonGTestTesting.dir/build.make:76: flang-rt/lib/Testing/CMakeFiles/NonGTestTesting.dir/local/home/jperier/f18-llvm-project/flang/lib/Testing/testing.cpp.o] Error 1 make[3]: *** Waiting for unfinished jobs llvm-project/flang/lib/Testing/fp-testing.cpp:10:10: fatal error: llvm/Support/Errno.h: No such file or directory #include "llvm/Support/Errno.h" ``` I first built flang like this: ``` cmake -DCMAKE_C_COMPILER=gcc \ -DCMAKE_CXX_COMPILER=g++ \ -DFLANG_BUILD_EXAMPLES=ON \ -DLLVM_TARGETS_TO_BUILD=host -DLLVM_INSTALL_UTILS=ON \ -DLLVM_INSTALL_GTEST=ON \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_PROJECTS=\"clang;mlir;flang;openmp\" \ -DLLVM_ENABLE_LIBOMPTARGET=OFF \ -DOPENMP_ENABLE_LIBOMPTARGET=OFF \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DCOMPILER_RT_BUILD_SANITIZERS=OFF \ -DCMAKE_INSTALL_PREFIX=$INSTALLDIR \ -DCMAKE_CXX_STANDARD=17 \ -DFLANG_RUNTIME_F128_MATH_LIB=libquadmath \ $LLVM_SRC/llvm make -j 16 make -j install ``` and then the runtime like this: ``` cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$INSTALLDIR \ 
-DLLVM_ENABLE_RUNTIMES="flang-rt" \ -DCMAKE_C_COMPILER=gcc \ -DCMAKE_CXX_COMPILER=g++ \ -DLLVM_BINARY_DIR=$LLVM_BUILD_DIR \ -DLLVM_DIR=$LLVM_BUILD_DIR/lib/cmake/llvm \ -DClang_DIR=$LLVM_BUILD_DIR/lib/cmake/clang \ -DCMAKE_Fortran_COMPILER=$INSTALLDIR/bin/flang-new \ -DCMAKE_Fortran_COMPILER_WORKS=yes \ -DLLVM_DEFAULT_TARGET_TRIPLE=x86_64-linux-gnu \ -DLLVM_RUNTIMES_TARGET=x86_64-linux-gnu \ -DCMAKE_CXX_STANDARD=17 \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DFLANG_RUNTIME_F128_MATH_LIB=libquadmath \ -DCMAKE_EXE_LINKER_FLAGS=\"$LDOPTS\" \ -DLLVM_STATIC_LINK_CXX_STDLIB=ON \ $LLVM_SRC/runtimes" make -j 16 make -j 16 check-flang-rt ``` Am I missing some cmake option for the out of tree runtime build? https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DOACROSS clause (PR #115397)
@@ -933,11 +934,19 @@ void OmpStructureChecker::CheckDistLinear( } } -void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &) { +void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { if (llvm::omp::allSimdSet.test(GetContext().directive)) { ExitDirectiveNest(SIMDNest); } dirContext_.pop_back(); + + assert(!loopStack_.empty() && "Expecting non-empty loop stack"); + const LoopConstruct &top = loopStack_.back(); tblah wrote: nit: I think this variable will be unused on non-assertion builds https://github.com/llvm/llvm-project/pull/115397 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DOACROSS clause (PR #115397)
@@ -541,6 +541,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPConstruct &) { } void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { + loopStack_.push_back(&x); tblah wrote: Is this needed for non-assertion builds? https://github.com/llvm/llvm-project/pull/115397 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Use StringRefs from pseudo_probe_desc section if it's mapped (PR #112996)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/112996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Use StringRefs from pseudo_probe_desc section if it's mapped (PR #112996)
aaupov wrote: ping @wlei-llvm https://github.com/llvm/llvm-project/pull/112996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Use StringRefs from pseudo_probe_desc section if it's mapped (PR #112996)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/112996 >From a54e4f1f17c153272583eda3f7a2bbd7a928b34d Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 18 Oct 2024 18:24:17 -0700 Subject: [PATCH] clang-format Created using spr 1.3.4 --- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 4fecfe8c3c09b1..09aa4fbb66bd42 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -127,8 +127,8 @@ void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) { StringRef Contents = PseudoProbeDescSection->getContents(); if (!ProbeDecoder.buildGUID2FuncDescMap( - reinterpret_cast(Contents.data()), - Contents.size(), /*IsMMapped*/true)) { + reinterpret_cast(Contents.data()), Contents.size(), + /*IsMMapped*/ true)) { errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; return; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Use StringRefs from pseudo_probe_desc section if it's mapped (PR #112996)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/112996 >From a54e4f1f17c153272583eda3f7a2bbd7a928b34d Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 18 Oct 2024 18:24:17 -0700 Subject: [PATCH] clang-format Created using spr 1.3.4 --- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 4fecfe8c3c09b1..09aa4fbb66bd42 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -127,8 +127,8 @@ void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) { StringRef Contents = PseudoProbeDescSection->getContents(); if (!ProbeDecoder.buildGUID2FuncDescMap( - reinterpret_cast(Contents.data()), - Contents.size(), /*IsMMapped*/true)) { + reinterpret_cast(Contents.data()), Contents.size(), + /*IsMMapped*/ true)) { errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; return; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Use StringRefs from pseudo_probe_desc section if it's mapped (PR #112996)
https://github.com/wlei-llvm approved this pull request. Sorry for the delay, LGTM. https://github.com/llvm/llvm-project/pull/112996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] e06af1d - Revert "[X86][AMX] Support AMX-AVX512 (#114070)"
Author: Alan Zhao Date: 2024-11-08T16:12:54-08:00 New Revision: e06af1d045a57e93ebf3c86c4ac70aa752a93fa1 URL: https://github.com/llvm/llvm-project/commit/e06af1d045a57e93ebf3c86c4ac70aa752a93fa1 DIFF: https://github.com/llvm/llvm-project/commit/e06af1d045a57e93ebf3c86c4ac70aa752a93fa1.diff LOG: Revert "[X86][AMX] Support AMX-AVX512 (#114070)" This reverts commit 58a17e1bbc54357385d0b89cfc5635e402c31ef6. Added: Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/BuiltinsX86_64.def clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/Headers/CMakeLists.txt clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp clang/test/CodeGen/attr-target-x86.c clang/test/Driver/x86-target-features.c clang/test/Preprocessor/x86_target_features.c llvm/include/llvm/IR/IntrinsicsX86.td llvm/include/llvm/TargetParser/X86TargetParser.def llvm/lib/Target/X86/X86.td llvm/lib/Target/X86/X86ExpandPseudo.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86InstrAMX.td llvm/lib/Target/X86/X86InstrPredicates.td llvm/lib/Target/X86/X86LowerAMXType.cpp llvm/lib/Target/X86/X86PreTileConfig.cpp llvm/lib/TargetParser/Host.cpp llvm/lib/TargetParser/X86TargetParser.cpp Removed: clang/lib/Headers/amxavx512intrin.h clang/test/CodeGen/X86/amx_avx512_api.c clang/test/CodeGen/X86/amxavx512-builtins.c llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll llvm/test/MC/Disassembler/X86/amx-avx512.txt llvm/test/MC/X86/amx-avx512-att.s llvm/test/MC/X86/amx-avx512-intel.s diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c3424e0e6f34c9..f82fbb73b12162 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -739,7 +739,6 @@ X86 Support * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``. - Support ISA of ``AMX-FP8``. - Support ISA of ``AMX-TRANSPOSE``. 
-- Support ISA of ``AMX-AVX512``. Arm and AArch64 Support ^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index 9f7462b1e0d962..d95e8455a304b6 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -133,12 +133,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose") -TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512,avx10.2-512") // AMX TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile") TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile") @@ -165,13 +159,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1, "vIUcvC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1, "vIUcvC*z", "n","amx-transpose") TARGET_BUILTIN(__builtin_ia32_ttransposed, "vIUcIUc", "n", "amx-transpose") -TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", 
"amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512,avx10.2-512") - TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi") TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd") TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiSLLi*SLLiSLLiIi", "n", "cmpccxadd") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0dba5672c5a85d..8887e0c1495d2a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/cl