aganea updated this revision to Diff 237197.
aganea marked 13 inline comments as done.
aganea added a comment.
Herald added subscribers: lucyrfox, mgester, arpith-jacob, nicolasvasilache,
antiagainst, shauheen, burmako, jpienaar, rriddle.
Updated as suggested by @rnk.
I've also removed `ThreadPoolStrategy::PinThreads` because the results weren't
conclusive. In the best case, pinning threads to a core / hyperthread performed
about the same as using a full CPU-socket affinity; in the worst case, it
degraded performance. The NT scheduler seems to be doing a pretty good job
here, so we'll stick with it :-)
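For reference, here's a minimal sketch of how the strategies introduced by this
patch are meant to be used (the thread-count value is illustrative):

  #include "llvm/Support/ThreadPool.h"
  #include "llvm/Support/Threading.h"

  // Default strategy: all hardware threads allowed by the process affinity
  // mask.
  llvm::ThreadPool Pool(llvm::hardware_concurrency());

  // One thread per physical core; better for memory-bound workloads.
  llvm::ThreadPool HeavyPool(llvm::heavyweight_hardware_concurrency());

  // Suggest an upper bound of 8 threads; the runtime may use fewer, never
  // more.
  llvm::ThreadPool CappedPool(llvm::hardware_concurrency(8));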
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D71775/new/
https://reviews.llvm.org/D71775
Files:
clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
clang-tools-extra/clangd/TUScheduler.cpp
clang-tools-extra/clangd/index/Background.cpp
clang-tools-extra/clangd/index/Background.h
clang-tools-extra/clangd/index/BackgroundRebuild.h
clang/lib/Tooling/AllTUsExecution.cpp
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
clang/tools/clang-scan-deps/ClangScanDeps.cpp
lld/ELF/SyntheticSections.cpp
llvm/include/llvm/ADT/BitVector.h
llvm/include/llvm/ADT/SmallBitVector.h
llvm/include/llvm/LTO/LTO.h
llvm/include/llvm/Support/ThreadPool.h
llvm/include/llvm/Support/Threading.h
llvm/lib/CodeGen/ParallelCG.cpp
llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
llvm/lib/LTO/LTO.cpp
llvm/lib/LTO/LTOBackend.cpp
llvm/lib/LTO/ThinLTOCodeGenerator.cpp
llvm/lib/Support/Host.cpp
llvm/lib/Support/Parallel.cpp
llvm/lib/Support/ThreadPool.cpp
llvm/lib/Support/Threading.cpp
llvm/lib/Support/Unix/Threading.inc
llvm/lib/Support/Windows/Threading.inc
llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
llvm/tools/dsymutil/dsymutil.cpp
llvm/tools/gold/gold-plugin.cpp
llvm/tools/llvm-cov/CodeCoverage.cpp
llvm/tools/llvm-cov/CoverageExporterJson.cpp
llvm/tools/llvm-cov/CoverageReport.cpp
llvm/tools/llvm-lto2/llvm-lto2.cpp
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/unittests/Support/Host.cpp
llvm/unittests/Support/TaskQueueTest.cpp
llvm/unittests/Support/ThreadPool.cpp
llvm/unittests/Support/Threading.cpp
mlir/lib/Pass/Pass.cpp
Index: mlir/lib/Pass/Pass.cpp
===================================================================
--- mlir/lib/Pass/Pass.cpp
+++ mlir/lib/Pass/Pass.cpp
@@ -411,7 +411,8 @@
// Create the async executors if they haven't been created, or if the main
// pipeline has changed.
if (asyncExecutors.empty() || hasSizeMismatch(asyncExecutors.front(), mgrs))
- asyncExecutors.assign(llvm::hardware_concurrency(), mgrs);
+ asyncExecutors.assign(llvm::hardware_concurrency().compute_thread_count(),
+ mgrs);
// Run a prepass over the module to collect the operations to execute over.
// This ensures that an analysis manager exists for each operation, as well as
Index: llvm/unittests/Support/Threading.cpp
===================================================================
--- llvm/unittests/Support/Threading.cpp
+++ llvm/unittests/Support/Threading.cpp
@@ -21,7 +21,8 @@
auto Num = heavyweight_hardware_concurrency();
// Since Num is unsigned this will also catch us trying to
// return -1.
- ASSERT_LE(Num, thread::hardware_concurrency());
+ ASSERT_LE(Num.compute_thread_count(),
+ hardware_concurrency().compute_thread_count());
}
#if LLVM_ENABLE_THREADS
Index: llvm/unittests/Support/ThreadPool.cpp
===================================================================
--- llvm/unittests/Support/ThreadPool.cpp
+++ llvm/unittests/Support/ThreadPool.cpp
@@ -8,11 +8,13 @@
#include "llvm/Support/ThreadPool.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Threading.h"
#include "gtest/gtest.h"
@@ -69,6 +71,8 @@
void SetUp() override { MainThreadReady = false; }
+ void TestAllThreads(ThreadPoolStrategy S);
+
std::condition_variable WaitMainThread;
std::mutex WaitMainThreadMutex;
bool MainThreadReady = false;
@@ -131,7 +135,7 @@
TEST_F(ThreadPoolTest, GetFuture) {
CHECK_UNSUPPORTED();
- ThreadPool Pool{2};
+ ThreadPool Pool(hardware_concurrency(2));
std::atomic_int i{0};
Pool.async([this, &i] {
waitForMainThread();
@@ -162,3 +166,45 @@
}
ASSERT_EQ(5, checked_in);
}
+
+#if LLVM_ENABLE_THREADS == 1
+
+void ThreadPoolTest::TestAllThreads(ThreadPoolStrategy S) {
+  // FIXME: Skip these tests on non-Windows because multi-socket systems were
+  // not tested on Unix yet, and llvm::get_thread_affinity_mask() isn't
+  // implemented for Unix.
+ Triple Host(Triple::normalize(sys::getProcessTriple()));
+ if (!Host.isOSWindows())
+ return;
+
+ llvm::DenseSet<llvm::BitVector> ThreadsUsed;
+ std::mutex Lock;
+ unsigned Threads = 0;
+ {
+ ThreadPool Pool(S);
+ Threads = Pool.getThreadCount();
+ for (size_t I = 0; I < 10000; ++I) {
+ Pool.async([&] {
+ waitForMainThread();
+ std::lock_guard<std::mutex> Guard(Lock);
+ auto Mask = llvm::get_thread_affinity_mask();
+ ThreadsUsed.insert(Mask);
+ });
+ }
+    ASSERT_TRUE(ThreadsUsed.empty());
+ setMainThreadReady();
+ }
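+  // Every processor group should have been used, i.e. we expect one distinct
+  // affinity mask (BitVector) per CPU socket.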
+ ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
+}
+
+TEST_F(ThreadPoolTest, AllThreads_UseAllResources) {
+ CHECK_UNSUPPORTED();
+ TestAllThreads({});
+}
+
+TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
+ CHECK_UNSUPPORTED();
+ TestAllThreads(llvm::heavyweight_hardware_concurrency());
+}
+
+#endif
Index: llvm/unittests/Support/TaskQueueTest.cpp
===================================================================
--- llvm/unittests/Support/TaskQueueTest.cpp
+++ llvm/unittests/Support/TaskQueueTest.cpp
@@ -22,7 +22,7 @@
};
TEST_F(TaskQueueTest, OrderedFutures) {
- ThreadPool TP(1);
+ ThreadPool TP(hardware_concurrency(1));
TaskQueue TQ(TP);
std::atomic<int> X{ 0 };
std::atomic<int> Y{ 0 };
@@ -66,7 +66,7 @@
}
TEST_F(TaskQueueTest, UnOrderedFutures) {
- ThreadPool TP(1);
+ ThreadPool TP(hardware_concurrency(1));
TaskQueue TQ(TP);
std::atomic<int> X{ 0 };
std::atomic<int> Y{ 0 };
@@ -96,7 +96,7 @@
}
TEST_F(TaskQueueTest, FutureWithReturnValue) {
- ThreadPool TP(1);
+ ThreadPool TP(hardware_concurrency(1));
TaskQueue TQ(TP);
std::future<std::string> F1 = TQ.async([&] { return std::string("Hello"); });
std::future<int> F2 = TQ.async([&] { return 42; });
Index: llvm/unittests/Support/Host.cpp
===================================================================
--- llvm/unittests/Support/Host.cpp
+++ llvm/unittests/Support/Host.cpp
@@ -37,7 +37,8 @@
// Initially this is only testing detection of the number of
// physical cores, which is currently only supported/tested for
// x86_64 Linux and Darwin.
- return (Host.getArch() == Triple::x86_64 &&
+ return Host.isOSWindows() ||
+ (Host.getArch() == Triple::x86_64 &&
(Host.isOSDarwin() || Host.getOS() == Triple::Linux));
}
Index: llvm/tools/llvm-profdata/llvm-profdata.cpp
===================================================================
--- llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -307,8 +307,11 @@
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
- NumThreads =
- std::min(hardware_concurrency(), unsigned((Inputs.size() + 1) / 2));
+ NumThreads = std::min(hardware_concurrency().compute_thread_count(),
+ unsigned((Inputs.size() + 1) / 2));
+ // FIXME: There's a bug here, where setting NumThreads = Inputs.size() fails
+ // the merge_empty_profile.test because the InstrProfWriter.ProfileKind isn't
+ // merged, thus the emitted file ends up with a PF_Unknown kind.
// Initialize the writer contexts.
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
@@ -320,7 +323,7 @@
for (const auto &Input : Inputs)
loadInput(Input, Remapper, Contexts[0].get());
} else {
- ThreadPool Pool(NumThreads);
+ ThreadPool Pool(hardware_concurrency(NumThreads));
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
Index: llvm/tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -65,8 +65,8 @@
"import files for the "
"distributed backend case"));
-static cl::opt<int> Threads("thinlto-threads",
- cl::init(llvm::heavyweight_hardware_concurrency()));
+// Default to using all hardware cores in the system.
+static cl::opt<int> Threads("thinlto-threads", cl::init(0));
static cl::list<std::string> SymbolResolutions(
"r",
Index: llvm/tools/llvm-cov/CoverageReport.cpp
===================================================================
--- llvm/tools/llvm-cov/CoverageReport.cpp
+++ llvm/tools/llvm-cov/CoverageReport.cpp
@@ -356,11 +356,8 @@
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
- NumThreads =
- std::max(1U, std::min(llvm::heavyweight_hardware_concurrency(),
- unsigned(Files.size())));
-
- ThreadPool Pool(NumThreads);
+ NumThreads = Files.size();
+ ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
std::vector<FileCoverageSummary> FileReports;
FileReports.reserve(Files.size());
Index: llvm/tools/llvm-cov/CoverageExporterJson.cpp
===================================================================
--- llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -163,11 +163,9 @@
ArrayRef<FileCoverageSummary> FileReports,
const CoverageViewOptions &Options) {
auto NumThreads = Options.NumThreads;
- if (NumThreads == 0) {
- NumThreads = std::max(1U, std::min(llvm::heavyweight_hardware_concurrency(),
- unsigned(SourceFiles.size())));
- }
- ThreadPool Pool(NumThreads);
+ if (NumThreads == 0)
+ NumThreads = SourceFiles.size();
+ ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
json::Array FileArray;
std::mutex FileArrayMutex;
Index: llvm/tools/llvm-cov/CodeCoverage.cpp
===================================================================
--- llvm/tools/llvm-cov/CodeCoverage.cpp
+++ llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -944,9 +944,7 @@
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
- NumThreads =
- std::max(1U, std::min(llvm::heavyweight_hardware_concurrency(),
- unsigned(SourceFiles.size())));
+ NumThreads = SourceFiles.size();
if (!ViewOpts.hasOutputDirectory() || NumThreads == 1) {
for (const std::string &SourceFile : SourceFiles)
@@ -954,7 +952,7 @@
ShowFilenames);
} else {
// In -output-dir mode, it's safe to use multiple threads to print files.
- ThreadPool Pool(NumThreads);
+ ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
for (const std::string &SourceFile : SourceFiles)
Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
Coverage.get(), Printer.get(), ShowFilenames);
Index: llvm/tools/gold/gold-plugin.cpp
===================================================================
--- llvm/tools/gold/gold-plugin.cpp
+++ llvm/tools/gold/gold-plugin.cpp
@@ -134,8 +134,8 @@
static unsigned OptLevel = 2;
// Default parallelism of 0 used to indicate that user did not specify.
// Actual parallelism default value depends on implementation.
- // Currently only affects ThinLTO, where the default is
- // llvm::heavyweight_hardware_concurrency.
+  // Currently only affects ThinLTO, where the default is the number of
+  // hardware cores in the system.
static unsigned Parallelism = 0;
// Default regular LTO codegen parallelism (number of partitions).
static unsigned ParallelCodeGenParallelismLevel = 1;
Index: llvm/tools/dsymutil/dsymutil.cpp
===================================================================
--- llvm/tools/dsymutil/dsymutil.cpp
+++ llvm/tools/dsymutil/dsymutil.cpp
@@ -258,7 +258,7 @@
if (opt::Arg *NumThreads = Args.getLastArg(OPT_threads))
Options.LinkOpts.Threads = atoi(NumThreads->getValue());
else
- Options.LinkOpts.Threads = thread::hardware_concurrency();
+ Options.LinkOpts.Threads = 0; // Use all available hardware threads
if (Options.DumpDebugMap || Options.LinkOpts.Verbose)
Options.LinkOpts.Threads = 1;
@@ -540,9 +540,10 @@
// Shared a single binary holder for all the link steps.
BinaryHolder BinHolder;
- unsigned ThreadCount =
- std::min<unsigned>(Options.LinkOpts.Threads, DebugMapPtrsOrErr->size());
- ThreadPool Threads(ThreadCount);
+ unsigned ThreadCount = Options.LinkOpts.Threads;
+ if (!ThreadCount)
+ ThreadCount = DebugMapPtrsOrErr->size();
+ ThreadPool Threads(hardware_concurrency(ThreadCount));
// If there is more than one link to execute, we need to generate
// temporary files.
Index: llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
===================================================================
--- llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -3020,7 +3020,7 @@
// errors from unchecked llvm::Error objects.
Error RemarkLinkAllError = Error::success();
- ThreadPool pool(3);
+ ThreadPool pool(hardware_concurrency(3));
pool.async(AnalyzeAll);
pool.async(CloneAll);
pool.async(RemarkLinkAll, std::ref(RemarkLinkAllError));
Index: llvm/lib/Support/Windows/Threading.inc
===================================================================
--- llvm/lib/Support/Windows/Threading.inc
+++ llvm/lib/Support/Windows/Threading.inc
@@ -16,6 +16,8 @@
#include "WindowsSupport.h"
#include <process.h>
+#include <bitset>
+
// Windows will at times define MemoryFence.
#ifdef MemoryFence
#undef MemoryFence
@@ -122,3 +124,163 @@
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
}
+
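+// Describes a Windows processor group: a set of up to 64 hardware threads
+// (typically one CPU socket or NUMA node) that a thread can be affinitized to.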
+struct ProcessorGroup {
+ unsigned ID;
+ unsigned AllThreads;
+ unsigned UsableThreads;
+ unsigned ThreadsPerCore;
+ uint64_t Affinity;
+};
+
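+// Calls \p Fn once for each entry of the requested \p Relationship returned
+// by GetLogicalProcessorInformationEx(). Returns false if the system call
+// fails.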
+template <typename F>
+static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
+ DWORD Len = 0;
+ BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
+ if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+ return false;
+ }
+ auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
+ R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
+ if (R) {
+ auto *End =
+ (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
+ for (auto *Curr = Info; Curr < End;
+ Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
+ Curr->Size)) {
+ if (Curr->Relationship != Relationship)
+ continue;
+ Fn(Curr);
+ }
+ }
+ free(Info);
+ return true;
+}
+
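+// Enumerates the processor groups in the system, computed once and cached.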
+static ArrayRef<ProcessorGroup> getProcessorGroups() {
+ auto computeGroups = []() {
+ SmallVector<ProcessorGroup, 4> Groups;
+
+ auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
+ GROUP_RELATIONSHIP &El = ProcInfo->Group;
+ for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
+ ProcessorGroup G;
+ G.ID = Groups.size();
+ G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
+ G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
+ assert(G.UsableThreads <= 64);
+ G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
+ Groups.push_back(G);
+ }
+ };
+
+ if (!IterateProcInfo(RelationGroup, HandleGroup))
+ return std::vector<ProcessorGroup>();
+
+ auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
+ PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
+ assert(El.GroupCount == 1);
+ unsigned NumHyperThreads = 1;
+ // If the flag is set, each core supports more than one hyper-thread.
+ if (El.Flags & LTP_PC_SMT)
+ NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
+ unsigned I = El.GroupMask[0].Group;
+ Groups[I].ThreadsPerCore = NumHyperThreads;
+ };
+
+ if (!IterateProcInfo(RelationProcessorCore, HandleProc))
+ return std::vector<ProcessorGroup>();
+
+ // If there's an affinity mask set on one of the CPUs, then assume the user
+ // wants to constrain the current process to only a single CPU.
+ for (auto &G : Groups) {
+ if (G.UsableThreads != G.AllThreads) {
+ ProcessorGroup NewG{G};
+ Groups.clear();
+ Groups.push_back(NewG);
+ break;
+ }
+ }
+
+ return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
+ };
+ static auto Groups = computeGroups();
+ return ArrayRef<ProcessorGroup>(Groups);
+}
+
+template <typename R, typename UnaryPredicate>
+static unsigned aggregate(R &&Range, UnaryPredicate P) {
+ unsigned I{};
+ for (const auto &It : Range)
+ I += P(It);
+ return I;
+}
+
+// for sys::getHostNumPhysicalCores
+int computeHostNumPhysicalCores() {
+ static unsigned Cores =
+ aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
+ return G.UsableThreads / G.ThreadsPerCore;
+ });
+ return Cores;
+}
+
+int computeHostNumHardwareThreads() {
+ static unsigned Threads =
+ aggregate(getProcessorGroups(),
+ [](const ProcessorGroup &G) { return G.UsableThreads; });
+ return Threads;
+}
+
+// Assign the current thread to a more appropriate CPU socket or CPU group
+void llvm::ThreadPoolStrategy::apply_thread_strategy(
+ unsigned ThreadPoolNum) const {
+ ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
+
+ assert(ThreadPoolNum < compute_thread_count() &&
+ "The thread index is not within thread strategy's range!");
+
+  // In this mode, the ThreadPoolNum represents the core number, not the
+  // hyper-thread number. Assumes all NUMA groups have the same number of
+  // hyper-threads per core.
+ if (!UseHyperThreads)
+ ThreadPoolNum *= Groups[0].ThreadsPerCore;
+
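+  // Find the processor group whose thread range contains ThreadPoolNum, and
+  // bind the current thread to all the cores of that group.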
+ unsigned ThreadRangeStart = 0;
+ for (unsigned I = 0; I < Groups.size(); ++I) {
+ const ProcessorGroup &G = Groups[I];
+ if (ThreadPoolNum >= ThreadRangeStart &&
+ ThreadPoolNum < ThreadRangeStart + G.UsableThreads) {
+
+ GROUP_AFFINITY Affinity{};
+ Affinity.Group = G.ID;
+ Affinity.Mask = G.Affinity;
+ SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
+ }
+ ThreadRangeStart += G.UsableThreads;
+ }
+}
+
+llvm::BitVector llvm::get_thread_affinity_mask() {
+ GROUP_AFFINITY Affinity{};
+ GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
+
+ static unsigned All =
+ aggregate(getProcessorGroups(),
+ [](const ProcessorGroup &G) { return G.AllThreads; });
+
+ unsigned StartOffset =
+ aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
+ return G.ID < Affinity.Group ? G.AllThreads : 0;
+ });
+
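+  // Flatten the group-relative affinity mask into a BitVector indexed over
+  // all the hardware threads in the system.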
+ llvm::BitVector V;
+ V.resize(All);
+ for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
+ if ((Affinity.Mask >> I) & 1)
+ V.set(StartOffset + I);
+ }
+ return V;
+}
+
+unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
Index: llvm/lib/Support/Unix/Threading.inc
===================================================================
--- llvm/lib/Support/Unix/Threading.inc
+++ llvm/lib/Support/Unix/Threading.inc
@@ -267,3 +267,27 @@
#endif
return SetThreadPriorityResult::FAILURE;
}
+
+#include <thread>
+
+int computeHostNumHardwareThreads() {
+#if defined(HAVE_SCHED_GETAFFINITY) && defined(HAVE_CPU_COUNT)
+ cpu_set_t Set;
+  // Note: sched_getaffinity() returns 0 on success.
+  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
+    return CPU_COUNT(&Set);
+#endif
+ // Guard against std::thread::hardware_concurrency() returning 0.
+ if (unsigned Val = std::thread::hardware_concurrency())
+ return Val;
+ return 1;
+}
+
+void llvm::ThreadPoolStrategy::apply_thread_strategy(
+ unsigned ThreadPoolNum) const {}
+
+llvm::BitVector llvm::get_thread_affinity_mask() {
+ // FIXME: Implement
+ llvm_unreachable("Not implemented!");
+}
+
+unsigned llvm::get_cpus() { return 1; }
Index: llvm/lib/Support/Threading.cpp
===================================================================
--- llvm/lib/Support/Threading.cpp
+++ llvm/lib/Support/Threading.cpp
@@ -45,10 +45,6 @@
Fn(UserData);
}
-unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
-
-unsigned llvm::hardware_concurrency() { return 1; }
-
uint64_t llvm::get_threadid() { return 0; }
uint32_t llvm::get_max_thread_name_length() { return 0; }
@@ -57,6 +53,14 @@
void llvm::get_thread_name(SmallVectorImpl<char> &Name) { Name.clear(); }
+llvm::BitVector llvm::get_thread_affinity_mask() { return {}; }
+
+unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
+  // When threads are disabled, return 1 so that clients will still loop at
+  // least once.
+ return 1;
+}
+
#if LLVM_ENABLE_THREADS == 0
void llvm::llvm_execute_on_thread_async(
llvm::unique_function<void()> Func,
@@ -78,30 +82,19 @@
#else
-#include <thread>
-unsigned llvm::heavyweight_hardware_concurrency() {
- // Since we can't get here unless LLVM_ENABLE_THREADS == 1, it is safe to use
- // `std::thread` directly instead of `llvm::thread` (and indeed, doing so
- // allows us to not define `thread` in the llvm namespace, which conflicts
- // with some platforms such as FreeBSD whose headers also define a struct
- // called `thread` in the global namespace which can cause ambiguity due to
- // ADL.
- int NumPhysical = sys::getHostNumPhysicalCores();
- if (NumPhysical == -1)
- return std::thread::hardware_concurrency();
- return NumPhysical;
-}
+int computeHostNumHardwareThreads();
-unsigned llvm::hardware_concurrency() {
-#if defined(HAVE_SCHED_GETAFFINITY) && defined(HAVE_CPU_COUNT)
- cpu_set_t Set;
- if (sched_getaffinity(0, sizeof(Set), &Set))
- return CPU_COUNT(&Set);
-#endif
- // Guard against std::thread::hardware_concurrency() returning 0.
- if (unsigned Val = std::thread::hardware_concurrency())
- return Val;
- return 1;
+unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
+ int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads()
+ : sys::getHostNumPhysicalCores();
+ if (MaxThreadCount <= 0)
+ MaxThreadCount = 1;
+
+  // No need to create more threads than there are hardware threads; it would
+  // uselessly induce more context switching and cache eviction.
+ if (!ThreadsRequested || ThreadsRequested > (unsigned)MaxThreadCount)
+ return MaxThreadCount;
+ return ThreadsRequested;
}
namespace {
Index: llvm/lib/Support/ThreadPool.cpp
===================================================================
--- llvm/lib/Support/ThreadPool.cpp
+++ llvm/lib/Support/ThreadPool.cpp
@@ -20,16 +20,15 @@
#if LLVM_ENABLE_THREADS
-// Default to hardware_concurrency
-ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {}
-
-ThreadPool::ThreadPool(unsigned ThreadCount)
- : ActiveThreads(0), EnableFlag(true) {
+ThreadPool::ThreadPool(ThreadPoolStrategy S)
+ : ActiveThreads(0), EnableFlag(true),
+ ThreadCount(S.compute_thread_count()) {
// Create ThreadCount threads that will loop forever, wait on QueueCondition
// for tasks to be queued or the Pool to be destroyed.
Threads.reserve(ThreadCount);
for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
- Threads.emplace_back([&] {
+ Threads.emplace_back([S, ThreadID, this] {
+ S.apply_thread_strategy(ThreadID);
while (true) {
PackagedTaskTy Task;
{
@@ -108,12 +107,10 @@
#else // LLVM_ENABLE_THREADS Disabled
-ThreadPool::ThreadPool() : ThreadPool(0) {}
-
// No threads are launched, issue a warning if ThreadCount is not 0
-ThreadPool::ThreadPool(unsigned ThreadCount)
- : ActiveThreads(0) {
- if (ThreadCount) {
+ThreadPool::ThreadPool(ThreadPoolStrategy S)
+ : ActiveThreads(0), ThreadCount(S.compute_thread_count()) {
+ if (ThreadCount != 1) {
errs() << "Warning: request a ThreadPool with " << ThreadCount
<< " threads, but LLVM_ENABLE_THREADS has been turned off\n";
}
@@ -138,8 +135,6 @@
return Future;
}
-ThreadPool::~ThreadPool() {
- wait();
-}
+ThreadPool::~ThreadPool() { wait(); }
#endif
Index: llvm/lib/Support/Parallel.cpp
===================================================================
--- llvm/lib/Support/Parallel.cpp
+++ llvm/lib/Support/Parallel.cpp
@@ -36,13 +36,16 @@
/// in filo order.
class ThreadPoolExecutor : public Executor {
public:
- explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency())
- : Done(ThreadCount) {
+ explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency())
+ : Strategy(S), Done(S.compute_thread_count()) {
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
- std::thread([&, ThreadCount] {
- for (size_t i = 1; i < ThreadCount; ++i) {
- std::thread([=] { work(); }).detach();
+ std::thread([&] {
+ for (size_t I = 1; I < Strategy.compute_thread_count(); ++I) {
+ std::thread([=] {
+ Strategy.apply_thread_strategy(I);
+ work();
+ }).detach();
}
work();
}).detach();
@@ -82,6 +85,7 @@
std::stack<std::function<void()>> WorkStack;
std::mutex Mutex;
std::condition_variable Cond;
+ ThreadPoolStrategy Strategy;
parallel::detail::Latch Done;
};
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1266,7 +1266,7 @@
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
-static int computeHostNumPhysicalCores() {
+int computeHostNumPhysicalCores() {
// Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
// mmapped because it appears to have 0 size.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
@@ -1312,7 +1312,7 @@
#include <sys/sysctl.h>
// Gets the number of *physical cores* on the machine.
-static int computeHostNumPhysicalCores() {
+int computeHostNumPhysicalCores() {
uint32_t count;
size_t len = sizeof(count);
sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
@@ -1326,6 +1326,9 @@
}
return count;
}
+#elif defined(_WIN32)
+// Defined in llvm/lib/Support/Windows/Threading.inc
+int computeHostNumPhysicalCores();
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================
--- llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -81,8 +81,8 @@
namespace {
-static cl::opt<int>
- ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
+// Default to using one job per hardware core in the system
+static cl::opt<int> ThreadCount("threads", cl::init(0));
// Simple helper to save temporary files for debug.
static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
@@ -1037,7 +1037,7 @@
// Parallel optimizer + codegen
{
- ThreadPool Pool(ThreadCount);
+ ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount));
for (auto IndexCount : ModulesOrdering) {
auto &Mod = Modules[IndexCount];
Pool.async([&](int count) {
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -375,7 +375,8 @@
void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> Mod) {
- ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
+ ThreadPool CodegenThreadPool(
+ heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel));
unsigned ThreadCount = 0;
const Target *T = &TM->getTarget();
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -475,8 +475,7 @@
LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
: Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
if (!Backend)
- this->Backend =
- createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
+ this->Backend = createInProcessThinBackend();
}
LTO::LTO(Config Conf, ThinBackend Backend,
@@ -1067,7 +1066,8 @@
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
- BackendThreadPool(ThinLTOParallelismLevel),
+ BackendThreadPool(
+ heavyweight_hardware_concurrency(ThinLTOParallelismLevel)),
AddStream(std::move(AddStream)), Cache(std::move(Cache)) {
for (auto &Name : CombinedIndex.cfiFunctionDefs())
CfiFunctionDefs.insert(
Index: llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
===================================================================
--- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -153,7 +153,8 @@
if (S.NumCompileThreads > 0) {
CompileLayer->setCloneToNewContextOnEmit(true);
- CompileThreads = std::make_unique<ThreadPool>(S.NumCompileThreads);
+ CompileThreads =
+ std::make_unique<ThreadPool>(hardware_concurrency(S.NumCompileThreads));
ES->setDispatchMaterialization(
[this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
// FIXME: Switch to move capture once we have c++14.
Index: llvm/lib/CodeGen/ParallelCG.cpp
===================================================================
--- llvm/lib/CodeGen/ParallelCG.cpp
+++ llvm/lib/CodeGen/ParallelCG.cpp
@@ -51,7 +51,7 @@
// Create ThreadPool in nested scope so that threads will be joined
// on destruction.
{
- ThreadPool CodegenThreadPool(OSs.size());
+ ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size()));
int ThreadCount = 0;
SplitModule(
Index: llvm/include/llvm/Support/Threading.h
===================================================================
--- llvm/include/llvm/Support/Threading.h
+++ llvm/include/llvm/Support/Threading.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
@@ -143,20 +144,57 @@
#endif
}
- /// Get the amount of currency to use for tasks requiring significant
- /// memory or other resources. Currently based on physical cores, if
- /// available for the host system, otherwise falls back to
- /// thread::hardware_concurrency().
- /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
- unsigned heavyweight_hardware_concurrency();
+ /// This tells how a thread pool will be used
+ class ThreadPoolStrategy {
+ public:
+    // The default value (0) means use all available threads, except those
+    // excluded by the process affinity mask. If set, this value only
+    // represents a suggested upper bound; the runtime may use fewer threads
+    // (never more).
+ unsigned ThreadsRequested = 0;
+
+    // If true and SMT is active, spawn one std::thread per hyper-thread.
+    // If false, spawn only one std::thread per physical core.
+ bool UseHyperThreads = true;
+
+ /// Retrieves the max available threads for the current strategy. This
+ /// accounts for affinity masks and takes advantage of all CPU sockets.
+ unsigned compute_thread_count() const;
+
+    /// Assign the current thread to an ideal hardware CPU or NUMA group. In a
+    /// multi-socket system, this ensures threads are spread over all CPU
+    /// sockets. \p ThreadPoolNum is a number in the range
+    /// [0, compute_thread_count()). See the ThreadPoolStrategy fields above.
+ void apply_thread_strategy(unsigned ThreadPoolNum) const;
+ };
+
+  /// Returns a thread strategy for tasks requiring significant memory or other
+  /// resources. To be used for workloads where hardware_concurrency() proves to
+  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
+  /// based on physical cores, if available for the host system; otherwise falls
+  /// back to hardware_concurrency(). The computed thread count is 1 when LLVM
+  /// is configured with LLVM_ENABLE_THREADS=OFF.
+ inline ThreadPoolStrategy
+ heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
+ ThreadPoolStrategy S;
+ S.UseHyperThreads = false;
+ S.ThreadsRequested = ThreadCount;
+ return S;
+ }
/// Get the number of threads that the current program can execute
- /// concurrently. On some systems std::thread::hardware_concurrency() returns
- /// the total number of cores, without taking affinity into consideration.
- /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
- /// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
- /// not available.
- unsigned hardware_concurrency();
+  /// concurrently. Returns the default thread strategy, which uses all
+  /// available hardware resources except those initially excluded by the
+  /// process affinity mask. On some systems std::thread::hardware_concurrency()
+  /// returns the total number of cores, without taking affinity into
+  /// consideration. The computed thread count is 1 when LLVM is configured
+  /// with LLVM_ENABLE_THREADS=OFF.
+ inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
+ ThreadPoolStrategy S;
+ S.ThreadsRequested = ThreadCount;
+ return S;
+ }
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
@@ -184,6 +222,14 @@
/// the operation succeeded or failed is returned.
void get_thread_name(SmallVectorImpl<char> &Name);
+  /// Returns a mask that represents the hardware threads (and thus the cores,
+  /// CPUs and NUMA groups) on which the calling thread can execute. On
+  /// Windows, threads cannot cross CPU boundaries.
+ llvm::BitVector get_thread_affinity_mask();
+
+ /// Returns how many physical CPUs or NUMA groups the system has.
+ unsigned get_cpus();
+
enum class ThreadPriority {
Background = 0,
Default = 1,
Index: llvm/include/llvm/Support/ThreadPool.h
===================================================================
--- llvm/include/llvm/Support/ThreadPool.h
+++ llvm/include/llvm/Support/ThreadPool.h
@@ -13,7 +13,9 @@
#ifndef LLVM_SUPPORT_THREAD_POOL_H
#define LLVM_SUPPORT_THREAD_POOL_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Support/thread.h"
#include <future>
@@ -38,12 +40,11 @@
using TaskTy = std::function<void()>;
using PackagedTaskTy = std::packaged_task<void()>;
- /// Construct a pool with the number of threads found by
- /// hardware_concurrency().
- ThreadPool();
-
- /// Construct a pool of \p ThreadCount threads
- ThreadPool(unsigned ThreadCount);
+  /// Construct a pool using the hardware strategy \p S for mapping hardware
+  /// execution resources (threads, cores, CPUs).
+  /// Defaults to using the maximum execution resources in the system, minus
+  /// any resources excluded by the process affinity mask.
+ ThreadPool(ThreadPoolStrategy S = hardware_concurrency());
/// Blocking destructor: the pool will wait for all the threads to complete.
~ThreadPool();
@@ -68,6 +69,8 @@
/// It is an error to try to add new tasks while blocking on this call.
void wait();
+ unsigned getThreadCount() const { return ThreadCount; }
+
private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
@@ -94,6 +97,8 @@
/// Signal for the destruction of the pool, asking thread to exit.
bool EnableFlag;
#endif
+
+ unsigned ThreadCount;
};
}
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -227,7 +227,8 @@
AddStreamFn AddStream, NativeObjectCache Cache)>;
/// This ThinBackend runs the individual backend jobs in-process.
-ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
+/// The default value means to use one job per hardware core (not hyper-thread).
+ThinBackend createInProcessThinBackend(unsigned ParallelismLevel = 0);
/// This ThinBackend writes individual module indexes to files, instead of
/// running the individual backend jobs. This backend is for distributed builds
Index: llvm/include/llvm/ADT/SmallBitVector.h
===================================================================
--- llvm/include/llvm/ADT/SmallBitVector.h
+++ llvm/include/llvm/ADT/SmallBitVector.h
@@ -662,6 +662,16 @@
getPointer()->clearBitsNotInMask(Mask, MaskWords);
}
+ void invalid() {
+ assert(empty());
+ X = (uintptr_t)-1;
+ }
+ bool isInvalid() const { return X == (uintptr_t)-1; }
+
+ ArrayRef<uintptr_t> getData() const {
+ return isSmall() ? makeArrayRef(X) : getPointer()->getData();
+ }
+
private:
template <bool AddBits, bool InvertMask>
void applyMask(const uint32_t *Mask, unsigned MaskWords) {
@@ -699,6 +709,23 @@
return Result;
}
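+// Allow SmallBitVector to be used as a key in DenseMap/DenseSet; the
+// tombstone key is a vector marked with invalid().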
+template <> struct DenseMapInfo<SmallBitVector> {
+ static inline SmallBitVector getEmptyKey() { return SmallBitVector(); }
+ static inline SmallBitVector getTombstoneKey() {
+ SmallBitVector V;
+ V.invalid();
+ return V;
+ }
+  static unsigned getHashValue(const SmallBitVector &V) {
+ return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
+ std::make_pair(V.size(), V.getData()));
+ }
+ static bool isEqual(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+ if (LHS.isInvalid() || RHS.isInvalid())
+ return LHS.isInvalid() == RHS.isInvalid();
+ return LHS == RHS;
+ }
+};
} // end namespace llvm
namespace std {
Index: llvm/include/llvm/ADT/BitVector.h
===================================================================
--- llvm/include/llvm/ADT/BitVector.h
+++ llvm/include/llvm/ADT/BitVector.h
@@ -14,6 +14,7 @@
#define LLVM_ADT_BITVECTOR_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@@ -758,6 +759,14 @@
std::swap(Size, RHS.Size);
}
+ void invalid() {
+ assert(!Size && Bits.empty());
+ Size = (unsigned)-1;
+ }
+ bool isInvalid() const { return Size == (unsigned)-1; }
+
+ ArrayRef<BitWord> getData() const { return Bits; }
+
//===--------------------------------------------------------------------===//
// Portable bit mask operations.
//===--------------------------------------------------------------------===//
@@ -932,6 +941,23 @@
return X.getMemorySize();
}
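+// Allow BitVector to be used as a key in DenseMap/DenseSet; the tombstone key
+// is a vector marked with invalid() (Size == -1).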
+template <> struct DenseMapInfo<BitVector> {
+ static inline BitVector getEmptyKey() { return BitVector(); }
+ static inline BitVector getTombstoneKey() {
+ BitVector V;
+ V.invalid();
+ return V;
+ }
+ static unsigned getHashValue(const BitVector &V) {
+ return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
+ std::make_pair(V.size(), V.getData()));
+ }
+ static bool isEqual(const BitVector &LHS, const BitVector &RHS) {
+ if (LHS.isInvalid() || RHS.isInvalid())
+ return LHS.isInvalid() == RHS.isInvalid();
+ return LHS == RHS;
+ }
+};
} // end namespace llvm
namespace std {
Index: lld/ELF/SyntheticSections.cpp
===================================================================
--- lld/ELF/SyntheticSections.cpp
+++ lld/ELF/SyntheticSections.cpp
@@ -2668,8 +2668,8 @@
size_t numShards = 32;
size_t concurrency = 1;
if (threadsEnabled)
- concurrency =
- std::min<size_t>(PowerOf2Floor(hardware_concurrency()), numShards);
+ concurrency = std::min<size_t>(
+ hardware_concurrency().compute_thread_count(), numShards);
// A sharded map to uniquify symbols by name.
std::vector<DenseMap<CachedHashStringRef, size_t>> map(numShards);
@@ -3112,8 +3112,8 @@
// operations in the following tight loop.
size_t concurrency = 1;
if (threadsEnabled)
- concurrency =
- std::min<size_t>(PowerOf2Floor(hardware_concurrency()), numShards);
+ concurrency = std::min<size_t>(
+ hardware_concurrency().compute_thread_count(), numShards);
// Add section pieces to the builders.
parallelForEachN(0, concurrency, [&](size_t threadId) {
Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===================================================================
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include <mutex>
#include <thread>
@@ -299,14 +300,9 @@
DependencyScanningService Service(ScanMode, Format, ReuseFileManager,
SkipExcludedPPRanges);
-#if LLVM_ENABLE_THREADS
- unsigned NumWorkers =
- NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
-#else
- unsigned NumWorkers = 1;
-#endif
+ llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads));
std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
- for (unsigned I = 0; I < NumWorkers; ++I)
+ for (unsigned I = 0; I < Pool.getThreadCount(); ++I)
WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
std::vector<SingleCommandCompilationDatabase> Inputs;
@@ -314,18 +310,17 @@
AdjustingCompilations->getAllCompileCommands())
Inputs.emplace_back(Cmd);
- std::vector<std::thread> WorkerThreads;
std::atomic<bool> HadErrors(false);
std::mutex Lock;
size_t Index = 0;
if (Verbose) {
llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
- << " files using " << NumWorkers << " workers\n";
+ << " files using " << Pool.getThreadCount() << " workers\n";
}
- for (unsigned I = 0; I < NumWorkers; ++I) {
- auto Worker = [I, &Lock, &Index, &Inputs, &HadErrors, &WorkerTools,
- &DependencyOS, &Errs]() {
+ for (unsigned I = 0; I < Pool.getThreadCount(); ++I) {
+ Pool.async([I, &Lock, &Index, &Inputs, &HadErrors, &WorkerTools,
+ &DependencyOS, &Errs]() {
while (true) {
const SingleCommandCompilationDatabase *Input;
std::string Filename;
@@ -345,16 +340,9 @@
if (handleDependencyToolResult(Filename, MaybeFile, DependencyOS, Errs))
HadErrors = true;
}
- };
-#if LLVM_ENABLE_THREADS
- WorkerThreads.emplace_back(std::move(Worker));
-#else
- // Run the worker without spawning a thread when threads are disabled.
- Worker();
-#endif
+ });
}
- for (auto &W : WorkerThreads)
- W.join();
+ Pool.wait();
return HadErrors;
}
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -106,7 +106,8 @@
// sharding gives a performance edge by reducing the lock contention.
// FIXME: A better heuristic might also consider the OS to account for
// the different cost of lock contention on different OSes.
- NumShards = std::max(2u, llvm::hardware_concurrency() / 4);
+ NumShards =
+ std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
CacheShards = std::make_unique<CacheShard[]>(NumShards);
}
Index: clang/lib/Tooling/AllTUsExecution.cpp
===================================================================
--- clang/lib/Tooling/AllTUsExecution.cpp
+++ clang/lib/Tooling/AllTUsExecution.cpp
@@ -114,8 +114,7 @@
auto &Action = Actions.front();
{
- llvm::ThreadPool Pool(ThreadCount == 0 ? llvm::hardware_concurrency()
- : ThreadCount);
+ llvm::ThreadPool Pool(llvm::hardware_concurrency(ThreadCount));
for (std::string File : Files) {
Pool.async(
[&](std::string Path) {
Index: clang-tools-extra/clangd/index/BackgroundRebuild.h
===================================================================
--- clang-tools-extra/clangd/index/BackgroundRebuild.h
+++ clang-tools-extra/clangd/index/BackgroundRebuild.h
@@ -49,7 +49,9 @@
public:
BackgroundIndexRebuilder(SwapIndex *Target, FileSymbols *Source,
unsigned Threads)
- : TUsBeforeFirstBuild(Threads), Target(Target), Source(Source) {}
+ : TUsBeforeFirstBuild(llvm::heavyweight_hardware_concurrency(Threads)
+ .compute_thread_count()),
+ Target(Target), Source(Source) {}
// Called to indicate a TU has been indexed.
// May rebuild, if enough TUs have been indexed.
Index: clang-tools-extra/clangd/index/Background.h
===================================================================
--- clang-tools-extra/clangd/index/Background.h
+++ clang-tools-extra/clangd/index/Background.h
@@ -117,11 +117,10 @@
/// If BuildIndexPeriodMs is greater than 0, the symbol index will only be
/// rebuilt periodically (one per \p BuildIndexPeriodMs); otherwise, index is
/// rebuilt for each indexed file.
- BackgroundIndex(
- Context BackgroundContext, const FileSystemProvider &,
- const GlobalCompilationDatabase &CDB,
- BackgroundIndexStorage::Factory IndexStorageFactory,
- size_t ThreadPoolSize = llvm::heavyweight_hardware_concurrency());
+ BackgroundIndex(Context BackgroundContext, const FileSystemProvider &,
+ const GlobalCompilationDatabase &CDB,
+ BackgroundIndexStorage::Factory IndexStorageFactory,
+ size_t ThreadPoolSize = 0); // 0 = use all hardware threads
~BackgroundIndex(); // Blocks while the current task finishes.
// Enqueue translation units for indexing.
Index: clang-tools-extra/clangd/index/Background.cpp
===================================================================
--- clang-tools-extra/clangd/index/Background.cpp
+++ clang-tools-extra/clangd/index/Background.cpp
@@ -146,9 +146,10 @@
CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
enqueue(ChangedFiles);
})) {
- assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
+ assert(Rebuilder.TUsBeforeFirstBuild > 0 &&
+ "Thread pool size can't be zero.");
assert(this->IndexStorageFactory && "Storage factory can not be null!");
- for (unsigned I = 0; I < ThreadPoolSize; ++I) {
+ for (unsigned I = 0; I < Rebuilder.TUsBeforeFirstBuild; ++I) {
ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1), [this] {
WithContext Ctx(this->BackgroundContext.clone());
Queue.work([&] { Rebuilder.idle(); });
Index: clang-tools-extra/clangd/TUScheduler.cpp
===================================================================
--- clang-tools-extra/clangd/TUScheduler.cpp
+++ clang-tools-extra/clangd/TUScheduler.cpp
@@ -824,13 +824,7 @@
} // namespace
unsigned getDefaultAsyncThreadsCount() {
- unsigned HardwareConcurrency = llvm::heavyweight_hardware_concurrency();
- // heavyweight_hardware_concurrency may fall back to hardware_concurrency.
- // C++ standard says that hardware_concurrency() may return 0; fallback to 1
- // worker thread in that case.
- if (HardwareConcurrency == 0)
- return 1;
- return HardwareConcurrency;
+ return llvm::heavyweight_hardware_concurrency().compute_thread_count();
}
FileStatus TUStatus::render(PathRef File) const {
Index: clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
===================================================================
--- clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -268,8 +268,7 @@
Error = false;
llvm::sys::Mutex IndexMutex;
// ExecutorConcurrency is a flag exposed by AllTUsExecution.h
- llvm::ThreadPool Pool(ExecutorConcurrency == 0 ? llvm::hardware_concurrency()
- : ExecutorConcurrency);
+ llvm::ThreadPool Pool(llvm::hardware_concurrency(ExecutorConcurrency));
for (auto &Group : USRToBitcode) {
Pool.async([&]() {
std::vector<std::unique_ptr<doc::Info>> Infos;