common/Common.hpp | 2 +- wsd/DocumentBroker.cpp | 7 ++++++- wsd/LOOLWSD.cpp | 16 ++++++++++++---- 3 files changed, 19 insertions(+), 6 deletions(-)
New commits: commit 32fa1d95fc2ec65866d0cb47d619885182db7040 Author: Ashod Nakashian <[email protected]> AuthorDate: Fri Oct 18 08:10:12 2019 -0400 Commit: Andras Timar <[email protected]> CommitDate: Tue Oct 22 19:01:03 2019 +0200 wsd: dynamic child timeout The initial child spawning takes significantly longer than subsequent ones (for obvious reasons) and this led to unit-tests being sensitive to the timeout we use for child spawning. Too short, and we spawn more than we want on startup, too long and crash-recovery tests fail (we don't recover fast enough, as we wait too long before spawning new children). Dynamically setting the timeout allows us to give a longer timeout at startup, and reduce it afterwards. Change-Id: I8423f5c6619e57030ab43d519aaa41d8712c36d3 Reviewed-on: https://gerrit.libreoffice.org/81194 Reviewed-by: Andras Timar <[email protected]> Tested-by: Andras Timar <[email protected]> diff --git a/common/Common.hpp b/common/Common.hpp index 599de0779..84b5eaae2 100644 --- a/common/Common.hpp +++ b/common/Common.hpp @@ -15,7 +15,7 @@ constexpr int DEFAULT_CLIENT_PORT_NUMBER = 9980; constexpr int DEFAULT_MASTER_PORT_NUMBER = 9981; constexpr int COMMAND_TIMEOUT_MS = 5000; -constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS * 2; +constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS; constexpr int CHILD_REBALANCE_INTERVAL_MS = CHILD_TIMEOUT_MS / 10; constexpr int POLL_TIMEOUT_MS = COMMAND_TIMEOUT_MS / 5; constexpr int WS_SEND_TIMEOUT_MS = 1000; diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp index 657ea999b..0c423bc52 100644 --- a/wsd/DocumentBroker.cpp +++ b/wsd/DocumentBroker.cpp @@ -373,7 +373,12 @@ void DocumentBroker::pollThread() } // Flush socket data first. - const int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms + constexpr int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms + LOG_INF("Flushing socket for doc [" + << _docKey << "] for " << flushTimeoutMs << " ms. 
stop: " << _stop + << ", continuePolling: " << _poll->continuePolling() << ", ShutdownRequestFlag: " + << ShutdownRequestFlag << ", TerminationFlag: " << TerminationFlag + << ". Terminating child with reason: [" << _closeReason << "]."); const auto flushStartTime = std::chrono::steady_clock::now(); while (_poll->getSocketCount()) { diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index 5b8f43949..af101523c 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -206,6 +206,8 @@ extern "C" { void dump_state(void); /* easy for gdb */ } static int careerSpanMs = 0; #endif +/// The timeout for a child to spawn, initially high, then reset to the default. +int ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS * 4; bool LOOLWSD::NoCapsForKit = false; bool LOOLWSD::TileCachePersistent = true; std::atomic<unsigned> LOOLWSD::NumConnections; @@ -422,7 +424,7 @@ static int rebalanceChildren(int balance) const auto duration = (std::chrono::steady_clock::now() - LastForkRequestTime); const std::chrono::milliseconds::rep durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(); - if (OutstandingForks != 0 && durationMs >= CHILD_TIMEOUT_MS) + if (OutstandingForks != 0 && durationMs >= ChildSpawnTimeoutMs) { // Children taking too long to spawn. // Forget we had requested any, and request anew. @@ -504,7 +506,7 @@ std::shared_ptr<ChildProcess> getNewChild_Blocks( } // With valgrind we need extended time to spawn kits. 
- const size_t timeoutMs = CHILD_TIMEOUT_MS / 2; + const size_t timeoutMs = ChildSpawnTimeoutMs / 2; LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms."); const auto timeout = std::chrono::milliseconds(timeoutMs); #else @@ -1642,7 +1644,10 @@ bool LOOLWSD::createForKit() Admin::instance().setForKitPid(ForKitProcId); Admin::instance().setForKitWritePipe(ForKitWritePipe); - rebalanceChildren(LOOLWSD::NumPreSpawnedChildren - 1); + const int balance = LOOLWSD::NumPreSpawnedChildren - OutstandingForks; + if (balance > 0) + rebalanceChildren(balance); + return ForKitProcId != -1; #endif } @@ -3191,7 +3196,7 @@ int LOOLWSD::innerMain() } else { - const int timeoutMs = CHILD_TIMEOUT_MS * (LOOLWSD::NoCapsForKit ? 150 : 50); + const int timeoutMs = ChildSpawnTimeoutMs * (LOOLWSD::NoCapsForKit ? 150 : 50); const auto timeout = std::chrono::milliseconds(timeoutMs); LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms."); if (!NewChildrenCV.wait_for(lock, timeout, []() { return !NewChildren.empty(); })) @@ -3229,6 +3234,9 @@ int LOOLWSD::innerMain() std::cerr << "Ready to accept connections on port " << ClientPortNumber << ".\n" << std::endl; #endif + // Reset the child-spawn timeout to the default, now that we're set. + ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS; + const auto startStamp = std::chrono::steady_clock::now(); while (!TerminationFlag && !ShutdownRequestFlag) _______________________________________________ Libreoffice-commits mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
