Juan Hernandez has uploaded a new change for review. Change subject: core: Make connection timeout configurable ......................................................................
core: Make connection timeout configurable The timeout that we currently use to connect to hosts is by default very large, as it is the same we use for VDSM operations. The default is 180 seconds but waiting that long for connections is not very reasonable, as establishing a connection is a quick operation, specially in a high speed local network. This large timeout severely affects the high availability support, as it means that we need to wait at least 6 minutes (we need at least 2 failures in a row) when a host fails and starts dropping packets before we can declare it failed and start the recovery process. This patch introduces two new parameters that control the time that it takes to react when there are connection issues: 1. vdsConnectionTimeout (default is 3 minutes) Timeout for establishment of connections with hosts. This should be quite small, a few secones at most, as it the TCP handshake with hosts should be very quick in most networks. At the moment we are using 3 minutes in order to preserve the behaviour before this modification, but this will probably be changed to a much smaller value (2 seconds, for example) in the future. 2. vdsRetries (default is 3) The number of times to retry host operations when there are IO errors. The default value is 3 to match the default value of the XML RPC library that we use, so there is no change in behaviour. Bug-Url: https://bugzilla.redhat.com/863211 Change-Id: I55a49ae5d655c2105c5840decec81ae712e40c32 Signed-off-by: Juan Hernandez <juan.hernan...@redhat.com> --- M backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java M backend/manager/tools/engine-config/src/main/resources/engine-config.properties 6 files changed, 98 insertions(+), 7 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/90/9190/1 diff --git a/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql b/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql index 785214f..f58313e 100644 --- a/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql +++ b/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql @@ -516,6 +516,8 @@ select fn_db_add_config_value('VdcVersion','3.0.0.0','general'); select fn_db_add_config_value('VDSAttemptsToResetCount','2','general'); select fn_db_add_config_value('VdsCertificateValidityInYears','5','general'); +select fn_db_add_config_value('vdsConnectionTimeout','180','3.0'); +select fn_db_add_config_value('vdsConnectionTimeout','180','3.1'); select fn_db_add_config_value('VdsFenceOptionMapping','alom:secure=secure,port=ipport;apc:secure=secure,port=ipport,slot=port;bladecenter:secure=secure,port=ipport,slot=port;drac5:secure=secure,port=ipport;eps:slot=port;ilo:secure=ssl,port=ipport;ipmilan:;rsa:secure=secure,port=ipport;rsb:;wti:secure=secure,port=ipport,slot=port;cisco_ucs:secure=ssl,slot=port','general'); select fn_db_add_config_value('VdsFenceOptions','','general'); select fn_db_add_config_value('VdsFenceOptionTypes','secure=bool,port=int,slot=int','general'); @@ -528,8 +530,11 @@ select fn_db_add_config_value('VdsLocalDisksLowFreeSpace','500','general'); select fn_db_add_config_value('VdsRecoveryTimeoutInMintues','3','general'); select fn_db_add_config_value('VdsRefreshRate','2','general'); +select fn_db_add_config_value('vdsRetries','3','3.0'); +select fn_db_add_config_value('vdsRetries','3','3.1'); --Handling Host Selection Algorithm default for cluster select fn_db_add_config_value('VdsSelectionAlgorithm','None','general'); +--Timeout Policy select fn_db_add_config_value('vdsTimeout','180','general'); --Handling Virtual Machine Domain Name select fn_db_add_config_value('VirtualMachineDomainName','VirtualMachineDomainName','general'); @@ -639,6 +644,8 @@ select fn_db_update_default_config_value('oVirtISOsRepositoryPath','ovirt-isos', '/usr/share/rhev-hypervisor','general',false); select fn_db_update_default_config_value('VdsLocalDisksCriticallyLowFreeSpace','100','500','general',false); select fn_db_update_default_config_value('VdsLocalDisksLowFreeSpace','500', '1000','general',false); +--Placeholder for future change +select fn_db_update_default_config_value('vdsConnectionTimeout','180','180','3.1',false); ------------------------------------------------------------------------------------ -- Cleanup deprecated configuration values section ------------------------------------------------------------------------------------ diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java index d1d65b1..48555a9 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java @@ -38,9 +38,38 @@ @TypeConverterAttribute(String.class) @DefaultValueAttribute("EXAMPLE.COM") DomainName(10), + + /** + * Timeout in seconds for the completion of calls to VDSM. It should + * be quite large as some host operations can take more than 3 + * minutes to complete. + */ @TypeConverterAttribute(Integer.class) @DefaultValueAttribute("180") vdsTimeout(11), + + /** + * The number of times to retry host operations when there are IO errors. + * The default value is 3 to match the default value of the XML RPC library + * that we use, so there is no change in behaviour. + */ + @TypeConverterAttribute(Integer.class) + @DefaultValueAttribute("3") + vdsRetries(393), + + /** + * Timeout for establishment of connections with hosts. This should be quite + * small, a few secones at most, as it the TCP handshake with hosts should + * be very quick in most networks. + * + * At the moment we are using 3 minutes in order to preserve the behaviour + * before this modification, but this will probably be changed to a much + * smaller value (2 seconds, for example) in the future. + */ + @TypeConverterAttribute(Integer.class) + @DefaultValueAttribute("180") + vdsConnectionTimeout(394), + @TypeConverterAttribute(Integer.class) @DefaultValueAttribute("2") VdsRefreshRate(12), diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java index 6353288..d5ad4e3 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java @@ -173,11 +173,25 @@ private void InitVdsBroker() { log.infoFormat("vdsBroker({0},{1})", _vds.gethost_name(), _vds.getport()); - int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout) * 1000; + + // Get the compatibility version of the cluster, as the values of the + // timeouts used depend on that: + String clusterVersion = Config.DefaultConfigurationVersion; + if (_vds != null) { + clusterVersion = _vds.getvds_group_compatibility_version().getValue(); + } + + // Find the version specific timeouts: + int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout, clusterVersion) * 1000; + int connectionTimeOut = Config.<Integer>GetValue(ConfigValues.vdsConnectionTimeout, clusterVersion) * 1000; + int clientRetries = Config.<Integer>GetValue(ConfigValues.vdsRetries, clusterVersion); + Pair<VdsServerConnector, HttpClient> returnValue = XmlRpcUtils.getConnection(_vds.gethost_name(), _vds.getport(), clientTimeOut, + connectionTimeOut, + clientRetries, VdsServerConnector.class, Config.<Boolean> GetValue(ConfigValues.UseSecureConnectionWithServers)); _vdsProxy = new VdsServerWrapper(returnValue.getFirst(), returnValue.getSecond()); diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java index 9748caa..e139949 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java @@ -546,11 +546,27 @@ }); if (host != null) { - int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout) * 1000; + // Get the compatibility version of the cluster, as the values of the + // timeouts used depend on that: + String clusterVersion = Config.DefaultConfigurationVersion; + if (mCurrentVdsId != null) { + VDS vds = DbFacade.getInstance().getVdsDao().get(mCurrentVdsId); + if (vds != null) { + clusterVersion = vds.getvds_group_compatibility_version().getValue(); + } + } + + // Find the version specific timeouts: + int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout, clusterVersion) * 1000; + int connectionTimeOut = Config.<Integer>GetValue(ConfigValues.vdsConnectionTimeout, clusterVersion) * 1000; + int clientRetries = Config.<Integer> GetValue(ConfigValues.vdsRetries, clusterVersion); + Pair<IrsServerConnector, HttpClient> returnValue = XmlRpcUtils.getConnection(host, getmIrsPort(), clientTimeOut, + connectionTimeOut, + clientRetries, IrsServerConnector.class, Config.<Boolean> GetValue(ConfigValues.UseSecureConnectionWithServers)); privatemIrsProxy = new IrsServerWrapper(returnValue.getFirst(), returnValue.getSecond()); diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java index 817a3f9..e28f9b1 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java @@ -11,8 +11,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpMethodRetryHandler; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +import org.apache.commons.httpclient.params.HttpClientParams; +import org.apache.commons.httpclient.params.HttpMethodParams; import org.apache.commons.httpclient.protocol.Protocol; import org.apache.commons.httpclient.protocol.ProtocolSocketFactory; import org.apache.commons.lang.StringUtils; @@ -78,7 +82,7 @@ * @return an instance of the given type. */ public static <T> Pair<T, HttpClient> getConnection(String hostName, int port, int clientTimeOut, - Class<T> type, boolean isSecure) { + int connectionTimeOut, int clientRetries, Class<T> type, boolean isSecure) { URL serverUrl; String prefix; if (isSecure) { @@ -92,7 +96,7 @@ log.error("failed to forme the xml-rpc url", mfue); return null; } - return getHttpConnection(serverUrl, clientTimeOut, type); + return getHttpConnection(serverUrl, clientTimeOut, connectionTimeOut, clientRetries, type); } public static void shutDownConnection(HttpClient httpClient) { @@ -103,16 +107,18 @@ @SuppressWarnings("unchecked") private static <T> Pair<T, HttpClient> getHttpConnection(URL serverUrl, int clientTimeOut, - Class<T> type) { + int connectionTimeOut, int clientRetries, Class<T> type) { XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); config.setServerURL(serverUrl); - config.setConnectionTimeout(clientTimeOut); + config.setReplyTimeout(clientTimeOut); + config.setConnectionTimeout(connectionTimeOut); + XmlRpcClient xmlRpcClient = new XmlRpcClient(); xmlRpcClient.setConfig(config); XmlRpcCommonsTransportFactory transportFactory = new CustomXmlRpcCommonsTransportFactory(xmlRpcClient); - HttpClient httpclient = new HttpClient(new MultiThreadedHttpConnectionManager()); + HttpClient httpclient = createHttpClient(clientRetries); transportFactory.setHttpClient(httpclient); xmlRpcClient.setTransportFactory(transportFactory); @@ -126,6 +132,21 @@ return returnValue; } + private static HttpClient createHttpClient(int clientRetries) { + // Create the client: + HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager()); + + // Configure the HTTP client so it will retry the execution of + // methods when there are IO errors: + int retries = Config.<Integer> GetValue(ConfigValues.vdsRetries); + HttpMethodRetryHandler handler = new DefaultHttpMethodRetryHandler(retries, false); + HttpClientParams parameters = client.getParams(); + parameters.setParameter(HttpMethodParams.RETRY_HANDLER, handler); + + // Done: + return client; + } + private static class AsyncProxy implements InvocationHandler { private Object obj; diff --git a/backend/manager/tools/engine-config/src/main/resources/engine-config.properties b/backend/manager/tools/engine-config/src/main/resources/engine-config.properties index 68b4ad2..f6b82a9 100644 --- a/backend/manager/tools/engine-config/src/main/resources/engine-config.properties +++ b/backend/manager/tools/engine-config/src/main/resources/engine-config.properties @@ -169,6 +169,10 @@ VdsRefreshRate.type=Integer vdsTimeout.description="Host Control Communication Timeout (in seconds)" vdsTimeout.type=Integer +vdsConnectionTimeout.description="Time to wait for connection establishment with hosts (in seconds)" +vdsConnectionTimeout.type=Integer +vdsRetries.description="Number of times to retry host operations when there are communication errors" +vdsRetries.type=Integer VmGracefulShutdownMessage.description="Message displayed in Virtual Machine when Virtual Machine is being shutdown from oVirt Engine" VMMinMemorySizeInMB.description="Minimal memory size of virtual machine in MB" VMMinMemorySizeInMB.type=Integer -- To view, visit http://gerrit.ovirt.org/9190 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I55a49ae5d655c2105c5840decec81ae712e40c32 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: engine_3.1 Gerrit-Owner: Juan Hernandez <juan.hernan...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches