Juan Hernandez has uploaded a new change for review.

Change subject: core: Make connection timeout configurable
......................................................................

core: Make connection timeout configurable

The timeout that we currently use to connect to hosts is by default very
large, as it is the same we use for VDSM operations. The default is 180
seconds but waiting that long for connections is not very reasonable, as
establishing a connection is a quick operation, specially in a high speed
local network.

This large timeout severely affects the high availability support, as it
means that we need to wait at least 6 minutes (we need at least 2
failures in a row) when a host fails and starts dropping packets before
we can declare it failed and start the recovery process.

This patch introduces two new parameters that control the time that it
takes to react when there are connection issues:

1. vdsConnectionTimeout (default is 3 minutes)

Timeout for establishment of connections with hosts. This should be
quite small, a few secones at most, as it the TCP handshake with hosts
should be very quick in most networks.

At the moment we are using 3 minutes in order to preserve the behaviour
before this modification, but this will probably be changed to a much
smaller value (2 seconds, for example) in the future.

2. vdsRetries (default is 3)

The number of times to retry host operations when there are IO errors.
The default value is 3 to match the default value of the XML RPC library
that we use, so there is no change in behaviour.

Bug-Url: https://bugzilla.redhat.com/863211
Change-Id: I55a49ae5d655c2105c5840decec81ae712e40c32
Signed-off-by: Juan Hernandez <juan.hernan...@redhat.com>
---
M backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java
M 
backend/manager/tools/engine-config/src/main/resources/engine-config.properties
6 files changed, 98 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/90/9190/1

diff --git a/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql 
b/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql
index 785214f..f58313e 100644
--- a/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql
+++ b/backend/manager/dbscripts/upgrade/pre_upgrade/0000_config.sql
@@ -516,6 +516,8 @@
 select fn_db_add_config_value('VdcVersion','3.0.0.0','general');
 select fn_db_add_config_value('VDSAttemptsToResetCount','2','general');
 select fn_db_add_config_value('VdsCertificateValidityInYears','5','general');
+select fn_db_add_config_value('vdsConnectionTimeout','180','3.0');
+select fn_db_add_config_value('vdsConnectionTimeout','180','3.1');
 select 
fn_db_add_config_value('VdsFenceOptionMapping','alom:secure=secure,port=ipport;apc:secure=secure,port=ipport,slot=port;bladecenter:secure=secure,port=ipport,slot=port;drac5:secure=secure,port=ipport;eps:slot=port;ilo:secure=ssl,port=ipport;ipmilan:;rsa:secure=secure,port=ipport;rsb:;wti:secure=secure,port=ipport,slot=port;cisco_ucs:secure=ssl,slot=port','general');
 select fn_db_add_config_value('VdsFenceOptions','','general');
 select 
fn_db_add_config_value('VdsFenceOptionTypes','secure=bool,port=int,slot=int','general');
@@ -528,8 +530,11 @@
 select fn_db_add_config_value('VdsLocalDisksLowFreeSpace','500','general');
 select fn_db_add_config_value('VdsRecoveryTimeoutInMintues','3','general');
 select fn_db_add_config_value('VdsRefreshRate','2','general');
+select fn_db_add_config_value('vdsRetries','3','3.0');
+select fn_db_add_config_value('vdsRetries','3','3.1');
 --Handling Host Selection Algorithm default for cluster
 select fn_db_add_config_value('VdsSelectionAlgorithm','None','general');
+--Timeout Policy
 select fn_db_add_config_value('vdsTimeout','180','general');
 --Handling Virtual Machine Domain Name
 select 
fn_db_add_config_value('VirtualMachineDomainName','VirtualMachineDomainName','general');
@@ -639,6 +644,8 @@
 select 
fn_db_update_default_config_value('oVirtISOsRepositoryPath','ovirt-isos', 
'/usr/share/rhev-hypervisor','general',false);
 select 
fn_db_update_default_config_value('VdsLocalDisksCriticallyLowFreeSpace','100','500','general',false);
 select fn_db_update_default_config_value('VdsLocalDisksLowFreeSpace','500', 
'1000','general',false);
+--Placeholder for future change
+select 
fn_db_update_default_config_value('vdsConnectionTimeout','180','180','3.1',false);
 
------------------------------------------------------------------------------------
 --              Cleanup deprecated configuration values section
 
------------------------------------------------------------------------------------
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
index d1d65b1..48555a9 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
@@ -38,9 +38,38 @@
     @TypeConverterAttribute(String.class)
     @DefaultValueAttribute("EXAMPLE.COM")
     DomainName(10),
+
+    /**
+     * Timeout in seconds for the completion of calls to VDSM. It should
+     * be quite large as some host operations can take more than 3
+     * minutes to complete.
+     */
     @TypeConverterAttribute(Integer.class)
     @DefaultValueAttribute("180")
     vdsTimeout(11),
+
+    /**
+     * The number of times to retry host operations when there are IO errors.
+     * The default value is 3 to match the default value of the XML RPC library
+     * that we use, so there is no change in behaviour.
+     */
+    @TypeConverterAttribute(Integer.class)
+    @DefaultValueAttribute("3")
+    vdsRetries(393),
+
+    /**
+     * Timeout for establishment of connections with hosts. This should be 
quite
+     * small, a few secones at most, as it the TCP handshake with hosts should
+     * be very quick in most networks.
+     *
+     * At the moment we are using 3 minutes in order to preserve the behaviour
+     * before this modification, but this will probably be changed to a much
+     * smaller value (2 seconds, for example) in the future.
+     */
+    @TypeConverterAttribute(Integer.class)
+    @DefaultValueAttribute("180")
+    vdsConnectionTimeout(394),
+
     @TypeConverterAttribute(Integer.class)
     @DefaultValueAttribute("2")
     VdsRefreshRate(12),
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index 6353288..d5ad4e3 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -173,11 +173,25 @@
 
     private void InitVdsBroker() {
         log.infoFormat("vdsBroker({0},{1})", _vds.gethost_name(), 
_vds.getport());
-        int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout) 
* 1000;
+
+        // Get the compatibility version of the cluster, as the values of the
+        // timeouts used depend on that:
+        String clusterVersion = Config.DefaultConfigurationVersion;
+        if (_vds != null) {
+            clusterVersion = 
_vds.getvds_group_compatibility_version().getValue();
+        }
+
+        // Find the version specific timeouts:
+        int clientTimeOut = Config.<Integer> GetValue(ConfigValues.vdsTimeout, 
clusterVersion) * 1000;
+        int connectionTimeOut = 
Config.<Integer>GetValue(ConfigValues.vdsConnectionTimeout, clusterVersion) * 
1000;
+        int clientRetries = Config.<Integer>GetValue(ConfigValues.vdsRetries, 
clusterVersion);
+
         Pair<VdsServerConnector, HttpClient> returnValue =
                 XmlRpcUtils.getConnection(_vds.gethost_name(),
                         _vds.getport(),
                         clientTimeOut,
+                        connectionTimeOut,
+                        clientRetries,
                         VdsServerConnector.class,
                         Config.<Boolean> 
GetValue(ConfigValues.UseSecureConnectionWithServers));
         _vdsProxy = new VdsServerWrapper(returnValue.getFirst(), 
returnValue.getSecond());
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
index 9748caa..e139949 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
@@ -546,11 +546,27 @@
                     });
 
                     if (host != null) {
-                        int clientTimeOut = Config.<Integer> 
GetValue(ConfigValues.vdsTimeout) * 1000;
+                        // Get the compatibility version of the cluster, as 
the values of the
+                        // timeouts used depend on that:
+                        String clusterVersion = 
Config.DefaultConfigurationVersion;
+                        if (mCurrentVdsId != null) {
+                            VDS vds = 
DbFacade.getInstance().getVdsDao().get(mCurrentVdsId);
+                            if (vds != null) {
+                                clusterVersion = 
vds.getvds_group_compatibility_version().getValue();
+                            }
+                        }
+
+                        // Find the version specific timeouts:
+                        int clientTimeOut = Config.<Integer> 
GetValue(ConfigValues.vdsTimeout, clusterVersion) * 1000;
+                        int connectionTimeOut = 
Config.<Integer>GetValue(ConfigValues.vdsConnectionTimeout, clusterVersion) * 
1000;
+                        int clientRetries = Config.<Integer> 
GetValue(ConfigValues.vdsRetries, clusterVersion);
+
                         Pair<IrsServerConnector, HttpClient> returnValue =
                                 XmlRpcUtils.getConnection(host,
                                         getmIrsPort(),
                                         clientTimeOut,
+                                        connectionTimeOut,
+                                        clientRetries,
                                         IrsServerConnector.class,
                                         Config.<Boolean> 
GetValue(ConfigValues.UseSecureConnectionWithServers));
                         privatemIrsProxy = new 
IrsServerWrapper(returnValue.getFirst(), returnValue.getSecond());
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java
index 817a3f9..e28f9b1 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/xmlrpc/XmlRpcUtils.java
@@ -11,8 +11,12 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
+import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
 import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpMethodRetryHandler;
 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.apache.commons.httpclient.params.HttpMethodParams;
 import org.apache.commons.httpclient.protocol.Protocol;
 import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
 import org.apache.commons.lang.StringUtils;
@@ -78,7 +82,7 @@
      * @return an instance of the given type.
      */
     public static <T> Pair<T, HttpClient> getConnection(String hostName, int 
port, int clientTimeOut,
-            Class<T> type, boolean isSecure) {
+            int connectionTimeOut, int clientRetries, Class<T> type, boolean 
isSecure) {
         URL serverUrl;
         String prefix;
         if (isSecure) {
@@ -92,7 +96,7 @@
             log.error("failed to forme the xml-rpc url", mfue);
             return null;
         }
-        return getHttpConnection(serverUrl, clientTimeOut, type);
+        return getHttpConnection(serverUrl, clientTimeOut, connectionTimeOut, 
clientRetries, type);
     }
 
     public static void shutDownConnection(HttpClient httpClient) {
@@ -103,16 +107,18 @@
 
     @SuppressWarnings("unchecked")
     private static <T> Pair<T, HttpClient> getHttpConnection(URL serverUrl, 
int clientTimeOut,
-            Class<T> type) {
+            int connectionTimeOut, int clientRetries, Class<T> type) {
         XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
         config.setServerURL(serverUrl);
-        config.setConnectionTimeout(clientTimeOut);
+
         config.setReplyTimeout(clientTimeOut);
+        config.setConnectionTimeout(connectionTimeOut);
+
         XmlRpcClient xmlRpcClient = new XmlRpcClient();
         xmlRpcClient.setConfig(config);
 
         XmlRpcCommonsTransportFactory transportFactory = new 
CustomXmlRpcCommonsTransportFactory(xmlRpcClient);
-        HttpClient httpclient = new HttpClient(new 
MultiThreadedHttpConnectionManager());
+        HttpClient httpclient = createHttpClient(clientRetries);
         transportFactory.setHttpClient(httpclient);
         xmlRpcClient.setTransportFactory(transportFactory);
 
@@ -126,6 +132,21 @@
         return returnValue;
     }
 
+    private static HttpClient createHttpClient(int clientRetries) {
+        // Create the client:
+        HttpClient client = new HttpClient(new 
MultiThreadedHttpConnectionManager());
+
+        // Configure the HTTP client so it will retry the execution of
+        // methods when there are IO errors:
+        int retries = Config.<Integer> GetValue(ConfigValues.vdsRetries);
+        HttpMethodRetryHandler handler = new 
DefaultHttpMethodRetryHandler(retries, false);
+        HttpClientParams parameters = client.getParams();
+        parameters.setParameter(HttpMethodParams.RETRY_HANDLER, handler);
+
+        // Done:
+        return client;
+    }
+
     private static class AsyncProxy implements InvocationHandler {
 
         private Object obj;
diff --git 
a/backend/manager/tools/engine-config/src/main/resources/engine-config.properties
 
b/backend/manager/tools/engine-config/src/main/resources/engine-config.properties
index 68b4ad2..f6b82a9 100644
--- 
a/backend/manager/tools/engine-config/src/main/resources/engine-config.properties
+++ 
b/backend/manager/tools/engine-config/src/main/resources/engine-config.properties
@@ -169,6 +169,10 @@
 VdsRefreshRate.type=Integer
 vdsTimeout.description="Host Control Communication Timeout (in seconds)"
 vdsTimeout.type=Integer
+vdsConnectionTimeout.description="Time to wait for connection establishment 
with hosts (in seconds)"
+vdsConnectionTimeout.type=Integer
+vdsRetries.description="Number of times to retry host operations when there 
are communication errors"
+vdsRetries.type=Integer
 VmGracefulShutdownMessage.description="Message displayed in Virtual Machine 
when Virtual Machine is being shutdown from oVirt Engine"
 VMMinMemorySizeInMB.description="Minimal memory size of virtual machine in MB"
 VMMinMemorySizeInMB.type=Integer


--
To view, visit http://gerrit.ovirt.org/9190
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I55a49ae5d655c2105c5840decec81ae712e40c32
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: engine_3.1
Gerrit-Owner: Juan Hernandez <juan.hernan...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to