Author: markt
Date: Fri Aug 24 12:12:27 2018
New Revision: 1838830

URL: http://svn.apache.org/viewvc?rev=1838830&view=rev
Log:
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=62408
Add the new load-balancer worker property lb_retries to improve the control 
over the number of retries.
Based on a patch provided by Frederik Nosi.

Modified:
    tomcat/jk/trunk/native/common/jk_ajp_common.c
    tomcat/jk/trunk/native/common/jk_ajp_common.h
    tomcat/jk/trunk/native/common/jk_lb_worker.c
    tomcat/jk/trunk/native/common/jk_lb_worker.h
    tomcat/jk/trunk/native/common/jk_service.h
    tomcat/jk/trunk/native/common/jk_util.c
    tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
    tomcat/jk/trunk/xdocs/reference/workers.xml

Modified: tomcat/jk/trunk/native/common/jk_ajp_common.c
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_ajp_common.c?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_ajp_common.c (original)
+++ tomcat/jk/trunk/native/common/jk_ajp_common.c Fri Aug 24 12:12:27 2018
@@ -3041,6 +3041,10 @@ int ajp_init(jk_worker_t *pThis,
             jk_get_worker_retries(props, p->name,
                                   JK_RETRIES);
 
+        p->lb_retries =
+            jk_get_worker_lb_retries(props, p->name,
+                                  JK_LB_RETRIES);
+
         p->max_packet_size =
             jk_get_max_packet_size(props, p->name);
 

Modified: tomcat/jk/trunk/native/common/jk_ajp_common.h
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_ajp_common.h?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_ajp_common.h (original)
+++ tomcat/jk/trunk/native/common/jk_ajp_common.h Fri Aug 24 12:12:27 2018
@@ -370,6 +370,13 @@ struct ajp_worker
      */
     int retries;
 
+    /*
+     * Public property used in load balancer workers, meaning
+     * the maximum number of failover attempts between ajp
+     * workers of cluster.
+     */
+    int lb_retries;
+
     unsigned int max_packet_size;  /*  Maximum AJP Packet size */
 
     int retry_interval;            /*  Number of milliseconds to sleep before doing a retry */

Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_lb_worker.c (original)
+++ tomcat/jk/trunk/native/common/jk_lb_worker.c Fri Aug 24 12:12:27 2018
@@ -1225,6 +1225,7 @@ static int JK_METHOD service(jk_endpoint
     if (p->worker->sequence < p->worker->s->h.sequence)
         jk_lb_pull(p->worker, JK_FALSE, l);
     for (i = 0; i < num_of_workers; i++) {
+        jk_log(l, JK_LOG_DEBUG, "LB - num_of_workers: %d, retry: %d, 
lb_retries: %d", num_of_workers, i, p->worker->lb_retries);
         lb_sub_worker_t *rec = &(p->worker->lb_workers[i]);
         ajp_worker_t *aw = (ajp_worker_t *)rec->worker->worker_private;
         if (rec->s->state == JK_LB_STATE_BUSY) {
@@ -1272,7 +1273,10 @@ static int JK_METHOD service(jk_endpoint
                p->worker->sticky_session, sessionid ? sessionid : "empty");
 
     while (recoverable == JK_TRUE) {
-        if (attempt >= num_of_workers) {
+        if (JK_IS_DEBUG_LEVEL(l))
+            jk_log(l, JK_LOG_DEBUG, "attempt %d, max attempts %d, worker count 
%d",
+                    attempt, p->worker->lb_retries, num_of_workers);
+        if (attempt >= num_of_workers || attempt >= p->worker->lb_retries) {
             retry++;
             if (retry >= p->worker->retries) {
                 /* Done with retrying */
@@ -1549,7 +1553,7 @@ static int JK_METHOD service(jk_endpoint
                     }
                     else {
                         /*
-                         * Reply timeout, bot not yet too many of them.
+                         * Reply timeout, but not yet too many of them.
                          * Keep previous global state.
                          * Do not try to reuse the same node for the same request.
                          * Failing over to another node could help.
@@ -1903,6 +1907,8 @@ static int JK_METHOD init(jk_worker_t *p
     p->worker.we = we;
     p->retries = jk_get_worker_retries(props, p->name,
                                        JK_RETRIES);
+    p->lb_retries = jk_get_worker_lb_retries(props, p->name,
+                                       JK_LB_RETRIES);
     p->retry_interval =
             jk_get_worker_retry_interval(props, p->name,
                                         JK_SLEEP_DEF);

Modified: tomcat/jk/trunk/native/common/jk_lb_worker.h
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.h?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_lb_worker.h (original)
+++ tomcat/jk/trunk/native/common/jk_lb_worker.h Fri Aug 24 12:12:27 2018
@@ -187,6 +187,7 @@ struct lb_worker
     int          error_escalation_time;
     int          max_reply_timeouts;
     int          retries;
+    int          lb_retries;
     int          retry_interval;
     int          lbmethod;
     int          lblock;

Modified: tomcat/jk/trunk/native/common/jk_service.h
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_service.h?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_service.h (original)
+++ tomcat/jk/trunk/native/common/jk_service.h Fri Aug 24 12:12:27 2018
@@ -36,6 +36,7 @@
 #include "jk_msg_buff.h"
 
 #define JK_RETRIES 2
+#define JK_LB_RETRIES 2
 
 #ifdef __cplusplus
 extern "C"

Modified: tomcat/jk/trunk/native/common/jk_util.c
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_util.c?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_util.c (original)
+++ tomcat/jk/trunk/native/common/jk_util.c Fri Aug 24 12:12:27 2018
@@ -111,6 +111,7 @@
 #define DEFAULT_WORKER_TYPE         JK_AJP13_WORKER_NAME
 #define SECRET_KEY_OF_WORKER        "secretkey"
 #define RETRIES_OF_WORKER           "retries"
+#define LB_RETRIES_OF_WORKER        "lb_retries"
 #define STATUS_FAIL_OF_WORKER       "fail_on_status"
 
 #define DEFAULT_WORKER              JK_AJP13_WORKER_NAME
@@ -232,6 +233,7 @@ static const char *unique_properties[] =
     STYLE_SHEET_OF_WORKER,
     READ_ONLY_OF_WORKER,
     RETRIES_OF_WORKER,
+    LB_RETRIES_OF_WORKER,
     WORKER_MAINTAIN_PROPERTY_NAME,
     NAMESPACE_OF_WORKER,
     XML_NAMESPACE_OF_WORKER,
@@ -342,6 +344,7 @@ static const char *supported_properties[
     BAD_RATING_OF_WORKER,
     SECRET_KEY_OF_WORKER,
     RETRIES_OF_WORKER,
+    LB_RETRIES_OF_WORKER,
     STATUS_FAIL_OF_WORKER,
     LIST_PROPERTY_NAME,
     MAINTAIN_PROPERTY_NAME,
@@ -1229,6 +1232,24 @@ int jk_get_worker_retries(jk_map_t *m, c
     return rv;
 }
 
+int jk_get_worker_lb_retries(jk_map_t *m, const char *wname, int def) /* look up the LB_RETRIES_OF_WORKER ("lb_retries") setting for worker wname in map m */
+{
+    char buf[1024]; /* lookup key passed to jk_map_get_int; presumably filled in by MAKE_WORKER_PARAM - confirm against the macro */
+    int rv;
+    if (!m || !wname) {
+        return -1; /* no map or no worker name: -1 is returned as-is, bypassing the minimum-of-1 clamp below */
+    }
+
+    MAKE_WORKER_PARAM(LB_RETRIES_OF_WORKER);
+
+    rv = jk_map_get_int(m, buf, def); /* def is handed to the map lookup; presumably the fallback when the key is unset - confirm */
+    if (rv < 1)
+        rv = 1; /* values below 1 are raised to 1 */
+
+    return rv;
+}
+
+
 int jk_get_worker_recovery_opts(jk_map_t *m, const char *wname, int def)
 {
     char buf[PARAM_BUFFER_SIZE];

Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original)
+++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Fri Aug 24 12:12:27 2018
@@ -73,6 +73,11 @@
         Clarify the behvaiour of lb workers when all ajp13 workers fail with
         particular reference to the role of the retries attribute. (markt)
       </add>
+      <add>
+        <bug>62408</bug>: Add the new load-balancer worker property lb_retries
+        to improve the control over the number of retries. Based on a patch
+        provided by Frederik Nosi. (markt)
+      </add>
    </changelog>
   </subsection>
 </section>

Modified: tomcat/jk/trunk/xdocs/reference/workers.xml
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/reference/workers.xml?rev=1838830&r1=1838829&r2=1838830&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/reference/workers.xml (original)
+++ tomcat/jk/trunk/xdocs/reference/workers.xml Fri Aug 24 12:12:27 2018
@@ -626,22 +626,59 @@ This feature has been added in <b>jk 1.2
 
 <directive name="retries" default="2" required="false">
 <warn>This directive also exists for normal workers.
-For those it has a <a href="#Advanced Worker Directives">different meaning</a>.</warn>
-If the load balancer can not get a valid member worker or in case of failover,
-it will try each member in turn a number of times given by <b>retries</b>.
-Before each retry, it will make a pause define by <b>retry_interval</b> directive.
+For those it has a <a href="#Advanced Worker Directives">different
+meaning</a>.</warn>
+When making a request, the load-balancer worker will allocate the request to a
+member worker. If that member worker is either unable to service the request or
+fails to service the request, the request will be passed to another member
+worker until the request is processed, every member worker has attempted to
+process the request or <b>lb_retries</b> member workers have attempted to
+process the request.
+<p>
+If the request remains unprocessed, the load-balancer worker will repeat the
+above process a maximum of <b>retries</b> times (including the original
+attempt). Before each retry, the load-balancer worker will pause for a time
+defined by the <b>retry_interval</b> directive.
+</p>
 <p>
 Note that this means that, if all workers fail, there will be a total of
-number-of-workers * lb.retries * worker.retries requests before a 504 response
-is returned to the client. So for an lb worker with four members and a default
-configuration, if all workers fail there will be a total of 16 requests before
-a 504 response is returned to the client.
+worker.retries * min(lb.lb_retries,member worker count) * lb.retries requests
+before a 504 response is returned to the client. So for an lb worker with four
+members and a default configuration, if all workers fail there will be a total
+of 8 requests before a 504 response is returned to the client.
 </p>
 <p>
 Until version <b>1.2.16</b> the default value was 3.
 </p>
 </directive>
 
+<directive name="lb_retries" default="2" required="false">
+When making a request, the load-balancer worker will allocate the request to a
+member worker. If that member worker is either unable to service the request or
+fails to service the request, the request will be passed to another member
+worker until the request is processed, every member worker has attempted to
+process the request or <b>lb_retries</b> member workers have attempted to
+process the request.
+<p>
+If the request remains unprocessed, the load-balancer worker will repeat the
+above process a maximum of <b>retries</b> times (including the original
+attempt). Before each retry, the load-balancer worker will pause for a time
+defined by the <b>retry_interval</b> directive.
+</p>
+<p>
+Note that this means that, if all workers fail, there will be a total of
+worker.retries * min(lb.lb_retries,member worker count) * lb.retries requests
+before a 504 response is returned to the client. So for an lb worker with four
+members and a default configuration, if all workers fail there will be a total
+of 8 requests before a 504 response is returned to the client.
+</p>
+<p>
+This feature has been added in <b>jk 1.2.44</b>. Prior to this feature being
+added, the load-balancer worker behaved as if <b>lb_retries</b> was equal to the
+number of member workers.
+</p>
+</directive>
+
 </directives>
 
 </subsection>



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to