Author: rjung
Date: Tue Dec 23 18:19:04 2014
New Revision: 1647636
URL: http://svn.apache.org/r1647636
Log:
PR 52334: LB: Calculate worker recovery time based
on last recovery attempt time instead of original
error time after the first recovery attempt.
Modified:
tomcat/jk/trunk/native/common/jk_lb_worker.c
tomcat/jk/trunk/native/common/jk_shm.h
tomcat/jk/trunk/native/common/jk_status.c
tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c
URL:
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_lb_worker.c (original)
+++ tomcat/jk/trunk/native/common/jk_lb_worker.c Tue Dec 23 18:19:04 2014
@@ -601,7 +601,7 @@ static int recover_workers(lb_worker_t *
w = &p->lb_workers[i];
aw = (ajp_worker_t *)w->worker->worker_private;
if (w->s->state == JK_LB_STATE_ERROR) {
- elapsed = (int)difftime(now, w->s->error_time);
+ elapsed = (int)difftime(now, w->s->last_error_time);
if (elapsed <= p->recover_wait_time) {
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
@@ -620,8 +620,8 @@ static int recover_workers(lb_worker_t *
non_error++;
}
}
- else if (w->s->error_time > 0 &&
- (int)difftime(now, w->s->error_time) >= p->error_escalation_time &&
+ else if (w->s->first_error_time > 0 &&
+ (int)difftime(now, w->s->first_error_time) >= p->error_escalation_time &&
w->s->state != JK_LB_STATE_RECOVER) {
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
@@ -1428,7 +1428,8 @@ static int JK_METHOD service(jk_endpoint
*/
rec->s->state = JK_LB_STATE_OK;
p->states[rec->i] = JK_LB_STATE_OK;
- rec->s->error_time = 0;
+ rec->s->first_error_time = 0;
+ rec->s->last_error_time = 0;
rc = JK_TRUE;
recoverable = JK_UNSET;
}
@@ -1439,7 +1440,8 @@ static int JK_METHOD service(jk_endpoint
*/
rec->s->state = JK_LB_STATE_OK;
p->states[rec->i] = JK_LB_STATE_ERROR;
- rec->s->error_time = 0;
+ rec->s->first_error_time = 0;
+ rec->s->last_error_time = 0;
rc = JK_CLIENT_ERROR;
recoverable = JK_FALSE;
}
@@ -1472,7 +1474,8 @@ static int JK_METHOD service(jk_endpoint
*/
rec->s->state = JK_LB_STATE_OK;
p->states[rec->i] = JK_LB_STATE_ERROR;
- rec->s->error_time = 0;
+ rec->s->first_error_time = 0;
+ rec->s->last_error_time = 0;
rc = JK_FALSE;
}
else if (service_stat == JK_STATUS_FATAL_ERROR) {
@@ -1485,7 +1488,8 @@ static int JK_METHOD service(jk_endpoint
rec->s->errors++;
rec->s->state = JK_LB_STATE_ERROR;
p->states[rec->i] = JK_LB_STATE_ERROR;
- rec->s->error_time = time(NULL);
+ rec->s->first_error_time = time(NULL);
+ rec->s->last_error_time = rec->s->first_error_time;
rc = JK_FALSE;
}
else if (service_stat == JK_REPLY_TIMEOUT) {
@@ -1499,7 +1503,8 @@ static int JK_METHOD service(jk_endpoint
rec->s->errors++;
rec->s->state = JK_LB_STATE_ERROR;
p->states[rec->i] = JK_LB_STATE_ERROR;
- rec->s->error_time = time(NULL);
+ rec->s->first_error_time = time(NULL);
+ rec->s->last_error_time = rec->s->first_error_time;
}
else {
/*
@@ -1523,8 +1528,8 @@ static int JK_METHOD service(jk_endpoint
rec->s->errors++;
if (rec->s->busy == 0 ||
p->worker->error_escalation_time == 0 ||
- (rec->s->error_time > 0 &&
- (int)difftime(now, rec->s->error_time) >= p->worker->error_escalation_time)) {
+ (rec->s->first_error_time > 0 &&
+ (int)difftime(now, rec->s->first_error_time) >= p->worker->error_escalation_time)) {
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
"worker %s escalating local error to global error",
@@ -1532,9 +1537,10 @@ static int JK_METHOD service(jk_endpoint
rec->s->state = JK_LB_STATE_ERROR;
}
p->states[rec->i] = JK_LB_STATE_ERROR;
- if (rec->s->error_time == 0) {
- rec->s->error_time = now;
+ if (rec->s->first_error_time == 0) {
+ rec->s->first_error_time = now;
}
+ rec->s->last_error_time = now;
rc = JK_FALSE;
}
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
@@ -1743,7 +1749,8 @@ static int JK_METHOD validate(jk_worker_
p->lb_workers[i].s->lb_value = 0;
p->lb_workers[i].s->state = JK_LB_STATE_IDLE;
- p->lb_workers[i].s->error_time = 0;
+ p->lb_workers[i].s->first_error_time = 0;
+ p->lb_workers[i].s->last_error_time = 0;
p->lb_workers[i].s->elected_snapshot = 0;
p->lb_workers[i].s->sessions = 0;
p->lb_workers[i].activation =
Modified: tomcat/jk/trunk/native/common/jk_shm.h
URL:
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_shm.h?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_shm.h (original)
+++ tomcat/jk/trunk/native/common/jk_shm.h Tue Dec 23 18:19:04 2014
@@ -150,8 +150,10 @@ struct jk_shm_lb_sub_worker
volatile jk_uint64_t lb_mult;
/* Current lb value */
volatile jk_uint64_t lb_value;
- /* Statistical data */
- volatile time_t error_time;
+ /* First consecutive error time */
+ volatile time_t first_error_time;
+ /* Last consecutive error time */
+ volatile time_t last_error_time;
/* Number of times the worker was elected - snapshot during maintenance */
volatile jk_uint64_t elected_snapshot;
/* Number of non-sticky requests handled, that were not marked as stateless */
Modified: tomcat/jk/trunk/native/common/jk_status.c
URL:
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_status.c?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_status.c (original)
+++ tomcat/jk/trunk/native/common/jk_status.c Tue Dec 23 18:19:04 2014
@@ -1802,9 +1802,9 @@ static void display_worker_ajp_details(j
name = lb->name;
sub_name = wr->name;
ajp_name = wr->name;
- error_time = wr->s->error_time;
+ error_time = wr->s->first_error_time;
if (wr->s->state == JK_LB_STATE_ERROR) {
- rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->error_time);
+ rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->last_error_time);
if (rs_min < 0) {
rs_min = 0;
}
@@ -4300,7 +4300,8 @@ static int reset_worker(jk_ws_service_t
wr->s->state = JK_LB_STATE_IDLE;
wr->s->elected_snapshot = 0;
wr->s->sessions = 0;
- wr->s->error_time = 0;
+ wr->s->first_error_time = 0;
+ wr->s->last_error_time = 0;
wr->s->errors = 0;
wr->s->lb_value = 0;
aw->s->used = 0;
@@ -4328,7 +4329,8 @@ static int reset_worker(jk_ws_service_t
wr->s->state = JK_LB_STATE_IDLE;
wr->s->elected_snapshot = 0;
wr->s->sessions = 0;
- wr->s->error_time = 0;
+ wr->s->first_error_time = 0;
+ wr->s->last_error_time = 0;
wr->s->errors = 0;
wr->s->lb_value = 0;
aw->s->used = 0;
Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original)
+++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Tue Dec 23 18:19:04 2014
@@ -102,6 +102,11 @@
"connection_pool_timeout" and "keepAliveTimeout" or "connectionTimeout"
in the Tomcat AJP connector configuration. (rjung)
</fix>
+ <fix>
+ <bug>52334</bug>: LB: Calculate worker recovery time based on last
+ recovery attempt time instead of original error time after the first
+ recovery attempt. (rjung)
+ </fix>
</changelog>
</subsection>
</section>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org