Author: rjung
Date: Sun Aug 27 13:29:08 2006
New Revision: 437455
URL: http://svn.apache.org/viewvc?rev=437455&view=rev
Log:
Reorganize the lb service method:
- return from the method at a single point, so that passing back
results gets easier
- retry handling was strange at best; it should be clearer now
- abort as soon as get_most_suitable_worker returns NULL
Modified:
tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
URL:
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=437455&r1=437454&r2=437455&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Sun Aug 27 13:29:08 2006
@@ -598,9 +598,9 @@
jk_logger_t *l, int *is_error)
{
lb_endpoint_t *p;
- int attempt = 0;
+ int attempt = 1;
int num_of_workers;
- worker_record_t *prec = NULL;
+ int rc = -1;
char *sessionid = NULL;
JK_TRACE_ENTER(l);
@@ -635,26 +635,49 @@
"service sticky_session=%d id='%s'",
p->worker->s->sticky_session, sessionid ? sessionid : "empty");
- while (num_of_workers) {
+ while (attempt <= num_of_workers && rc == -1) {
worker_record_t *rec =
get_most_suitable_worker(p->worker, sessionid, s, l);
- int rc;
/* Do not reuse previous worker, because
* that worker already failed.
*/
- if (rec && rec != prec) {
+ if (rec) {
+ int r;
int is_service_error = JK_HTTP_OK;
- int service_stat = JK_FALSE;
jk_endpoint_t *end = NULL;
-
+ int retry = 0;
+ int retry_wait = JK_LB_MIN_RETRY_WAIT;
s->jvm_route = rec->r;
- rc = rec->w->get_endpoint(rec->w, &end, l);
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
"service worker=%s jvm_route=%s",
rec->s->name, s->jvm_route);
- if (rc && end) {
+ while ((!(r=rec->w->get_endpoint(rec->w, &end, l)) || !end) &&
(retry < p->worker->s->retries)) {
+ retry++;
+ retry_wait *=2;
+ if (retry_wait > JK_LB_MAX_RETRY_WAIT)
+ retry_wait = JK_LB_MAX_RETRY_WAIT;
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "could not get free endpoint for worker"
+ " (retry %d, sleeping for %d ms)",
+ retry, retry_wait);
+ jk_sleep(retry_wait);
+ }
+ if (!r || !end) {
+ /* If we can not get the endpoint
+ * mark the worker as busy rather then
+ * as in error if the retry number is
+ * greater then the number of retries.
+ */
+ rec->s->is_busy = JK_TRUE;
+ jk_log(l, JK_LOG_INFO,
+ "could not get free endpoint for worker %s (%d
retries)",
+ rec->s->name, retry);
+ }
+ else {
+ int service_stat = -1;
size_t rd = 0;
size_t wr = 0;
/* Reset endpoint read and write sizes for
@@ -732,116 +755,92 @@
rec->s->error_time = 0;
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
jk_shm_unlock();
- JK_TRACE_EXIT(l);
- return JK_TRUE;
- }
- if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
- jk_shm_unlock();
- }
- else {
- /* If we can not get the endpoint
- * mark the worker as busy rather then
- * as in error if the attempt number is
- * greater then the number of retries.
- */
- attempt++;
- if (attempt > p->worker->s->retries) {
- rec->s->is_busy = JK_TRUE;
- num_of_workers = 0;
+ rc = JK_TRUE;
}
- jk_log(l, JK_LOG_INFO,
- "could not get free endpoint for worker %s (attempt
%d)",
- rec->s->name, attempt);
- /* In case of attempt > num of workers sleep for 100 ms
- * on each consecutive attempt.
- */
- if (attempt > (int)p->worker->num_of_workers)
- jk_sleep(JK_SLEEP_DEF);
- continue;
- }
- if (service_stat == JK_FALSE) {
- /*
- * Service failed !!!
- *
- * Time for fault tolerance (if possible)...
- */
-
- rec->s->errors++;
- rec->s->in_error_state = JK_TRUE;
- rec->s->in_recovering = JK_FALSE;
- rec->s->error_time = time(NULL);
+ else if (service_stat == JK_FALSE) {
+ /*
+ * Service failed !!!
+ *
+ * Time for fault tolerance (if possible)...
+ */
- if (is_service_error != JK_HTTP_SERVER_BUSY) {
+ rec->s->errors++;
+ rec->s->in_error_state = JK_TRUE;
+ rec->s->in_recovering = JK_FALSE;
+ rec->s->error_time = time(NULL);
+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+ jk_shm_unlock();
+
+ if (is_service_error != JK_HTTP_SERVER_BUSY) {
+ /*
+ * Error is not recoverable - break with an error.
+ */
+ jk_log(l, JK_LOG_ERROR,
+ "unrecoverable error %d, request failed."
+ " Tomcat failed in the middle of request,"
+ " we can't recover to another instance.",
+ is_service_error);
+ *is_error = is_service_error;
+ rc = JK_FALSE;
+ }
+ else
+ jk_log(l, JK_LOG_INFO,
+ "service failed, worker %s is in error state",
+ rec->s->name);
+ }
+ else if (service_stat == JK_CLIENT_ERROR) {
/*
- * Error is not recoverable - break with an error.
+ * Client error !!!
+ * Since this is bad request do not fail over.
*/
- jk_log(l, JK_LOG_ERROR,
- "unrecoverable error %d, request failed."
- " Tomcat failed in the middle of request,"
- " we can't recover to another instance.",
- is_service_error);
+ rec->s->errors++;
+ rec->s->in_error_state = JK_FALSE;
+ rec->s->in_recovering = JK_FALSE;
+ rec->s->error_time = 0;
+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+ jk_shm_unlock();
+ jk_log(l, JK_LOG_INFO,
+ "unrecoverable error %d, request failed."
+ " Client failed in the middle of request,"
+ " we can't recover to another instance.",
+ is_service_error);
*is_error = is_service_error;
- JK_TRACE_EXIT(l);
- return JK_FALSE;
+ rc = JK_CLIENT_ERROR;
+ }
+ else {
+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+ jk_shm_unlock();
}
- jk_log(l, JK_LOG_INFO,
- "service failed, worker %s is in error state",
- rec->s->name);
}
- else if (service_stat == JK_CLIENT_ERROR) {
+ if ( rc == -1 ) {
/*
- * Client error !!!
- * Since this is bad request do not fail over.
- */
- rec->s->errors++;
- rec->s->in_error_state = JK_FALSE;
- rec->s->in_recovering = JK_FALSE;
- rec->s->error_time = 0;
- *is_error = is_service_error;
-
- jk_log(l, JK_LOG_INFO,
- "unrecoverable error %d, request failed."
- " Client failed in the middle of request,"
- " we can't recover to another instance.",
- is_service_error);
- JK_TRACE_EXIT(l);
- return JK_CLIENT_ERROR;
- }
- else {
- /* If we can not get the endpoint from the worker
- * that does not mean that the worker is in error
- * state. It means that the worker is busy.
- * We will try another worker.
- * To prevent infinite loop decrement worker count;
+ * Error is recoverable by submitting the request to
+ * another worker... Lets try to do that.
*/
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "recoverable error... will try to recover on other
worker");
}
- /*
- * Error is recoverable by submitting the request to
- * another worker... Lets try to do that.
- */
- if (JK_IS_DEBUG_LEVEL(l))
- jk_log(l, JK_LOG_DEBUG,
- "recoverable error... will try to recover on other
host");
}
-#if 0
else {
/* NULL record, no more workers left ... */
jk_log(l, JK_LOG_ERROR,
"All tomcat instances failed, no more workers left");
- JK_TRACE_EXIT(l);
*is_error = JK_HTTP_SERVER_BUSY;
- return JK_FALSE;
+ rc = JK_FALSE;
}
-#endif
- --num_of_workers;
- prec = rec;
+ attempt++;
}
- jk_log(l, JK_LOG_INFO,
- "All tomcat instances are busy or in error state");
- /* Set error to Timeout */
- *is_error = JK_HTTP_SERVER_BUSY;
+ if ( rc == -1 ) {
+ jk_log(l, JK_LOG_INFO,
+ "All tomcat instances are busy or in error state");
+ /* Set error to Timeout */
+ *is_error = JK_HTTP_SERVER_BUSY;
+ rc = JK_FALSE;
+ }
+
JK_TRACE_EXIT(l);
- return JK_FALSE;
+ return rc;
}
static int JK_METHOD done(jk_endpoint_t **e, jk_logger_t *l)
Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
URL:
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h?rev=437455&r1=437454&r2=437455&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Sun Aug 27 13:29:08 2006
@@ -53,6 +53,10 @@
#define JK_LB_LOCK_TEXT_PESSIMISTIC ("Pessimistic")
#define JK_LB_LOCK_TEXT_DEF (JK_LB_LOCK_TEXT_OPTIMISTIC)
+/* Minimal time in ms to wait between get_endpoint retries for balanced workers */
+#define JK_LB_MIN_RETRY_WAIT (25)
+/* Maximal time in ms to wait between get_endpoint retries for balanced workers */
+#define JK_LB_MAX_RETRY_WAIT (100)
/* Time to wait before retry. */
#define WAIT_BEFORE_RECOVER (60)
/* We accept doing global maintenance if we are */
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]