Author: markt Date: Sun Feb 6 21:00:52 2011 New Revision: 1067759 URL: http://svn.apache.org/viewvc?rev=1067759&view=rev Log: Review from kkolinko 1. Matcher not thread safe 2. >1 UA header -> not a bot
Modified: tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java Modified: tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java?rev=1067759&r1=1067758&r2=1067759&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java (original) +++ tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java Sun Feb 6 21:00:52 2011 @@ -22,7 +22,6 @@ import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.ServletException; @@ -51,7 +50,7 @@ public class CrawlerSessionManagerValve private String crawlerUserAgents = ".*GoogleBot.*|.*bingbot.*|.*Yahoo! Slurp.*"; - private Matcher uaMatcher = null; + private Pattern uaPattern = null; private int sessionInactiveInterval = 60; @@ -65,9 +64,9 @@ public class CrawlerSessionManagerValve public void setCrawlerUserAgents(String crawlerUserAgents) { this.crawlerUserAgents = crawlerUserAgents; if (crawlerUserAgents == null || crawlerUserAgents.length() == 0) { - uaMatcher = null; + uaPattern = null; } else { - uaMatcher = Pattern.compile(crawlerUserAgents).matcher(""); + uaPattern = Pattern.compile(crawlerUserAgents); } } @@ -103,7 +102,7 @@ public class CrawlerSessionManagerValve protected void initInternal() throws LifecycleException { super.initInternal(); - uaMatcher = Pattern.compile(crawlerUserAgents).matcher(""); + uaPattern = Pattern.compile(crawlerUserAgents); } @@ -124,19 +123,18 @@ public class CrawlerSessionManagerValve // If the incoming request has a session ID, no action is required if (request.getRequestedSessionId() == null) { - // Is this a crawler + // Is this a crawler - check the UA headers 
Enumeration<String> uaHeaders = request.getHeaders("user-agent"); - while (!isBot && uaMatcher != null && - uaHeaders.hasMoreElements()) { - - String uaHeader = uaHeaders.nextElement(); - uaMatcher.reset(uaHeader); - + String uaHeader = uaHeaders.nextElement(); + + // If more than one UA header - assume not a bot + if (!uaHeaders.hasMoreElements()) { + if (log.isDebugEnabled()) { log.debug(request.hashCode() + ": UserAgent=" + uaHeader); } - if (uaMatcher.matches()) { + if (uaPattern.matcher(uaHeader).matches()) { isBot = true; if (log.isDebugEnabled()) { --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org