Author: markt
Date: Mon Apr 23 19:48:11 2018
New Revision: 1829934

URL: http://svn.apache.org/viewvc?rev=1829934&view=rev
Log:
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=62297
Enable the CrawlerSessionManagerValve to correctly handle bots that crawl 
multiple hosts and/or web applications when the Valve is configured on a Host 
or an Engine.

Modified:
    tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
    
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
    tomcat/trunk/webapps/docs/changelog.xml
    tomcat/trunk/webapps/docs/config/valve.xml

Modified: 
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- 
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java 
(original)
+++ 
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java 
Mon Apr 23 19:48:11 2018
@@ -27,6 +27,8 @@ import javax.servlet.http.HttpSession;
 import javax.servlet.http.HttpSessionBindingEvent;
 import javax.servlet.http.HttpSessionBindingListener;
 
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
 import org.apache.catalina.LifecycleException;
 import org.apache.catalina.connector.Request;
 import org.apache.catalina.connector.Response;
@@ -44,8 +46,8 @@ public class CrawlerSessionManagerValve
 
     private static final Log log = 
LogFactory.getLog(CrawlerSessionManagerValve.class);
 
-    private final Map<String, String> clientIpSessionId = new 
ConcurrentHashMap<>();
-    private final Map<String, String> sessionIdClientIp = new 
ConcurrentHashMap<>();
+    private final Map<String, String> clientIdSessionId = new 
ConcurrentHashMap<>();
+    private final Map<String, String> sessionIdClientId = new 
ConcurrentHashMap<>();
 
     private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo! 
Slurp.*|.*Feedfetcher-Google.*";
     private Pattern uaPattern = null;
@@ -55,6 +57,10 @@ public class CrawlerSessionManagerValve
 
     private int sessionInactiveInterval = 60;
 
+    private boolean isHostAware = true;
+
+    private boolean isContextAware = true;
+
 
     /**
      * Specifies a default constructor so async support can be configured.
@@ -134,7 +140,27 @@ public class CrawlerSessionManagerValve
 
 
     public Map<String, String> getClientIpSessionId() {
-        return clientIpSessionId;
+        return clientIdSessionId;
+    }
+
+
+    public boolean isHostAware() {
+        return isHostAware;
+    }
+
+
+    public void setHostAware(boolean isHostAware) {
+        this.isHostAware = isHostAware;
+    }
+
+
+    public boolean isContextAware() {
+        return isContextAware;
+    }
+
+
+    public void setContextAware(boolean isContextAware) {
+        this.isContextAware = isContextAware;
     }
 
 
@@ -152,9 +178,10 @@ public class CrawlerSessionManagerValve
         boolean isBot = false;
         String sessionId = null;
         String clientIp = request.getRemoteAddr();
+        String clientIdentifier = getClientIdentifier(request.getHost(), 
request.getContext(), clientIp);
 
         if (log.isDebugEnabled()) {
-            log.debug(request.hashCode() + ": ClientIp=" + clientIp + ", 
RequestedSessionId="
+            log.debug(request.hashCode() + ": ClientIdentifier=" + 
clientIdentifier + ", RequestedSessionId="
                     + request.getRequestedSessionId());
         }
 
@@ -194,7 +221,7 @@ public class CrawlerSessionManagerValve
 
             // If this is a bot, is the session ID known?
             if (isBot) {
-                sessionId = clientIpSessionId.get(clientIp);
+                sessionId = clientIdSessionId.get(clientIdentifier);
                 if (sessionId != null) {
                     request.setRequestedSessionId(sessionId);
                     if (log.isDebugEnabled()) {
@@ -211,8 +238,8 @@ public class CrawlerSessionManagerValve
                 // Has bot just created a session, if so make a note of it
                 HttpSession s = request.getSession(false);
                 if (s != null) {
-                    clientIpSessionId.put(clientIp, s.getId());
-                    sessionIdClientIp.put(s.getId(), clientIp);
+                    clientIdSessionId.put(clientIdentifier, s.getId());
+                    sessionIdClientId.put(s.getId(), clientIdentifier);
                     // #valueUnbound() will be called on session expiration
                     s.setAttribute(this.getClass().getName(), this);
                     s.setMaxInactiveInterval(sessionInactiveInterval);
@@ -231,11 +258,23 @@ public class CrawlerSessionManagerValve
     }
 
 
+    private String getClientIdentifier(Host host, Context context, String 
clientIp) {
+        StringBuilder result = new StringBuilder(clientIp);
+        if (isHostAware) {
+            result.append('-').append(host.getName());
+        }
+        if (isContextAware) {
+            result.append(context.getName());
+        }
+        return result.toString();
+    }
+
+
     @Override
     public void valueUnbound(HttpSessionBindingEvent event) {
-        String clientIp = sessionIdClientIp.remove(event.getSession().getId());
-        if (clientIp != null) {
-            clientIpSessionId.remove(clientIp);
+        String clientIdentifier = 
sessionIdClientId.remove(event.getSession().getId());
+        if (clientIdentifier != null) {
+            clientIdSessionId.remove(clientIdentifier);
         }
     }
 }

Modified: 
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- 
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
 (original)
+++ 
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
 Mon Apr 23 19:48:11 2018
@@ -16,12 +16,17 @@
  */
 package org.apache.catalina.valves;
 
+import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collections;
 
+import javax.servlet.ServletException;
 import javax.servlet.http.HttpSession;
 
 import org.junit.Test;
 
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
 import org.apache.catalina.Valve;
 import org.apache.catalina.connector.Request;
 import org.apache.catalina.connector.Response;
@@ -34,6 +39,7 @@ public class TestCrawlerSessionManagerVa
     public void testCrawlerIpsPositive() throws Exception {
         CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
         valve.setCrawlerIps("216\\.58\\.206\\.174");
+        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
         valve.setNext(EasyMock.createMock(Valve.class));
         HttpSession session = createSessionExpectations(valve, true);
         Request request = createRequestExpectations("216.58.206.174", session, 
true);
@@ -49,6 +55,7 @@ public class TestCrawlerSessionManagerVa
     public void testCrawlerIpsNegative() throws Exception {
         CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
         valve.setCrawlerIps("216\\.58\\.206\\.174");
+        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
         valve.setNext(EasyMock.createMock(Valve.class));
         HttpSession session = createSessionExpectations(valve, false);
         Request request = createRequestExpectations("127.0.0.1", session, 
false);
@@ -60,6 +67,32 @@ public class TestCrawlerSessionManagerVa
         EasyMock.verify(request, session);
     }
 
+    @Test
+    public void testCrawlerMultipleHostsHostAware() throws Exception {
+        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
+        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
+        valve.setHostAware(true);
+        valve.setContextAware(true);
+        valve.setNext(EasyMock.createMock(Valve.class));
+
+        verifyCrawlingLocalhost(valve, "localhost");
+        verifyCrawlingLocalhost(valve, "example.invalid");
+    }
+
+
+    private void verifyCrawlingLocalhost(CrawlerSessionManagerValve valve, 
String hostname)
+            throws IOException, ServletException {
+        HttpSession session = createSessionExpectations(valve, true);
+        Request request = createRequestExpectations("127.0.0.1", session, 
true, hostname, "tomcatBot 1.0");
+
+        EasyMock.replay(request, session);
+
+        valve.invoke(request, EasyMock.createMock(Response.class));
+
+        EasyMock.verify(request, session);
+    }
+
+
     private HttpSession createSessionExpectations(CrawlerSessionManagerValve 
valve, boolean isBot) {
         HttpSession session = EasyMock.createMock(HttpSession.class);
         if (isBot) {
@@ -72,15 +105,36 @@ public class TestCrawlerSessionManagerVa
         return session;
     }
 
+
     private Request createRequestExpectations(String ip, HttpSession session, 
boolean isBot) {
+        return createRequestExpectations(ip, session, isBot, "localhost", 
"something 1.0");
+    }
+
+    private Request createRequestExpectations(String ip, HttpSession session, 
boolean isBot, String hostname, String userAgent) {
         Request request = EasyMock.createMock(Request.class);
         EasyMock.expect(request.getRemoteAddr()).andReturn(ip);
+        
EasyMock.expect(request.getHost()).andReturn(simpleHostWithName(hostname));
+        
EasyMock.expect(request.getContext()).andReturn(simpleContextWithName());
         IExpectationSetters<HttpSession> setter = 
EasyMock.expect(request.getSession(false))
                 .andReturn(null);
         if (isBot) {
             setter.andReturn(session);
         }
-        
EasyMock.expect(request.getHeaders("user-agent")).andReturn(Collections.emptyEnumeration());
+        EasyMock.expect(request.getHeaders("user-agent")).andAnswer(() -> 
Collections.enumeration(Arrays.asList(userAgent)));
         return request;
     }
+
+    private Host simpleHostWithName(String hostname) {
+        Host host = EasyMock.createMock(Host.class);
+        EasyMock.expect(host.getName()).andReturn(hostname);
+        EasyMock.replay(host);
+        return host;
+    }
+
+    private Context simpleContextWithName() {
+        Context context = EasyMock.createMock(Context.class);
+        EasyMock.expect(context.getName()).andReturn("/examples");
+        EasyMock.replay(context);
+        return context;
+    }
 }

Modified: tomcat/trunk/webapps/docs/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Mon Apr 23 19:48:11 2018
@@ -73,6 +73,11 @@
         access Java 11 support to the annotation scanning code. (markt)
       </add>
       <fix>
+        <bug>62297</bug>: Enable the <code>CrawlerSessionManagerValve</code> to
+        correctly handle bots that crawl multiple hosts and/or web applications
+        when the Valve is configured on a Host or an Engine. (fschumacher)
+      </fix>
+      <fix>
         <bug>62309</bug>: Fix a <code>SecurityException</code> when using 
JASPIC
         under a <code>SecurityManager</code> when authentication is not
         mandatory. (markt)

Modified: tomcat/trunk/webapps/docs/config/valve.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/valve.xml?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/config/valve.xml (original)
+++ tomcat/trunk/webapps/docs/config/valve.xml Mon Apr 23 19:48:11 2018
@@ -1820,6 +1820,13 @@
         </p>
       </attribute>
 
+      <attribute name="contextAware" required="false">
+        <p>Flag to use the context name together with the client IP to
+        identify the session to re-use. Can be combined with 
<code>hostAware</code>.
+        Default value: <code>true</code>
+        </p>
+      </attribute>
+
       <attribute name="crawlerIps" required="false">
         <p>Regular expression (using <code>java.util.regex</code>) that client
         IP is matched against to determine if a request is from a web crawler.
@@ -1833,6 +1840,13 @@
         <code>.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*</code> is 
used.</p>
       </attribute>
 
+      <attribute name="hostAware" required="false">
+        <p>Flag to use the configured host together with the client IP to
+        identify the session to re-use. Can be combined with 
<code>contextAware</code>.
+        Default value: <code>true</code>
+        </p>
+      </attribute>
+
       <attribute name="sessionInactiveInterval" required="false">
         <p>The minimum time in seconds that the Crawler Session Manager Valve
         should keep the mapping of client IP to session ID in memory without 
any



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to