Author: fhanik Date: Tue May 23 07:48:10 2006 New Revision: 408912 URL: http://svn.apache.org/viewvc?rev=408912&view=rev Log: Updated notes about the leader election algorithm. We are no longer using Hans Svensson's algorithm, as that algorithm assumes membership visibility. Instead, we are using a merging algorithm that makes other members visible.
Added: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.dia (with props) tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.jpg (with props) tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.dia (with props) tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.jpg (with props) Modified: tomcat/container/tc5.5.x/modules/groupcom/VERSION tomcat/container/tc5.5.x/modules/groupcom/build/build.xml tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/group/interceptors/NonBlockingCoordinator.java tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/membership/Membership.java Modified: tomcat/container/tc5.5.x/modules/groupcom/VERSION URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/VERSION?rev=408912&r1=408911&r2=408912&view=diff ============================================================================== --- tomcat/container/tc5.5.x/modules/groupcom/VERSION (original) +++ tomcat/container/tc5.5.x/modules/groupcom/VERSION Tue May 23 07:48:10 2006 @@ -1,3 +1,5 @@ +0.0.2.4 + - leader election, work in progress 0.9.2.3 - Keep alive pings for AbstractReplicatedMap - Improved TcpFailureDetector Modified: tomcat/container/tc5.5.x/modules/groupcom/build/build.xml URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/build/build.xml?rev=408912&r1=408911&r2=408912&view=diff ============================================================================== --- tomcat/container/tc5.5.x/modules/groupcom/build/build.xml (original) +++ tomcat/container/tc5.5.x/modules/groupcom/build/build.xml Tue May 23 07:48:10 2006 @@ -110,6 +110,9 @@ includes="*.xml"> <param name="relative-path" expression="."/> </style> + <copy todir="${docs.path}"> + <fileset dir="${basedir}/doc"/> + </copy> </target> <target name="javadoc"> Added: 
tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.dia URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.dia?rev=408912&view=auto ============================================================================== Binary file - no diff available. Propchange: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.dia ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.jpg URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.jpg?rev=408912&view=auto ============================================================================== Binary file - no diff available. Propchange: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-initiate-election.jpg ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.dia URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.dia?rev=408912&view=auto ============================================================================== Binary file - no diff available. Propchange: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.dia ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.jpg URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.jpg?rev=408912&view=auto ============================================================================== Binary file - no diff available. 
Propchange: tomcat/container/tc5.5.x/modules/groupcom/doc/leader-election-message-arrives.jpg ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/group/interceptors/NonBlockingCoordinator.java URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/group/interceptors/NonBlockingCoordinator.java?rev=408912&r1=408911&r2=408912&view=diff ============================================================================== --- tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/group/interceptors/NonBlockingCoordinator.java (original) +++ tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/group/interceptors/NonBlockingCoordinator.java Tue May 23 07:48:10 2006 @@ -28,6 +28,8 @@ import org.apache.catalina.tribes.util.Arrays; import org.apache.catalina.tribes.io.ChannelData; import org.apache.catalina.tribes.Channel; +import java.util.HashMap; +import java.util.LinkedHashMap; /** * <p>Title: NonBlockingCoordinator</p> @@ -86,20 +88,17 @@ * <p> * Lets assume that C1 arrives, C1 has lower priority than C, but higher priority than D.<br> * Lets also assume that C1 sees the following view {B,D,E}<br> - * C1 sends Z{B-ldr, C-src, mbrs-B,C1,D,E} to D<br> - * D receives Z{B-ldr, C-src, mbrs-B,C1,D,E} sends Z{A-ldr, D-src, mbrs-A,B,C,C1,D,E} to E<br> - * Once the message reaches A, A will issue a new view and send a new message<br> - * A view is not accepted by a member unless ldr==src in the token.<br> - * </p> - * <p> - * Lets assume that A0 arrives A0 being higher than A.<br> - * Lets also assume that A0 sees view {B,D,E}<br> - * A0 will issue a similar view statement and the same scenario as above will happen.<br> - * If A0 sees {A,B,C,D} it simply sends the message to A rather than B. 
+ * C1 waits for a token to arrive. When the token arrives, the same scenario as above will happen.<br> + * In the scenario where C1 sees {D,E} and A,B,C can not see C1, no token will ever arrive.<br> + * In this case, C1 sends a Z{C1-ldr, C1-src, mbrs-C1,D,E} to D<br> + * D receives Z{C1-ldr, C1-src, mbrs-C1,D,E} and sends Z{A-ldr, C1-src, mbrs-A,B,C,C1,D,E} to E<br> + * E receives Z{A-ldr, C1-src, mbrs-A,B,C,C1,D,E} and sends it to A<br> + * A sends Z{A-ldr, A-src, mbrs-A,B,C,C1,D,E} to B and the chain continues until A receives the token again. + * At that time A optionally sends out Z{A-ldr, A-src, mbrs-A,B,C,C1,D,E, confirmed} to A,B,C,C1,D,E * </p> * <p>If we wanted to ensure that the view gets implemented at all nodes at the same time, * ie, implementing a blocking coordinator, we would simply require that each view, before it gets installed - * has to receive a VIEW_CONF message. + * has to receive a VIEW_CONF message, this is the 'confirmed' message that is optional above. * * <p>Ideally, the interceptor below this one would be the TcpFailureDetector to ensure correct memberships</p> * @@ -107,6 +106,9 @@ * But I suck at writing state machines, my head gets all confused. 
One day I will document this algorithm though.<br> * Maybe I'll do a state diagram :) * </p> + * <h2>State Diagrams</h2> + * <a href="http://people.apache.org/~fhanik/tribes/docs/leader-election-initiate-election.jpg">Initiate an election</a><br><br> + * <a href="http://people.apache.org/~fhanik/tribes/docs/leader-election-message-arrives.jpg">Receive an election message</a><br><br> * * @author Filip Hanik * @version 1.0 @@ -129,19 +131,32 @@ */ protected static final byte[] COORD_CONF = new byte[] {67, 88, 107, -86, 69, 23, 76, -70, -91, -23, -87, -25, -125, 86, 75, 20}; - protected Member coordinator = null; - + /** + * Our current view + */ protected Membership view = null; - protected Membership suggestedview = null; - + /** + * Our current viewId + */ protected UniqueId viewId; + + /** + * Our nonblocking membership + */ + protected Membership membership = null; + + /** + * indicates that we are running an election + * and this is the one we are running + */ protected UniqueId suggestedviewId; + protected LinkedHashMap rotatingViews = new LinkedHashMap(); + protected boolean started = false; protected final int startsvc = 0xFFFF; protected Object electionMutex = new Object(); - protected boolean runningElection = false; public NonBlockingCoordinator() { super(); @@ -159,7 +174,6 @@ } //coordination can happen before this line of code executes Member local = getLocalMember(false); - if (local != null && coordinator == null) coordinator = local; } public void stop(int svc) throws ChannelException { @@ -171,7 +185,10 @@ }finally { release(); } - this.coordinator = null; + } + + public Membership getView(UniqueId id) { + return (Membership)rotatingViews.get(id); } public void elect() { @@ -185,8 +202,11 @@ //I'm not the highest, exit if ( !local.equals(mbrs[0]) ) return; //I'm already running an election - if ( suggestedview.hasMembers() ) return; + if ( suggestedviewId != null ) return; //create a suggestedview + suggestedviewId = new 
UniqueId(UUIDGenerator.randomUUID(true)); + Membership suggestedview = new Membership((MemberImpl)local,AbsoluteOrder.comp); + rotatingViews.put(suggestedviewId,suggestedview); suggestedview.addMember((MemberImpl)local); Arrays.fill(suggestedview,mbrs); suggestedviewId = new UniqueId(UUIDGenerator.randomUUID(true)); @@ -218,6 +238,7 @@ //we are running our own election if (suggestedviewId.equals(msg.getId())) { //we received our own token + Membership suggestedview = getView(msg.getId()); Member[] suggested = suggestedview.getMembers(); Member[] received = msg.getMembers(); if (Arrays.sameMembers(suggested,received) ) { @@ -225,6 +246,7 @@ view = suggestedview; viewId = suggestedviewId; suggestedviewId = null; + rotatingViews.remove(viewId); suggestedview.reset(); viewChange(viewId,view.getMembers()); release(); @@ -253,9 +275,12 @@ } - + /** + * Returns coordinator if one is available + * @return Member + */ public Member getCoordinator() { - return coordinator; + return (view != null && view.hasMembers()) ? 
view.getMembers()[0] : null; } public void sendMessage(Member[] destination, ChannelMessage msg, InterceptorPayload payload) throws ChannelException { @@ -277,11 +302,13 @@ public void memberAdded(Member member) { try { + if ( membership == null ) setupMembership(); + if ( membership.memberAlive((MemberImpl)member) ) super.memberAdded(member); halt(); }finally { release(); } - super.memberAdded(member); + } public void memberDisappeared(Member member) { @@ -302,8 +329,8 @@ * has members */ public boolean hasMembers() { - if ( view == null ) setupMembership(); - return view.hasMembers(); + if ( membership == null ) setupMembership(); + return membership.hasMembers(); } /** @@ -311,9 +338,8 @@ * @return all members or empty array */ public Member[] getMembers() { - if ( view == null ) setupMembership(); - Member[] members = view.getMembers(); - return members; + if ( membership == null ) setupMembership(); + throw new UnsupportedOperationException("Not yet implemented"); } /** @@ -322,8 +348,8 @@ * @return Member */ public Member getMember(Member mbr) { - if ( view == null ) setupMembership(); - return view.getMember(mbr); + if ( membership == null ) setupMembership(); + throw new UnsupportedOperationException("Not yet implemented"); } /** @@ -338,9 +364,9 @@ } protected synchronized void setupMembership() { - if ( view == null || suggestedview == null ) { - view = new Membership((MemberImpl)super.getLocalMember(true)); - suggestedview = new Membership((MemberImpl)super.getLocalMember(true)); + if ( view == null || membership == null ) { + view = new Membership((MemberImpl)super.getLocalMember(true),AbsoluteOrder.comp); + membership = new Membership((MemberImpl)super.getLocalMember(true),AbsoluteOrder.comp); } } @@ -373,6 +399,7 @@ protected MemberImpl[] view; protected UniqueId id; protected byte[] type; + protected long timestamp = System.currentTimeMillis(); public CoordinationMessage(XByteBuffer buf) { this.buf = buf; Modified: 
tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/membership/Membership.java URL: http://svn.apache.org/viewvc/tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/membership/Membership.java?rev=408912&r1=408911&r2=408912&view=diff ============================================================================== --- tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/membership/Membership.java (original) +++ tomcat/container/tc5.5.x/modules/groupcom/src/share/org/apache/catalina/tribes/membership/Membership.java Tue May 23 07:48:10 2006 @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.catalina.tribes.Member; +import java.util.Comparator; /** * A <b>membership</b> implementation using simple multicast. @@ -58,7 +59,7 @@ /** * sort members by alive time */ - protected MemberComparator memberComparator = new MemberComparator(); + protected Comparator memberComparator = new MemberComparator(); /** * Constructs a new membership @@ -66,6 +67,11 @@ */ public Membership(MemberImpl local) { this.local = local; + } + + public Membership(MemberImpl local, Comparator comp) { + this(local); + this.memberComparator = comp; } /** --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]