I have not been able to reproduce this crash on my systems, and even
instrumenting the code isn't helping me to locate the issue.  Can you
apply the following patch on top of the previous patches, and let me
know if you get any additional output?

- Sean
---
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 88a9edf..b3bc4c6 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -81,6 +81,12 @@ enum mcast_state {
        MCAST_ERROR
 };
 
+enum mcast_debug {
+       MCAST_DEBUG_IDLE,
+       MCAST_DEBUG_JOINING,
+       MCAST_DEBUG_LEAVING,
+};
+
 struct mcast_member;
 
 struct mcast_group {
@@ -97,6 +103,7 @@ struct mcast_group {
        enum mcast_state        state;
        struct ib_sa_query      *query;
        int                     query_id;
+       enum mcast_debug        debug_state;
 };
 
 struct mcast_member {
@@ -179,6 +186,7 @@ static void release_group(struct mcast_g
        if (atomic_dec_and_test(&group->refcount)) {
                rb_erase(&group->node, &port->table);
                spin_unlock_irqrestore(&port->lock, flags);
+               BUG_ON(group->debug_state != MCAST_DEBUG_IDLE);
                kfree(group);
                deref_port(port);
        } else
@@ -319,6 +327,8 @@ static int send_join(struct mcast_group 
        struct mcast_port *port = group->port;
        int ret;
 
+       BUG_ON(group->debug_state != MCAST_DEBUG_IDLE);
+       group->debug_state = MCAST_DEBUG_JOINING;
        ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
                                       port->port_num, IB_MGMT_METHOD_SET,
                                       &member->multicast.rec,
@@ -341,6 +351,8 @@ static int send_leave(struct mcast_group
        rec = group->rec;
        rec.join_state = leave_state;
 
+       BUG_ON(group->debug_state != MCAST_DEBUG_IDLE);
+       group->debug_state = MCAST_DEBUG_LEAVING;
        ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
                                       port->port_num, IB_SA_METHOD_DELETE, &rec,
                                       IB_SA_MCMEMBER_REC_MGID     |
@@ -493,6 +505,8 @@ static void join_handler(int status, str
 {
        struct mcast_group *group = context;
 
+       BUG_ON(group->debug_state != MCAST_DEBUG_JOINING);
+       group->debug_state = MCAST_DEBUG_IDLE;
        if (status)
                process_join_error(group, status);
        else {
@@ -510,6 +524,10 @@ static void join_handler(int status, str
 static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
                          void *context)
 {
+       struct mcast_group *group = context;
+
+       BUG_ON(group->debug_state != MCAST_DEBUG_LEAVING);
+       group->debug_state = MCAST_DEBUG_IDLE;
        mcast_work_handler(context);
 }
 


_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to