On Wed, 2019-06-05 at 13:14 -0700, Bart Van Assche wrote:
> The previous patch guarantees that srp_queuecommand() does not get
> invoked while a reconnect is in progress. Hence remove the code from
> srp_queuecommand() that prevents command queueing while reconnecting.
> This patch prevents the following from appearing in the kernel log:
>
> BUG: sleeping function called from invalid context at
> kernel/locking/mutex.c:747
> in_atomic(): 1, irqs_disabled(): 0, pid: 5600, name: scsi_eh_9
> 1 lock held by scsi_eh_9/5600:
> #0: (rcu_read_lock){....}, at: [<00000000cbb798c7>]
> __blk_mq_run_hw_queue+0xf1/0x1e0
> Preemption disabled at:
> [<00000000139badf2>] __blk_mq_delay_run_hw_queue+0x78/0xf0
> CPU: 9 PID: 5600 Comm: scsi_eh_9 Tainted: G W 4.15.0-
> rc4-dbg+ #1
> Hardware name: Dell Inc. PowerEdge R720/0VWT90, BIOS 2.5.4 01/22/2016
> Call Trace:
> dump_stack+0x67/0x99
> ___might_sleep+0x16a/0x250 [ib_srp]
> __mutex_lock+0x46/0x9d0
> srp_queuecommand+0x356/0x420 [ib_srp]
> scsi_dispatch_cmd+0xf6/0x3f0
> scsi_queue_rq+0x4a8/0x5f0
> blk_mq_dispatch_rq_list+0x73/0x440
> blk_mq_sched_dispatch_requests+0x109/0x1a0
> __blk_mq_run_hw_queue+0x131/0x1e0
> __blk_mq_delay_run_hw_queue+0x9a/0xf0
> blk_mq_run_hw_queue+0xc0/0x1e0
> blk_mq_start_hw_queues+0x2c/0x40
> scsi_run_queue+0x18e/0x2d0
> scsi_run_host_queues+0x22/0x40
> scsi_error_handler+0x18d/0x5f0
> kthread+0x11c/0x140
> ret_from_fork+0x24/0x30
>
> Reviewed-by: Hannes Reinecke <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Cc: Jason Gunthorpe <[email protected]>
> Cc: Leon Romanovsky <[email protected]>
> Cc: Doug Ledford <[email protected]>
> Cc: Laurence Oberman <[email protected]>
> Signed-off-by: Bart Van Assche <[email protected]>
> ---
> drivers/infiniband/ulp/srp/ib_srp.c | 21 ++-------------------
> 1 file changed, 2 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/srp/ib_srp.c
> b/drivers/infiniband/ulp/srp/ib_srp.c
> index be9ddcad8f28..b7c5a35f7daa 100644
> --- a/drivers/infiniband/ulp/srp/ib_srp.c
> +++ b/drivers/infiniband/ulp/srp/ib_srp.c
> @@ -2338,7 +2338,6 @@ static void srp_handle_qp_err(struct ib_cq *cq,
> struct ib_wc *wc,
> static int srp_queuecommand(struct Scsi_Host *shost, struct
> scsi_cmnd *scmnd)
> {
> struct srp_target_port *target = host_to_target(shost);
> - struct srp_rport *rport = target->rport;
> struct srp_rdma_ch *ch;
> struct srp_request *req;
> struct srp_iu *iu;
> @@ -2348,16 +2347,6 @@ static int srp_queuecommand(struct Scsi_Host
> *shost, struct scsi_cmnd *scmnd)
> u32 tag;
> u16 idx;
> int len, ret;
> - const bool in_scsi_eh = !in_interrupt() && current == shost-
> >ehandler;
> -
> - /*
> - * The SCSI EH thread is the only context from which
> srp_queuecommand()
> - * can get invoked for blocked devices (SDEV_BLOCK /
> - * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport()
> by
> - * locking the rport mutex if invoked from inside the SCSI EH.
> - */
> - if (in_scsi_eh)
> - mutex_lock(&rport->mutex);
>
> scmnd->result = srp_chkready(target->rport);
> if (unlikely(scmnd->result))
> @@ -2426,13 +2415,7 @@ static int srp_queuecommand(struct Scsi_Host
> *shost, struct scsi_cmnd *scmnd)
> goto err_unmap;
> }
>
> - ret = 0;
> -
> -unlock_rport:
> - if (in_scsi_eh)
> - mutex_unlock(&rport->mutex);
> -
> - return ret;
> + return 0;
>
> err_unmap:
> srp_unmap_data(scmnd, ch, req);
> @@ -2454,7 +2437,7 @@ static int srp_queuecommand(struct Scsi_Host
> *shost, struct scsi_cmnd *scmnd)
> ret = SCSI_MLQUEUE_HOST_BUSY;
> }
>
> - goto unlock_rport;
> + return ret;
> }
>
> /*
Based on the prior patch, this looks good to me.
I will also test it.
Reviewed-by: Laurence Oberman <[email protected]>