static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
@@ -1115,6 +1222,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
        bool inline_data = nvme_tcp_has_inline_data(req);
        u8 hdgst = nvme_tcp_hdgst_len(queue);
        int len = sizeof(*pdu) + hdgst - req->offset;
+       struct request *rq = blk_mq_rq_from_pdu(req);
        int flags = MSG_DONTWAIT;
        int ret;
@@ -1123,6 +1231,10 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
        else
                flags |= MSG_EOR;
+       if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) &&
+           blk_rq_nr_phys_segments(rq) && rq_data_dir(rq) == READ)
+               nvme_tcp_setup_ddp(queue, pdu->cmd.common.command_id, rq);

I'd assume that this is something we want to set up in
nvme_tcp_setup_cmd_pdu. Why do it here?

Our goal in placing it here is to keep both setup and teardown in the
same thread, which lets the offload driver avoid locking for per-queue
operations.

I also think it is cleaner to do this when setting up the PDU. Note that
if queues map 1:1 to CPU cores, any synchronization is pretty
lightweight, and if they don't, we have other synchronization anyway...
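
For context, here is a minimal sketch (not the actual io_work loop,
which iterates with a time budget) of why same-thread setup and
teardown needs no locking: nvme-tcp drives both the send and receive
paths of a queue from a single io_work context, so per-queue DDP state
set up on the send side and released on the receive side is never
touched concurrently. nvme_tcp_teardown_ddp() below is a hypothetical
name for the inverse of the setup helper added by this series.

static void nvme_tcp_io_work(struct work_struct *w)
{
        struct nvme_tcp_queue *queue =
                container_of(w, struct nvme_tcp_queue, io_work);

        /* send side: command PDUs go out; READs get a DDP mapping */
        nvme_tcp_try_send(queue);

        /*
         * receive side: completions for those READs would call the
         * (hypothetical) nvme_tcp_teardown_ddp() from this same
         * context, so the per-queue DDP state needs no extra locking.
         */
        nvme_tcp_try_recv(queue);
}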
