Enable BIO_DMA_TOKEN backed requests. They require special handling: the
nvme request has to be set up from the mapping prepared in advance, torn
down on completion, and the buffers have to be synced for the device on
writes and for the CPU on reads.
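
As an illustration only (not part of the patch), the premapped path
indexes a per-controller-page DMA address list by the bio's byte
offset. A minimal kernel-style sketch of that calculation, assuming
map->private holds one dma_addr_t per NVME_CTRL_PAGE_SIZE chunk of the
premapped buffer; the helper name is hypothetical:

	static dma_addr_t premapped_prp1(dma_addr_t *dma_list,
					 unsigned int bvec_done)
	{
		/* which premapped chunk the request starts in */
		unsigned int map_idx = bvec_done / NVME_CTRL_PAGE_SIZE;
		/* byte offset inside that chunk */
		unsigned int off = bvec_done & (NVME_CTRL_PAGE_SIZE - 1);

		/* PRP1 points at the partial leading chunk */
		return dma_list[map_idx] + off;
	}

nvme_dma_premapped() and nvme_sync_dma() below perform the same
arithmetic directly on req->bio->bi_iter.bi_bvec_done.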

Suggested-by: Keith Busch <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 drivers/nvme/host/pci.c | 126 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 63e03c3dc044..ac377416b088 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -797,6 +797,123 @@ static void nvme_free_descriptors(struct request *req)
        }
 }
 
+static void nvme_sync_dma(struct nvme_dev *nvme_dev, struct request *req,
+                         enum dma_data_direction dir)
+{
+       struct blk_mq_dma_map *map = req->dma_map;
+       int length = blk_rq_payload_bytes(req);
+       bool for_cpu = dir == DMA_FROM_DEVICE;
+       struct device *dev = nvme_dev->dev;
+       dma_addr_t *dma_list = map->private;
+       struct bio *bio = req->bio;
+       int offset, map_idx;
+
+       offset = bio->bi_iter.bi_bvec_done;
+       map_idx = offset / NVME_CTRL_PAGE_SIZE;
+       length += offset & (NVME_CTRL_PAGE_SIZE - 1);
+
+       while (length > 0) {
+               u64 dma_addr = dma_list[map_idx++];
+
+               if (for_cpu)
+                       __dma_sync_single_for_cpu(dev, dma_addr,
+                                                 NVME_CTRL_PAGE_SIZE, dir);
+               else
+                       __dma_sync_single_for_device(dev, dma_addr,
+                                                    NVME_CTRL_PAGE_SIZE, dir);
+               length -= NVME_CTRL_PAGE_SIZE;
+       }
+}
+
+static void nvme_unmap_premapped_data(struct nvme_dev *dev,
+                                     struct request *req)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+       if (rq_data_dir(req) == READ)
+               nvme_sync_dma(dev, req, DMA_FROM_DEVICE);
+       if (!(iod->flags & IOD_SINGLE_SEGMENT))
+               nvme_free_descriptors(req);
+}
+
+static blk_status_t nvme_dma_premapped(struct request *req,
+                                      struct nvme_queue *nvmeq)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       int length = blk_rq_payload_bytes(req);
+       struct blk_mq_dma_map *map = req->dma_map;
+       u64 dma_addr, prp1_dma, prp2_dma;
+       struct bio *bio = req->bio;
+       dma_addr_t *dma_list;
+       dma_addr_t prp_dma;
+       __le64 *prp_list;
+       int i, map_idx;
+       int offset;
+
+       dma_list = map->private;
+
+       if (rq_data_dir(req) == WRITE)
+               nvme_sync_dma(nvmeq->dev, req, DMA_TO_DEVICE);
+
+       offset = bio->bi_iter.bi_bvec_done;
+       map_idx = offset / NVME_CTRL_PAGE_SIZE;
+       offset &= (NVME_CTRL_PAGE_SIZE - 1);
+
+       prp1_dma = dma_list[map_idx++] + offset;
+
+       length -= (NVME_CTRL_PAGE_SIZE - offset);
+       if (length <= 0) {
+               prp2_dma = 0;
+               goto done;
+       }
+
+       if (length <= NVME_CTRL_PAGE_SIZE) {
+               prp2_dma = dma_list[map_idx];
+               goto done;
+       }
+
+       if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
+           NVME_SMALL_POOL_SIZE / sizeof(__le64))
+               iod->flags |= IOD_SMALL_DESCRIPTOR;
+
+       prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+                       &prp_dma);
+       if (!prp_list)
+               return BLK_STS_RESOURCE;
+
+       iod->descriptors[iod->nr_descriptors++] = prp_list;
+       prp2_dma = prp_dma;
+       i = 0;
+       for (;;) {
+               if (i == NVME_CTRL_PAGE_SIZE >> 3) {
+                       __le64 *old_prp_list = prp_list;
+
+                       prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large,
+                                       GFP_ATOMIC, &prp_dma);
+                       if (!prp_list)
+                               goto free_prps;
+                       iod->descriptors[iod->nr_descriptors++] = prp_list;
+                       prp_list[0] = old_prp_list[i - 1];
+                       old_prp_list[i - 1] = cpu_to_le64(prp_dma);
+                       i = 1;
+               }
+
+               dma_addr = dma_list[map_idx++];
+               prp_list[i++] = cpu_to_le64(dma_addr);
+
+               length -= NVME_CTRL_PAGE_SIZE;
+               if (length <= 0)
+                       break;
+       }
+done:
+       iod->cmd.common.dptr.prp1 = cpu_to_le64(prp1_dma);
+       iod->cmd.common.dptr.prp2 = cpu_to_le64(prp2_dma);
+       return BLK_STS_OK;
+free_prps:
+       nvme_free_descriptors(req);
+       return BLK_STS_RESOURCE;
+}
+
 static void nvme_free_prps(struct request *req, unsigned int attrs)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -875,6 +992,11 @@ static void nvme_unmap_data(struct request *req)
        struct device *dma_dev = nvmeq->dev->dev;
        unsigned int attrs = 0;
 
+       if (req->bio && bio_flagged(req->bio, BIO_DMA_TOKEN)) {
+               nvme_unmap_premapped_data(nvmeq->dev, req);
+               return;
+       }
+
        if (iod->flags & IOD_SINGLE_SEGMENT) {
                static_assert(offsetof(union nvme_data_ptr, prp1) ==
                                offsetof(union nvme_data_ptr, sgl.addr));
@@ -1154,8 +1276,8 @@ static blk_status_t nvme_map_data(struct request *req)
        struct blk_dma_iter iter;
        blk_status_t ret;
 
-       if (req->bio && bio_flagged(req->bio, BIO_DMA_TOKEN))
-               return BLK_STS_RESOURCE;
+       if (req->dma_map)
+               return nvme_dma_premapped(req, nvmeq);
 
        /*
         * Try to skip the DMA iterator for single segment requests, as that
-- 
2.52.0