Enable BIO_DMA_TOKEN backed requests. They need special handling to set up the nvme request from the mapping prepared in advance, to tear it down, and to sync the buffers.
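For reference, the PRP setup from the premapped per-page DMA list boils down to the arithmetic in the standalone sketch below (userspace, illustrative names only; the actual logic is nvme_dma_premapped() in the diff): prp1 points into the first controller page at the bio offset, and prp2 carries either the second page or a chained PRP list.

#include <stdint.h>
#include <stdio.h>

#define CTRL_PAGE 4096u	/* stands in for NVME_CTRL_PAGE_SIZE */

/* Derive prp1/prp2 from a premapped per-page DMA address list. */
static void prp_from_list(const uint64_t *list, unsigned int off, int len,
			  uint64_t *prp1, uint64_t *prp2)
{
	unsigned int idx = off / CTRL_PAGE;

	off &= CTRL_PAGE - 1;
	*prp1 = list[idx++] + off;		/* transfer may start mid-page */
	len -= (int)(CTRL_PAGE - off);
	if (len <= 0)
		*prp2 = 0;			/* fits in one page, no prp2 */
	else if (len <= (int)CTRL_PAGE)
		*prp2 = list[idx];		/* exactly one more page */
	else
		*prp2 = ~0ull;			/* would point at a PRP list */
}

int main(void)
{
	uint64_t pages[] = { 0x100000, 0x101000, 0x102000 };
	uint64_t p1, p2;

	prp_from_list(pages, 512, CTRL_PAGE, &p1, &p2);
	printf("prp1=%#llx prp2=%#llx\n",
	       (unsigned long long)p1, (unsigned long long)p2);
	return 0;
}

With the sample values this prints prp1=0x100200 prp2=0x101000: the first entry is offset into its page, the second is used as-is.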
Suggested-by: Keith Busch <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 drivers/nvme/host/pci.c | 126 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 63e03c3dc044..ac377416b088 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -797,6 +797,123 @@ static void nvme_free_descriptors(struct request *req)
 	}
 }
 
+static void nvme_sync_dma(struct nvme_dev *nvme_dev, struct request *req,
+			  enum dma_data_direction dir)
+{
+	struct blk_mq_dma_map *map = req->dma_map;
+	int length = blk_rq_payload_bytes(req);
+	bool for_cpu = dir == DMA_FROM_DEVICE;
+	struct device *dev = nvme_dev->dev;
+	dma_addr_t *dma_list = map->private;
+	struct bio *bio = req->bio;
+	int offset, map_idx;
+
+	offset = bio->bi_iter.bi_bvec_done;
+	map_idx = offset / NVME_CTRL_PAGE_SIZE;
+	length += offset & (NVME_CTRL_PAGE_SIZE - 1);
+
+	while (length > 0) {
+		u64 dma_addr = dma_list[map_idx++];
+
+		if (for_cpu)
+			__dma_sync_single_for_cpu(dev, dma_addr,
+						  NVME_CTRL_PAGE_SIZE, dir);
+		else
+			__dma_sync_single_for_device(dev, dma_addr,
+						     NVME_CTRL_PAGE_SIZE, dir);
+		length -= NVME_CTRL_PAGE_SIZE;
+	}
+}
+
+static void nvme_unmap_premapped_data(struct nvme_dev *dev,
+				      struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	if (rq_data_dir(req) == READ)
+		nvme_sync_dma(dev, req, DMA_FROM_DEVICE);
+	if (!(iod->flags & IOD_SINGLE_SEGMENT))
+		nvme_free_descriptors(req);
+}
+
+static blk_status_t nvme_dma_premapped(struct request *req,
+				       struct nvme_queue *nvmeq)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	int length = blk_rq_payload_bytes(req);
+	struct blk_mq_dma_map *map = req->dma_map;
+	u64 dma_addr, prp1_dma, prp2_dma;
+	struct bio *bio = req->bio;
+	dma_addr_t *dma_list;
+	dma_addr_t prp_dma;
+	__le64 *prp_list;
+	int i, map_idx;
+	int offset;
+
+	dma_list = map->private;
+
+	if (rq_data_dir(req) == WRITE)
+		nvme_sync_dma(nvmeq->dev, req, DMA_TO_DEVICE);
+
+	offset = bio->bi_iter.bi_bvec_done;
+	map_idx = offset / NVME_CTRL_PAGE_SIZE;
+	offset &= (NVME_CTRL_PAGE_SIZE - 1);
+
+	prp1_dma = dma_list[map_idx++] + offset;
+
+	length -= (NVME_CTRL_PAGE_SIZE - offset);
+	if (length <= 0) {
+		prp2_dma = 0;
+		goto done;
+	}
+
+	if (length <= NVME_CTRL_PAGE_SIZE) {
+		prp2_dma = dma_list[map_idx];
+		goto done;
+	}
+
+	if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
+	    NVME_SMALL_POOL_SIZE / sizeof(__le64))
+		iod->flags |= IOD_SMALL_DESCRIPTOR;
+
+	prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+				  &prp_dma);
+	if (!prp_list)
+		return BLK_STS_RESOURCE;
+
+	iod->descriptors[iod->nr_descriptors++] = prp_list;
+	prp2_dma = prp_dma;
+	i = 0;
+	for (;;) {
+		if (i == NVME_CTRL_PAGE_SIZE >> 3) {
+			__le64 *old_prp_list = prp_list;
+
+			prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large,
+						  GFP_ATOMIC, &prp_dma);
+			if (!prp_list)
+				goto free_prps;
+			iod->descriptors[iod->nr_descriptors++] = prp_list;
+			prp_list[0] = old_prp_list[i - 1];
+			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
+			i = 1;
+		}
+
+		dma_addr = dma_list[map_idx++];
+		prp_list[i++] = cpu_to_le64(dma_addr);
+
+		length -= NVME_CTRL_PAGE_SIZE;
+		if (length <= 0)
+			break;
+	}
+done:
+	iod->cmd.common.dptr.prp1 = cpu_to_le64(prp1_dma);
+	iod->cmd.common.dptr.prp2 = cpu_to_le64(prp2_dma);
+	return BLK_STS_OK;
+free_prps:
+	nvme_free_descriptors(req);
+	return BLK_STS_RESOURCE;
+}
+
 static void nvme_free_prps(struct request *req, unsigned int attrs)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -875,6 +992,11 @@ static void nvme_unmap_data(struct request *req)
 	struct device *dma_dev = nvmeq->dev->dev;
 	unsigned int attrs = 0;
 
+	if (req->bio && bio_flagged(req->bio, BIO_DMA_TOKEN)) {
+		nvme_unmap_premapped_data(nvmeq->dev, req);
+		return;
+	}
+
 	if (iod->flags & IOD_SINGLE_SEGMENT) {
 		static_assert(offsetof(union nvme_data_ptr, prp1) ==
 				offsetof(union nvme_data_ptr, sgl.addr));
@@ -1154,8 +1276,8 @@ static blk_status_t nvme_map_data(struct request *req)
 	struct blk_dma_iter iter;
 	blk_status_t ret;
 
-	if (req->bio && bio_flagged(req->bio, BIO_DMA_TOKEN))
-		return BLK_STS_RESOURCE;
+	if (req->dma_map)
+		return nvme_dma_premapped(req, nvmeq);
 
 	/*
 	 * Try to skip the DMA iterator for single segment requests, as that
-- 
2.52.0
