During disk revalidation done with sd_revalidate(), the zones of a
zoned disk are checked using the helper function
blk_revalidate_disk_zones() if a zone configuration change is detected
(change in the number of zones or zone size). The function
blk_revalidate_disk_zones() issues report_zones calls that are very
large, that is, to obtain zone information for all zones of the disk
with a single command. The size of the report zones command buffer
necessary for such a large request is generally lower than the disk
max_hw_sectors and KMALLOC_MAX_SIZE (4MB) but still very large (e.g.
about 3.5MB for a 15TB disk with 256MB zones). This large report zones
reply buffer allocation with kmalloc succeeds on boot, but frequently
fails at run time, especially for a system under memory pressure. This
causes the disk revalidation to fail and the disk capacity to be
changed to 0.
This problem can be avoided with a more intelligent report zones buffer
allocation. This patch introduces the arbitrary SD_ZBC_REPORT_SIZE
allocation limit of 1MB, which fits 16383 zone descriptors for every
report zone command execution, thus allowing a full zone report with 4
or 5 commands for most ZBC/ZAC disks today. This limit may be lowered to
satisfy the disk max_hw_sectors limit. Furthermore, reduce the
likelihood of a buffer allocation failure while guaranteeing progress
in the zone report by retrying the buffer allocation with a smaller
size in case kmalloc() fails.
Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
Fixes: e76239a3748c ("block: add a report_zones method")
Cc: [email protected]
Signed-off-by: Damien Le Moal <[email protected]>
---
drivers/scsi/sd_zbc.c | 54 +++++++++++++++++++++++++++++++++++--------
1 file changed, 45 insertions(+), 9 deletions(-)
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7334024b64f1..37469d77264e 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -103,6 +103,44 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp,
unsigned char *buf,
return 0;
}
+/*
+ * Arbitrary maximum report zones buffer size of 1MB, fitting 16383 x 64B zone
+ * descriptors plus the 64B report header.
+ */
+#define SD_ZBC_REPORT_SIZE (16384U * 64U)
+
+/**
+ * sd_zbc_alloc_report_buffer() - Allocate a report zones reply buffer.
+ * @disk: The target disk
+ * @buflen: Wanted size on entry, usable size of the returned buffer on exit
+ * @gfp_mask: Memory allocation mask
+ *
+ * Return: The buffer, freed by the caller with free_pages() and the order
+ * get_order(*@buflen), or NULL if not even one page could be allocated.
+ */
+static void *sd_zbc_alloc_report_buffer(struct gendisk *disk, size_t *buflen,
+					gfp_t gfp_mask)
+{
+	size_t hw_max = (size_t)queue_max_hw_sectors(disk->queue) << 9;
+	size_t bufsize = min_t(size_t, *buflen, SD_ZBC_REPORT_SIZE);
+	struct page *page;
+	int order;
+
+	/*
+	 * Retry with smaller orders rather than failing disk revalidation.
+	 * Orders round the size up, so cap *buflen to the hardware limit.
+	 */
+	bufsize = min_t(size_t, bufsize, hw_max);
+	for (order = get_order(bufsize); order >= 0; order--) {
+		page = alloc_pages(gfp_mask, order);
+		if (page) {
+			*buflen = min_t(size_t, PAGE_SIZE << order, hw_max);
+			return page_address(page);
+		}
+	}
+	return NULL;
+}
+
/**
* sd_zbc_report_zones - Disk report zones operation.
* @disk: The target disk
@@ -118,9 +156,9 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t
sector,
gfp_t gfp_mask)
{
struct scsi_disk *sdkp = scsi_disk(disk);
- unsigned int i, buflen, nrz = *nr_zones;
+ unsigned int i, nrz = *nr_zones;
unsigned char *buf;
- size_t offset = 0;
+ size_t buflen, offset = 0;
int ret = 0;
if (!sd_is_zoned(sdkp))
@@ -128,13 +166,11 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t
sector,
return -EOPNOTSUPP;
/*
- * Get a reply buffer for the number of requested zones plus a header,
- * without exceeding the device maximum command size. For ATA disks,
- * buffers must be aligned to 512B.
+ * Try to get a buffer that can fit the requested number of zones plus
+ * the command reply header, all 64B in size.
*/
- buflen = min(queue_max_hw_sectors(disk->queue) << 9,
- roundup((nrz + 1) * 64, 512));
- buf = kmalloc(buflen, gfp_mask);
+ buflen = (nrz + 1) * 64;
+ buf = sd_zbc_alloc_report_buffer(disk, &buflen, gfp_mask);
if (!buf)
return -ENOMEM;
@@ -153,7 +189,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t
sector,
*nr_zones = nrz;
out_free_buf:
- kfree(buf);
+ free_pages((unsigned long)buf, get_order(buflen));
return ret;
}
--
2.21.0