[PATCH v3 15/15] nvme: Ensure atomic writes will be executed atomically

From: John Garry
Date: Wed Jan 24 2024 - 06:47:38 EST


From: Alan Adamson <alan.adamson@xxxxxxxxxx>

NVMe has no dedicated atomic write command (i.e. one which would return an
error for a command which exceeds the controller's atomic write limits).

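As an insurance policy against the block layer sending requests which
cannot be executed atomically, add a check in the queue path
(nvme_setup_rw()): fail any REQ_ATOMIC request with BLK_STS_IOERR if it is
larger than the atomic write maximum size or if it crosses an atomic write
boundary.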

Signed-off-by: Alan Adamson <alan.adamson@xxxxxxxxxx>
Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
drivers/nvme/host/core.c | 35 +++++++++++++++++++++++++++++++++--
drivers/nvme/host/nvme.h | 2 ++
2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5045c84f2516..6a34a5d92088 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -911,6 +911,32 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;

+ /*
+ * Ensure that nothing has been sent which cannot be executed
+ * atomically.
+ */
+ if (req->cmd_flags & REQ_ATOMIC) {
+ struct nvme_ns_head *head = ns->head;
+ u32 boundary_bytes = head->atomic_boundary;
+
+ if (blk_rq_bytes(req) > ns->head->atomic_max)
+ return BLK_STS_IOERR;
+
+ if (boundary_bytes) {
+ u32 mask = boundary_bytes - 1, imask = ~mask;
+ u32 start = blk_rq_pos(req) << SECTOR_SHIFT;
+ u32 end = start + blk_rq_bytes(req);
+
+ if (blk_rq_bytes(req) > boundary_bytes)
+ return BLK_STS_IOERR;
+
+ if (((start & imask) != (end & imask)) &&
+ (end & mask)) {
+ return BLK_STS_IOERR;
+ }
+ }
+ }
+
cmnd->rw.opcode = op;
cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
@@ -1912,7 +1938,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
}

static void nvme_update_atomic_write_disk_info(struct gendisk *disk,
- struct nvme_ctrl *ctrl, struct nvme_id_ns *id, u32 bs, u32 atomic_bs)
+ struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ struct nvme_id_ns *id, u32 bs, u32 atomic_bs)
{
unsigned int unit_min = 0, unit_max = 0, boundary = 0, max_bytes = 0;
struct request_queue *q = disk->queue;
@@ -1943,6 +1970,9 @@ static void nvme_update_atomic_write_disk_info(struct gendisk *disk,
unit_max = rounddown_pow_of_two(atomic_bs);
}

+ head->atomic_max = max_bytes;
+ head->atomic_boundary = boundary;
+
blk_queue_atomic_write_max_bytes(q, max_bytes);
blk_queue_atomic_write_unit_min_sectors(q, unit_min >> SECTOR_SHIFT);
blk_queue_atomic_write_unit_max_sectors(q, unit_max >> SECTOR_SHIFT);
@@ -1980,7 +2010,8 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
else
atomic_bs = (1 + ctrl->subsys->awupf) * bs;

- nvme_update_atomic_write_disk_info(disk, ctrl, id, bs, atomic_bs);
+ nvme_update_atomic_write_disk_info(disk, ctrl, head, id,
+ bs, atomic_bs);
}

if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 030c80818240..938060c85e6f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -476,6 +476,8 @@ struct nvme_ns_head {
struct device cdev_device;

struct gendisk *disk;
+ u32 atomic_max;
+ u32 atomic_boundary;
#ifdef CONFIG_NVME_MULTIPATH
struct bio_list requeue_list;
spinlock_t requeue_lock;
--
2.31.1