[PATCH] virtio pmem: fix async flush ordering

From: Pankaj Gupta
Date: Wed Nov 20 2019 - 04:29:04 EST


Remove logic to create child bio in the async flush function which
causes child bio to get executed after parent bio 'pmem_make_request'
completes. This resulted in wrong ordering of REQ_PREFLUSH with the
data write request.

Instead we are performing flush from the parent bio to maintain the
correct order. Also, returning from function 'pmem_make_request' if
REQ_PREFLUSH returns an error.

Reported-by: Jeff Moyer <jmoyer@xxxxxxxxxx>
Signed-off-by: Pankaj Gupta <pagupta@xxxxxxxxxx>
---
drivers/acpi/nfit/core.c | 4 ++--
drivers/nvdimm/claim.c | 2 +-
drivers/nvdimm/nd.h | 2 +-
drivers/nvdimm/nd_virtio.c | 29 ++---------------------------
drivers/nvdimm/pmem.c | 14 ++++++++++----
drivers/nvdimm/region_devs.c | 6 +++---
drivers/nvdimm/virtio_pmem.c | 2 +-
drivers/nvdimm/virtio_pmem.h | 2 +-
include/linux/libnvdimm.h | 4 ++--
9 files changed, 23 insertions(+), 42 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 14e68f202f81..afbd5e2b2ea8 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2426,7 +2426,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
offset = to_interleave_offset(offset, mmio);

writeq(cmd, mmio->addr.base + offset);
- nvdimm_flush(nfit_blk->nd_region, NULL);
+ nvdimm_flush(nfit_blk->nd_region);

if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
readq(mmio->addr.base + offset);
@@ -2475,7 +2475,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
}

if (rw)
- nvdimm_flush(nfit_blk->nd_region, NULL);
+ nvdimm_flush(nfit_blk->nd_region);

rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
return rc;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 2985ca949912..0fedb2fbfcbe 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -293,7 +293,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}

memcpy_flushcache(nsio->addr + offset, buf, size);
- ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+ ret = nvdimm_flush(to_nd_region(ndns->dev.parent));
if (ret)
rc = ret;

diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ee5c04070ef9..77d8b9f0c34a 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -155,7 +155,7 @@ struct nd_region {
struct badblocks bb;
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
- int (*flush)(struct nd_region *nd_region, struct bio *bio);
+ int (*flush)(struct nd_region *nd_region);
struct nd_mapping mapping[0];
};

diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fa..9604ba08a68a 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -35,7 +35,7 @@ void virtio_pmem_host_ack(struct virtqueue *vq)
EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);

/* The request submission function */
-static int virtio_pmem_flush(struct nd_region *nd_region)
+int virtio_pmem_flush(struct nd_region *nd_region)
{
struct virtio_device *vdev = nd_region->provider_data;
struct virtio_pmem *vpmem = vdev->priv;
@@ -96,30 +96,5 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
kfree(req_data);
return err;
};
-
-/* The asynchronous flush callback function */
-int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
-{
- /*
- * Create child bio for asynchronous flush and chain with
- * parent bio. Otherwise directly call nd_region flush.
- */
- if (bio && bio->bi_iter.bi_sector != -1) {
- struct bio *child = bio_alloc(GFP_ATOMIC, 0);
-
- if (!child)
- return -ENOMEM;
- bio_copy_dev(child, bio);
- child->bi_opf = REQ_PREFLUSH;
- child->bi_iter.bi_sector = -1;
- bio_chain(child, bio);
- submit_bio(child);
- return 0;
- }
- if (virtio_pmem_flush(nd_region))
- return -EIO;
-
- return 0;
-};
-EXPORT_SYMBOL_GPL(async_pmem_flush);
+EXPORT_SYMBOL_GPL(virtio_pmem_flush);
MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index f9f76f6ba07b..b3ca641668a2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -194,7 +194,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct nd_region *nd_region = to_region(pmem);

if (bio->bi_opf & REQ_PREFLUSH)
- ret = nvdimm_flush(nd_region, bio);
+ ret = nvdimm_flush(nd_region);
+
+ if (ret) {
+ bio->bi_status = errno_to_blk_status(ret);
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }

do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -209,7 +215,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
nd_iostat_end(bio, start);

if (bio->bi_opf & REQ_FUA)
- ret = nvdimm_flush(nd_region, bio);
+ ret = nvdimm_flush(nd_region);

if (ret)
bio->bi_status = errno_to_blk_status(ret);
@@ -549,14 +555,14 @@ static int nd_pmem_remove(struct device *dev)
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
- nvdimm_flush(to_nd_region(dev->parent), NULL);
+ nvdimm_flush(to_nd_region(dev->parent));

return 0;
}

static void nd_pmem_shutdown(struct device *dev)
{
- nvdimm_flush(to_nd_region(dev->parent), NULL);
+ nvdimm_flush(to_nd_region(dev->parent));
}

static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index ef423ba1a711..cfd96a0d52f2 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -287,7 +287,7 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att
return rc;
if (!flush)
return -EINVAL;
- rc = nvdimm_flush(nd_region, NULL);
+ rc = nvdimm_flush(nd_region);
if (rc)
return rc;

@@ -1079,14 +1079,14 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);

-int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
+int nvdimm_flush(struct nd_region *nd_region)
{
int rc = 0;

if (!nd_region->flush)
rc = generic_nvdimm_flush(nd_region);
else {
- if (nd_region->flush(nd_region, bio))
+ if (nd_region->flush(nd_region))
rc = -EIO;
}

diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 5e3d07b47e0c..a6234466674d 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -80,7 +80,7 @@ static int virtio_pmem_probe(struct virtio_device *vdev)

ndr_desc.res = &res;
ndr_desc.numa_node = nid;
- ndr_desc.flush = async_pmem_flush;
+ ndr_desc.flush = virtio_pmem_flush;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index 0dddefe594c4..4f9ee19aad90 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -51,5 +51,5 @@ struct virtio_pmem {
};

void virtio_pmem_host_ack(struct virtqueue *vq);
-int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
+int virtio_pmem_flush(struct nd_region *nd_region);
#endif
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b6eddf912568..211c87edb4eb 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -130,7 +130,7 @@ struct nd_region_desc {
int target_node;
unsigned long flags;
struct device_node *of_node;
- int (*flush)(struct nd_region *nd_region, struct bio *bio);
+ int (*flush)(struct nd_region *nd_region);
};

struct device;
@@ -261,7 +261,7 @@ unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr);
unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
u64 nd_fletcher64(void *addr, size_t len, bool le);
-int nvdimm_flush(struct nd_region *nd_region, struct bio *bio);
+int nvdimm_flush(struct nd_region *nd_region);
int generic_nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region);
int nvdimm_has_cache(struct nd_region *nd_region);
--
2.20.1