[PATCH] nvme-pci: fix prp list allocation

From: Matthew Waltz
Date: Sat Feb 12 2022 - 15:07:51 EST


Fix kernel block layer errors caused by the hard-coded 256-byte alignment
the driver passes to dma_pool_create() for its small PRP pool. The NVMe
specification requires a PRP List PBAO offset field of 0h, i.e. a PRP List
must be aligned to the configured memory page size (4096 bytes here).
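
For context, the alignment guarantee is the fourth argument to
dma_pool_create(); a minimal sketch of the two pool setups as they stand
today (the small pool is the one removed below):

	/* page pool: allocations aligned to NVME_CTRL_PAGE_SIZE (4096 bytes) */
	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
					     NVME_CTRL_PAGE_SIZE,
					     NVME_CTRL_PAGE_SIZE, 0);

	/* small pool: allocations only guaranteed 256-byte alignment */
	dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
					      256, 256, 0);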

This essentially reverts commit 99802a7aee2b ("NVMe: Optimise memory usage
for I/Os between 4k and 128k").

Resolve this by always allocating PRP and SGL lists from the page-sized
pool, which carries the required alignment; the npages == 0 bookkeeping for
the small pool goes away with it.

Signed-off-by: Matthew Waltz <matthewwaltzis@xxxxxxxxx>
---
drivers/nvme/host/pci.c | 51 +++++++++++------------------------------
1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6a99ed680915..4fcb159c7df3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -119,7 +119,6 @@ struct nvme_dev {
u32 __iomem *dbs;
struct device *dev;
struct dma_pool *prp_page_pool;
- struct dma_pool *prp_small_pool;
unsigned online_queues;
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
@@ -228,7 +227,7 @@ struct nvme_iod {
struct nvme_queue *nvmeq;
bool use_sgl;
int aborted;
- int npages; /* In the PRP list. 0 means small pool in use */
+ int npages; /* In the PRP list. */
int nents; /* Used in scatterlist */
dma_addr_t first_dma;
unsigned int dma_len; /* length of single DMA segment mapping */
@@ -598,10 +597,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
WARN_ON_ONCE(!iod->nents);

nvme_unmap_sg(dev, req);
- if (iod->npages == 0)
- dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
- iod->first_dma);
- else if (iod->use_sgl)
+ if (iod->use_sgl)
nvme_free_sgls(dev, req);
else
nvme_free_prps(dev, req);
@@ -635,7 +631,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
__le64 *prp_list;
void **list = nvme_pci_iod_list(req);
dma_addr_t prp_dma;
- int nprps, i;
+ int i;

length -= (NVME_CTRL_PAGE_SIZE - offset);
if (length <= 0) {
@@ -657,14 +653,8 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
goto done;
}

- nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
- if (nprps <= (256 / 8)) {
- pool = dev->prp_small_pool;
- iod->npages = 0;
- } else {
- pool = dev->prp_page_pool;
- iod->npages = 1;
- }
+ pool = dev->prp_page_pool;
+ iod->npages = 1;

prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list) {
@@ -753,13 +743,8 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
return BLK_STS_OK;
}

- if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
- pool = dev->prp_small_pool;
- iod->npages = 0;
- } else {
- pool = dev->prp_page_pool;
- iod->npages = 1;
- }
+ pool = dev->prp_page_pool;
+ iod->npages = 1;

sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list) {
@@ -2727,7 +2712,7 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
return 0;
}

-static int nvme_setup_prp_pools(struct nvme_dev *dev)
+static int nvme_setup_prp_pool(struct nvme_dev *dev)
{
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
NVME_CTRL_PAGE_SIZE,
@@ -2735,20 +2720,12 @@ static int nvme_setup_prp_pools(struct nvme_dev *dev)
if (!dev->prp_page_pool)
return -ENOMEM;

- /* Optimisation for I/Os between 4k and 128k */
- dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
- 256, 256, 0);
- if (!dev->prp_small_pool) {
- dma_pool_destroy(dev->prp_page_pool);
- return -ENOMEM;
- }
return 0;
}

-static void nvme_release_prp_pools(struct nvme_dev *dev)
+static void nvme_release_prp_pool(struct nvme_dev *dev)
{
dma_pool_destroy(dev->prp_page_pool);
- dma_pool_destroy(dev->prp_small_pool);
}

static void nvme_free_tagset(struct nvme_dev *dev)
@@ -3080,7 +3057,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
mutex_init(&dev->shutdown_lock);

- result = nvme_setup_prp_pools(dev);
+ result = nvme_setup_prp_pool(dev);
if (result)
goto unmap;

@@ -3109,7 +3086,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
GFP_KERNEL, node);
if (!dev->iod_mempool) {
result = -ENOMEM;
- goto release_pools;
+ goto release_pool;
}

result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
@@ -3126,8 +3103,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)

release_mempool:
mempool_destroy(dev->iod_mempool);
- release_pools:
- nvme_release_prp_pools(dev);
+ release_pool:
+ nvme_release_prp_pool(dev);
unmap:
nvme_dev_unmap(dev);
put_pci:
@@ -3198,7 +3175,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_release_prp_pools(dev);
+ nvme_release_prp_pool(dev);
nvme_dev_unmap(dev);
nvme_uninit_ctrl(&dev->ctrl);
}
--
2.25.1