[PATCH v2 5/7] block, bio, fs: convert most filesystems to pin_user_pages_fast()

From: John Hubbard
Date: Wed Aug 31 2022 - 00:19:30 EST


Use dio_w_*() wrapper calls in place of get_user_pages_fast() (as reached
via iov_iter_get_pages2() and iov_iter_get_pages_alloc2()), get_page() and
put_page().

This converts the Direct IO parts of most filesystems over to using
FOLL_PIN (pin_user_page*()) page pinning.

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
block/bio.c | 27 ++++++++++++++-------------
block/blk-map.c | 7 ++++---
fs/direct-io.c | 40 ++++++++++++++++++++--------------------
fs/iomap/direct-io.c | 2 +-
4 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 3d3a2678fea2..6c6110f7054e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1125,7 +1125,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ dio_w_unpin_user_page(bvec->bv_page);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1162,7 +1162,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
}

if (same_page)
- put_page(page);
+ dio_w_unpin_user_page(page);
return 0;
}

@@ -1176,7 +1176,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
if (same_page)
- put_page(page);
+ dio_w_unpin_user_page(page);
return 0;
}

@@ -1187,10 +1187,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
- * For multi-segment *iter, this function only adds pages from the
- * next non-empty segment of the iov iterator.
+ * Pins pages from *iter and appends them to @bio's bvec array. The pages will
+ * have to be released using dio_w_unpin_user_page() when done. For
+ * multi-segment *iter, this function only adds pages from the next non-empty
+ * segment of the iov iterator.
*/
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1218,8 +1218,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
- size = iov_iter_get_pages2(iter, pages, UINT_MAX - bio->bi_iter.bi_size,
- nr_pages, &offset);
+ size = dio_w_iov_iter_pin_pages(iter, pages,
+ UINT_MAX - bio->bi_iter.bi_size,
+ nr_pages, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;

@@ -1252,7 +1253,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_revert(iter, left);
out:
while (i < nr_pages)
- put_page(pages[i++]);
+ dio_w_unpin_user_page(pages[i++]);

return ret;
}
@@ -1444,9 +1445,9 @@ void bio_set_pages_dirty(struct bio *bio)
* have been written out during the direct-IO read. So we take another ref on
* the BIO and re-dirty the pages in process context.
*
- * It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * It is expected that bio_check_pages_dirty() will wholly own the BIO from here
+ * on. It will run one dio_w_unpin_user_page() against each page and will run
+ * one bio_put() against the BIO.
*/

static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk-map.c b/block/blk-map.c
index 7196a6b64c80..4e333ad9776d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -254,7 +254,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
size_t offs, added = 0;
int npages;

- bytes = iov_iter_get_pages_alloc2(iter, &pages, LONG_MAX, &offs);
+ bytes = dio_w_iov_iter_pin_pages_alloc(iter, &pages, LONG_MAX,
+ &offs);
if (unlikely(bytes <= 0)) {
ret = bytes ? bytes : -EFAULT;
goto out_unmap;
@@ -276,7 +277,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
max_sectors, &same_page)) {
if (same_page)
- put_page(page);
+ dio_w_unpin_user_page(page);
break;
}

@@ -289,7 +290,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
* release the pages we didn't map into the bio, if any
*/
while (j < npages)
- put_page(pages[j++]);
+ dio_w_unpin_user_page(pages[j++]);
kvfree(pages);
/* couldn't stuff something into bio? */
if (bytes) {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f669163d5860..05c044c55374 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -169,8 +169,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
const enum req_op dio_op = dio->opf & REQ_OP_MASK;
ssize_t ret;

- ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
- &sdio->from);
+ ret = dio_w_iov_iter_pin_pages(sdio->iter, dio->pages, LONG_MAX,
+ DIO_PAGES, &sdio->from);

if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) {
struct page *page = ZERO_PAGE(0);
@@ -181,7 +181,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
*/
if (dio->page_errors == 0)
dio->page_errors = ret;
- get_page(page);
+ dio_w_pin_user_page(page);
dio->pages[0] = page;
sdio->head = 0;
sdio->tail = 1;
@@ -197,7 +197,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
return 0;
}
- return ret;
+ return ret;
}

/*
@@ -324,7 +324,7 @@ static void dio_aio_complete_work(struct work_struct *work)
static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);

/*
- * Asynchronous IO callback.
+ * Asynchronous IO callback.
*/
static void dio_bio_end_aio(struct bio *bio)
{
@@ -449,7 +449,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
while (sdio->head < sdio->tail)
- put_page(dio->pages[sdio->head++]);
+ dio_w_unpin_user_page(dio->pages[sdio->head++]);
}

/*
@@ -716,7 +716,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
*/
if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
sdio->pages_in_io--;
- get_page(sdio->cur_page);
+ dio_w_pin_user_page(sdio->cur_page);
sdio->final_block_in_bio = sdio->cur_page_block +
(sdio->cur_page_len >> sdio->blkbits);
ret = 0;
@@ -725,7 +725,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
}
return ret;
}
-
+
/*
* Put cur_page under IO. The section of cur_page which is described by
* cur_page_offset,cur_page_len is put into a BIO. The section of cur_page
@@ -787,7 +787,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
* An autonomous function to put a chunk of a page under deferred IO.
*
* The caller doesn't actually know (or care) whether this piece of page is in
- * a BIO, or is under IO or whatever. We just take care of all possible
+ * a BIO, or is under IO or whatever. We just take care of all possible
* situations here. The separation between the logic of do_direct_IO() and
* that of submit_page_section() is important for clarity. Please don't break.
*
@@ -832,13 +832,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
*/
if (sdio->cur_page) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- put_page(sdio->cur_page);
+ dio_w_unpin_user_page(sdio->cur_page);
sdio->cur_page = NULL;
if (ret)
return ret;
}

- get_page(page); /* It is in dio */
+ dio_w_pin_user_page(page); /* It is in dio */
sdio->cur_page = page;
sdio->cur_page_offset = offset;
sdio->cur_page_len = len;
@@ -853,7 +853,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
- put_page(sdio->cur_page);
+ dio_w_unpin_user_page(sdio->cur_page);
sdio->cur_page = NULL;
}
return ret;
@@ -890,7 +890,7 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
* We need to zero out part of an fs block. It is either at the
* beginning or the end of the fs block.
*/
- if (end)
+ if (end)
this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;

this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
@@ -954,7 +954,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,

ret = get_more_blocks(dio, sdio, map_bh);
if (ret) {
- put_page(page);
+ dio_w_unpin_user_page(page);
goto out;
}
if (!buffer_mapped(map_bh))
@@ -999,7 +999,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,

/* AKPM: eargh, -ENOTBLK is a hack */
if (dio_op == REQ_OP_WRITE) {
- put_page(page);
+ dio_w_unpin_user_page(page);
return -ENOTBLK;
}

@@ -1012,7 +1012,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
if (sdio->block_in_file >=
i_size_aligned >> blkbits) {
/* We hit eof */
- put_page(page);
+ dio_w_unpin_user_page(page);
goto out;
}
zero_user(page, from, 1 << blkbits);
@@ -1052,7 +1052,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
sdio->next_block_for_io,
map_bh);
if (ret) {
- put_page(page);
+ dio_w_unpin_user_page(page);
goto out;
}
sdio->next_block_for_io += this_chunk_blocks;
@@ -1067,8 +1067,8 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
break;
}

- /* Drop the ref which was taken in get_user_pages() */
- put_page(page);
+ /* Drop the ref which was taken in [get|pin]_user_pages() */
+ dio_w_unpin_user_page(page);
}
out:
return ret;
@@ -1288,7 +1288,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
if (retval == 0)
retval = ret2;
- put_page(sdio.cur_page);
+ dio_w_unpin_user_page(sdio.cur_page);
sdio.cur_page = NULL;
}
if (sdio.bio)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 4eb559a16c9e..fc7763c418d1 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -202,7 +202,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;

- get_page(page);
+ dio_w_pin_user_page(page);
__bio_add_page(bio, page, len, 0);
iomap_dio_submit_bio(iter, dio, bio, pos);
}
--
2.37.2