Re: [PATCH] erofs: support direct IO for uncompressed file

From: Gao Xiang
Date: Tue Dec 22 2020 - 08:23:10 EST


Hi Jianan,

On Mon, Dec 14, 2020 at 10:04:27PM +0800, Huang Jianan wrote:
> direct IO is useful in certain scenarios for uncompressed files.
> For example, it can avoid double pagecache when use the uncompressed
> file to mount upper layer filesystem.
>
> In addition, another patch adds direct IO test for the stress tool
> which was mentioned here:
> https://lore.kernel.org/linux-erofs/20200206135631.1491-1-hsiangkao@xxxxxxx/
>
> Signed-off-by: Huang Jianan <huangjianan@xxxxxxxx>
> Signed-off-by: Guo Weichao <guoweichao@xxxxxxxx>
> ---
> fs/erofs/data.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 57 insertions(+)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index ea4f693bee22..3067aa3defff 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -6,6 +6,8 @@
> */
> #include "internal.h"
> #include <linux/prefetch.h>
> +#include <linux/uio.h>
> +#include <linux/blkdev.h>
>
> #include <trace/events/erofs.h>
>
> @@ -312,6 +314,60 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
> submit_bio(bio);
> }
>
> +static int erofs_get_block(struct inode *inode, sector_t iblock,
> + struct buffer_head *bh, int create)
> +{
> + struct erofs_map_blocks map = {
> + .m_la = blknr_to_addr(iblock),
> + };
> + int err;
> +
> + err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
> + if (err)
> + return err;
> +
> + if (map.m_flags & EROFS_MAP_MAPPED)
> + map_bh(bh, inode->i_sb, erofs_blknr(map.m_pa));
> +
> + return err;
> +}
> +
> +static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
> + loff_t offset)
> +{
> + unsigned i_blkbits = READ_ONCE(inode->i_blkbits);

It would be better to fold check_direct_IO into its only caller,
erofs_direct_IO; also, the READ_ONCE above looks somewhat weird...

No rush here: since 5.11-rc1 isn't out yet, we have >= 2 months to
work it out.

Thanks,
Gao Xiang

> + unsigned blkbits = i_blkbits;
> + unsigned blocksize_mask = (1 << blkbits) - 1;
> + unsigned long align = offset | iov_iter_alignment(iter);
> + struct block_device *bdev = inode->i_sb->s_bdev;
> +
> + if (align & blocksize_mask) {
> + if (bdev)
> + blkbits = blksize_bits(bdev_logical_block_size(bdev));
> + blocksize_mask = (1 << blkbits) - 1;
> + if (align & blocksize_mask)
> + return -EINVAL;
> + return 1;
> + }
> + return 0;
> +}
> +
> +static ssize_t erofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> +{
> + struct address_space *mapping = iocb->ki_filp->f_mapping;
> + struct inode *inode = mapping->host;
> + loff_t offset = iocb->ki_pos;
> + int err;
> +
> + err = check_direct_IO(inode, iter, offset);
> + if (err)
> + return err < 0 ? err : 0;
> +
> + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
> + erofs_get_block, NULL, NULL,
> + DIO_LOCKING | DIO_SKIP_HOLES);
> +}
> +
> static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> {
> struct inode *inode = mapping->host;
> @@ -336,6 +392,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> const struct address_space_operations erofs_raw_access_aops = {
> .readpage = erofs_raw_access_readpage,
> .readahead = erofs_raw_access_readahead,
> + .direct_IO = erofs_direct_IO,
> .bmap = erofs_bmap,
> };
>
> --
> 2.25.1
>