[PATCH v5 07/11] blksnap: minimum data storage unit of the original block device

From: Sergei Shtepa
Date: Mon Jun 12 2023 - 09:30:27 EST


The struct chunk describes the minimum data storage unit of the original
block device. The functions that operate on chunks implement the
algorithms for reading and writing chunk data.

Co-developed-by: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Signed-off-by: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Signed-off-by: Sergei Shtepa <sergei.shtepa@xxxxxxxxx>
---
drivers/block/blksnap/chunk.c | 454 ++++++++++++++++++++++++++++++++++
drivers/block/blksnap/chunk.h | 114 +++++++++
2 files changed, 568 insertions(+)
create mode 100644 drivers/block/blksnap/chunk.c
create mode 100644 drivers/block/blksnap/chunk.h

diff --git a/drivers/block/blksnap/chunk.c b/drivers/block/blksnap/chunk.c
new file mode 100644
index 000000000000..fe1e9b0e3323
--- /dev/null
+++ b/drivers/block/blksnap/chunk.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Veeam Software Group GmbH */
+#define pr_fmt(fmt) KBUILD_MODNAME "-chunk: " fmt
+
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include "chunk.h"
+#include "diff_buffer.h"
+#include "diff_storage.h"
+#include "params.h"
+
+/**
+ * struct chunk_bio - Context of a chunk I/O request with its embedded bio.
+ *
+ * @work:
+ *	Work item queued on system_wq by the bio completion handler.
+ * @chunks:
+ *	List of chunks (linked through &chunk.link) served by this request.
+ * @orig_bio:
+ *	Original bio the request was issued for, or NULL for a plain store.
+ * @orig_iter:
+ *	Copy of the iterator of @orig_bio taken before that bio is advanced.
+ * @bio:
+ *	Embedded bio. chunk_io_bioset is created with
+ *	offsetof(struct chunk_bio, bio) as front padding, so container_of()
+ *	on a bio allocated from that set yields this structure.
+ */
+struct chunk_bio {
+ struct work_struct work;
+ struct list_head chunks;
+ struct bio *orig_bio;
+ struct bvec_iter orig_iter;
+ struct bio bio;
+};
+
+/* Bio set for chunk load/store bios; each bio is embedded in a struct chunk_bio. */
+static struct bio_set chunk_io_bioset;
+/* Bio set for the clones allocated by chunk_alloc_clone(). */
+static struct bio_set chunk_clone_bioset;
+
+/*
+ * Convert the sequential number of the chunk into the number of the first
+ * sector covered by the chunk.
+ */
+static inline sector_t chunk_sector(struct chunk *chunk)
+{
+	sector_t first = (sector_t)chunk->number;
+
+	return first << (chunk->diff_area->chunk_shift - SECTOR_SHIFT);
+}
+
+/**
+ * chunk_store_failed() - Mark a chunk as failed and release its resources.
+ * @chunk: chunk whose processing failed; it is unlocked here via chunk_up()
+ * @error: error code; when non-zero the difference area is marked corrupted
+ *
+ * The chunk is expected to be in the CHUNK_ST_NEW or CHUNK_ST_IN_MEMORY
+ * state. Its in-memory buffer (if any) and its difference region are
+ * released.
+ *
+ * An extra reference to the difference area is taken up front because
+ * chunk_up() clears chunk->diff_area and drops the reference held through
+ * it, while the area is still needed afterwards.
+ */
+void chunk_store_failed(struct chunk *chunk, int error)
+{
+	struct diff_area *diff_area = diff_area_get(chunk->diff_area);
+
+	WARN_ON_ONCE(chunk->state != CHUNK_ST_NEW &&
+		     chunk->state != CHUNK_ST_IN_MEMORY);
+	chunk->state = CHUNK_ST_FAILED;
+
+	if (likely(chunk->diff_buffer)) {
+		diff_buffer_release(diff_area, chunk->diff_buffer);
+		chunk->diff_buffer = NULL;
+	}
+	diff_storage_free_region(chunk->diff_region);
+	chunk->diff_region = NULL;
+
+	chunk_up(chunk);
+	if (error)
+		diff_area_set_corrupted(diff_area, error);
+	diff_area_put(diff_area);
+}
+
+/*
+ * Switch the chunk to CHUNK_ST_IN_MEMORY, append it to the store queue of
+ * its difference area and unlock it. The queue processing work is started
+ * once the queue has grown beyond get_chunk_maximum_in_queue().
+ */
+static void chunk_schedule_storing(struct chunk *chunk)
+{
+	struct diff_area *area = diff_area_get(chunk->diff_area);
+	int depth;
+
+	WARN_ON_ONCE(!(chunk->state == CHUNK_ST_NEW ||
+		       chunk->state == CHUNK_ST_STORED));
+	chunk->state = CHUNK_ST_IN_MEMORY;
+
+	spin_lock(&area->store_queue_lock);
+	list_add_tail(&chunk->link, &area->store_queue);
+	depth = atomic_inc_return(&area->store_queue_count);
+	spin_unlock(&area->store_queue_lock);
+
+	chunk_up(chunk);
+
+	/* Too many chunks are waiting: start draining the queue. */
+	if (depth > get_chunk_maximum_in_queue())
+		queue_work(system_wq, &area->store_queue_work);
+
+	diff_area_put(area);
+}
+
+/**
+ * chunk_copy_bio() - Copy data between a bio and the chunk's memory buffer.
+ * @chunk: chunk whose diff_buffer pages are one side of the copy
+ * @bio: bio providing the pages and the direction: for a write operation
+ *       data goes from the bio to the buffer, otherwise from the buffer
+ *       to the bio
+ * @iter: position in @bio; advanced by the amount of data copied
+ *
+ * Copying stops at the end of the chunk buffer or when @iter is exhausted,
+ * whichever comes first.
+ */
+void chunk_copy_bio(struct chunk *chunk, struct bio *bio,
+		    struct bvec_iter *iter)
+{
+	struct diff_buffer *buf = chunk->diff_buffer;
+	const bool to_buffer = op_is_write(bio_op(bio));
+	unsigned int pos, remain;
+
+	pos = (iter->bi_sector - chunk_sector(chunk)) << SECTOR_SHIFT;
+	remain = buf->size - pos;
+
+	while (remain && iter->bi_size) {
+		struct bio_vec seg = bio_iter_iovec(bio, *iter);
+		struct page *buf_page = buf->pages[pos >> PAGE_SHIFT];
+		unsigned int in_page = offset_in_page(pos);
+		unsigned int n;
+
+		/* Never cross a buffer page, the bio segment or the chunk end. */
+		n = min3(seg.bv_len, remain, (unsigned int)PAGE_SIZE - in_page);
+
+		if (to_buffer)
+			memcpy_page(buf_page, in_page,
+				    seg.bv_page, seg.bv_offset, n);
+		else
+			memcpy_page(seg.bv_page, seg.bv_offset,
+				    buf_page, in_page, n);
+
+		pos += n;
+		remain -= n;
+		bio_advance_iter_single(bio, iter, n);
+	}
+}
+
+/*
+ * Completion handler for a clone submitted by chunk_clone_bio().
+ *
+ * bi_private carries the original bio. chunk_clone_bio() called
+ * bio_inc_remaining() on the original for this clone, so ending the original
+ * here balances that count. A failed clone fails the original bio.
+ */
+static void chunk_clone_endio(struct bio *bio)
+{
+	struct bio *orig_bio = bio->bi_private;
+	blk_status_t status = bio->bi_status;
+
+	/*
+	 * The clone was allocated from chunk_clone_bioset and nobody else
+	 * releases it; without this put every completed clone would leak.
+	 * The status was saved above because the clone is gone after this.
+	 */
+	bio_put(bio);
+
+	if (unlikely(status != BLK_STS_OK))
+		bio_io_error(orig_bio);
+	else
+		bio_endio(orig_bio);
+}
+
+/* Distance, in sectors, from the start of the chunk to the bio's position. */
+static inline sector_t chunk_offset(struct chunk *chunk, struct bio *bio)
+{
+	sector_t chunk_start = chunk_sector(chunk);
+
+	return bio->bi_iter.bi_sector - chunk_start;
+}
+
+/*
+ * Fill in @iter so that it starts at the bio's offset inside the chunk,
+ * rebased onto @sector, and covers no more than the rest of the chunk.
+ */
+static inline void chunk_limit_iter(struct chunk *chunk, struct bio *bio,
+				    sector_t sector, struct bvec_iter *iter)
+{
+	sector_t ofs = chunk_offset(chunk, bio);
+	sector_t bytes_left = (chunk->sector_count - ofs) << SECTOR_SHIFT;
+
+	iter->bi_sector = sector + ofs;
+	iter->bi_size = min_t(unsigned int, bio->bi_iter.bi_size, bytes_left);
+}
+
+/*
+ * Number of bytes of @bio, counted from its current position, that fit into
+ * the chunk's in-memory buffer.
+ */
+static inline unsigned int chunk_limit(struct chunk *chunk, struct bio *bio)
+{
+	unsigned int start = (unsigned int)chunk_offset(chunk, bio) << SECTOR_SHIFT;
+	unsigned int room = chunk->diff_buffer->size - start;
+
+	return min(bio->bi_iter.bi_size, room);
+}
+
+/**
+ * chunk_alloc_clone() - Clone a bio using the module's clone bio set.
+ * @bdev: block device the clone is directed to
+ * @bio: bio to clone
+ *
+ * Return: the result of bio_alloc_clone() called with GFP_NOIO.
+ */
+struct bio *chunk_alloc_clone(struct block_device *bdev, struct bio *bio)
+{
+	struct bio *clone;
+
+	clone = bio_alloc_clone(bdev, bio, GFP_NOIO, &chunk_clone_bioset);
+	return clone;
+}
+
+/**
+ * chunk_clone_bio() - Redirect the part of a bio that falls into a chunk.
+ * @chunk: chunk the current position of @bio belongs to
+ * @bio: bio being processed; advanced past the part handed to the clone
+ *
+ * A clone limited to the chunk is submitted to the difference storage when
+ * the chunk is in the CHUNK_ST_STORED state and to the original device
+ * otherwise. The clone completes @bio through chunk_clone_endio().
+ */
+void chunk_clone_bio(struct chunk *chunk, struct bio *bio)
+{
+	const bool stored = (chunk->state == CHUNK_ST_STORED);
+	struct block_device *target;
+	struct bio *clone;
+	sector_t start;
+
+	target = stored ? chunk->diff_region->bdev : chunk->diff_area->orig_bdev;
+	start = stored ? chunk->diff_region->sector : chunk_sector(chunk);
+
+	clone = chunk_alloc_clone(target, bio);
+	WARN_ON(!clone);
+
+	chunk_limit_iter(chunk, bio, start, &clone->bi_iter);
+	bio_set_flag(clone, BIO_FILTERED);
+	clone->bi_private = bio;
+	clone->bi_end_io = chunk_clone_endio;
+
+	/* Skip the cloned part and keep @bio pending until the clone is done. */
+	bio_advance(bio, clone->bi_iter.bi_size);
+	bio_inc_remaining(bio);
+
+	submit_bio_noacct(clone);
+}
+
+/* Detach and return the first chunk queued on @cbio, or NULL if none is left. */
+static inline struct chunk *get_chunk_from_cbio(struct chunk_bio *cbio)
+{
+	struct chunk *first;
+
+	first = list_first_entry_or_null(&cbio->chunks, struct chunk, link);
+	if (!first)
+		return NULL;
+
+	list_del_init(&first->link);
+	return first;
+}
+
+/*
+ * Work handler run after a chunk load started by
+ * chunk_load_and_schedule_io() has completed.
+ *
+ * On success the data is copied between the original bio and the chunk
+ * buffer, the original bio is ended and the chunk is queued for storing.
+ *
+ * chunk_load_and_schedule_io() called bio_inc_remaining() on the original
+ * bio, so the bio must be ended on every path. On failure it is ended with
+ * an I/O error; otherwise it would never complete.
+ */
+static void notify_load_and_schedule_io(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	while ((chunk = get_chunk_from_cbio(cbio))) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK)) {
+			chunk_store_failed(chunk, -EIO);
+			bio_io_error(cbio->orig_bio);
+			continue;
+		}
+		if (chunk->state == CHUNK_ST_FAILED) {
+			chunk_up(chunk);
+			bio_io_error(cbio->orig_bio);
+			continue;
+		}
+
+		chunk_copy_bio(chunk, cbio->orig_bio, &cbio->orig_iter);
+		bio_endio(cbio->orig_bio);
+
+		chunk_schedule_storing(chunk);
+	}
+
+	/* Releases the load bio and with it the embedding chunk_bio. */
+	bio_put(&cbio->bio);
+}
+
+/*
+ * Work handler run after the chunk loads finished by
+ * chunk_load_and_postpone_io_finish() have completed: every chunk is either
+ * queued for storing or, on error, marked as failed. The postponed original
+ * bio is submitted afterwards in any case.
+ */
+static void notify_load_and_postpone_io(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	for (chunk = get_chunk_from_cbio(cbio); chunk;
+	     chunk = get_chunk_from_cbio(cbio)) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK))
+			chunk_store_failed(chunk, -EIO);
+		else if (chunk->state == CHUNK_ST_FAILED)
+			chunk_up(chunk);
+		else
+			chunk_schedule_storing(chunk);
+	}
+
+	/* submit the original bio fed into the tracker */
+	submit_bio_noacct_nocheck(cbio->orig_bio);
+	bio_put(&cbio->bio);
+}
+
+/*
+ * Work handler run after a write started by chunk_store() has completed:
+ * on success the chunk becomes CHUNK_ST_STORED and its in-memory buffer is
+ * released, on error the chunk is marked as failed. The chunk is unlocked
+ * in both cases.
+ */
+static void chunk_notify_store(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	for (chunk = get_chunk_from_cbio(cbio); chunk;
+	     chunk = get_chunk_from_cbio(cbio)) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK)) {
+			chunk_store_failed(chunk, -EIO);
+			continue;
+		}
+
+		WARN_ON_ONCE(chunk->state != CHUNK_ST_IN_MEMORY);
+		chunk->state = CHUNK_ST_STORED;
+
+		if (chunk->diff_buffer) {
+			diff_buffer_release(chunk->diff_area,
+					    chunk->diff_buffer);
+			chunk->diff_buffer = NULL;
+		}
+
+		chunk_up(chunk);
+	}
+
+	bio_put(&cbio->bio);
+}
+
+/*
+ * Completion handler of chunk I/O bios: hand the rest of the processing over
+ * to the work item of the embedding struct chunk_bio.
+ */
+static void chunk_io_endio(struct bio *bio)
+{
+	struct chunk_bio *cbio;
+
+	cbio = container_of(bio, struct chunk_bio, bio);
+	queue_work(system_wq, &cbio->work);
+}
+
+/*
+ * Submit a chunk I/O bio whose completion is handled by chunk_io_endio().
+ * The bio must be embedded in a struct chunk_bio with an initialized work item.
+ */
+static void chunk_submit_bio(struct bio *bio)
+{
+ bio->bi_end_io = chunk_io_endio;
+ submit_bio_noacct(bio);
+}
+
+/*
+ * Number of bio vectors to request for @left sectors: one vector per page,
+ * passed through bio_max_segs().
+ */
+static inline unsigned short calc_max_vecs(sector_t left)
+{
+	sector_t pages = round_up(left, PAGE_SECTORS) / PAGE_SECTORS;
+
+	return bio_max_segs(pages);
+}
+
+/**
+ * chunk_store() - Start writing the chunk's buffer to its difference region.
+ * @chunk: chunk whose diff_buffer pages are written to chunk->diff_region
+ *
+ * The buffer pages are packed into one or more chained write bios. The last
+ * bio of the chain is embedded in a struct chunk_bio whose work item runs
+ * chunk_notify_store() after completion.
+ */
+void chunk_store(struct chunk *chunk)
+{
+ struct block_device *bdev = chunk->diff_region->bdev;
+ sector_t sector = chunk->diff_region->sector;
+ sector_t count = chunk->diff_region->count;
+ unsigned int page_idx = 0;
+ struct bio *bio;
+ struct chunk_bio *cbio;
+
+ bio = bio_alloc_bioset(bdev, calc_max_vecs(count),
+ REQ_OP_WRITE | REQ_SYNC | REQ_FUA, GFP_NOIO,
+ &chunk_io_bioset);
+ bio->bi_iter.bi_sector = sector;
+ bio_set_flag(bio, BIO_FILTERED);
+
+ while (count) {
+ struct bio *next;
+ /* At most one page per step; the last step may be shorter. */
+ sector_t portion = min_t(sector_t, count, PAGE_SECTORS);
+ unsigned int bytes = portion << SECTOR_SHIFT;
+
+ if (bio_add_page(bio, chunk->diff_buffer->pages[page_idx],
+ bytes, 0) == bytes) {
+ page_idx++;
+ count -= portion;
+ continue;
+ }
+
+ /*
+ * The page did not fit: continue in a new bio that starts where
+ * the current one ends. The same page is retried on the next pass.
+ */
+ /* Create next bio */
+ next = bio_alloc_bioset(bdev, calc_max_vecs(count),
+ REQ_OP_WRITE | REQ_SYNC | REQ_FUA,
+ GFP_NOIO, &chunk_io_bioset);
+ next->bi_iter.bi_sector = bio_end_sector(bio);
+ bio_set_flag(next, BIO_FILTERED);
+ bio_chain(bio, next);
+ submit_bio_noacct(bio);
+ bio = next;
+ }
+
+ /* Only the last bio of the chain carries the completion context. */
+ cbio = container_of(bio, struct chunk_bio, bio);
+
+ INIT_WORK(&cbio->work, chunk_notify_store);
+ INIT_LIST_HEAD(&cbio->chunks);
+ list_add_tail(&chunk->link, &cbio->chunks);
+ cbio->orig_bio = NULL;
+ chunk_submit_bio(bio);
+}
+
+/*
+ * Take a buffer for the chunk and build the read bios that fill it.
+ *
+ * The data is read from the difference storage when the chunk is in the
+ * CHUNK_ST_STORED state and from the original device otherwise. If more than
+ * one bio is needed, the earlier ones are chained and submitted here.
+ *
+ * Returns the last bio, which is NOT submitted; the caller sets up the
+ * embedding struct chunk_bio and submits it. Returns an ERR_PTR() if no
+ * buffer could be taken.
+ */
+static struct bio *__chunk_load(struct chunk *chunk)
+{
+ struct diff_buffer *diff_buffer;
+ unsigned int page_idx = 0;
+ struct bio *bio;
+ struct block_device *bdev;
+ sector_t sector, count;
+
+ diff_buffer = diff_buffer_take(chunk->diff_area);
+ if (IS_ERR(diff_buffer))
+ return ERR_CAST(diff_buffer);
+ chunk->diff_buffer = diff_buffer;
+
+ if (chunk->state == CHUNK_ST_STORED) {
+ bdev = chunk->diff_region->bdev;
+ sector = chunk->diff_region->sector;
+ count = chunk->diff_region->count;
+ } else {
+ bdev = chunk->diff_area->orig_bdev;
+ sector = chunk_sector(chunk);
+ count = chunk->sector_count;
+ }
+
+ bio = bio_alloc_bioset(bdev, calc_max_vecs(count),
+ REQ_OP_READ, GFP_NOIO, &chunk_io_bioset);
+ bio->bi_iter.bi_sector = sector;
+ bio_set_flag(bio, BIO_FILTERED);
+
+ while (count) {
+ struct bio *next;
+ /* At most one page per step; the last step may be shorter. */
+ sector_t portion = min_t(sector_t, count, PAGE_SECTORS);
+ unsigned int bytes = portion << SECTOR_SHIFT;
+
+ if (bio_add_page(bio, chunk->diff_buffer->pages[page_idx],
+ bytes, 0) == bytes) {
+ page_idx++;
+ count -= portion;
+ continue;
+ }
+
+ /*
+ * The page did not fit: continue in a new bio that starts where
+ * the current one ends. The same page is retried on the next pass.
+ */
+ /* Create next bio */
+ next = bio_alloc_bioset(bdev, calc_max_vecs(count),
+ REQ_OP_READ, GFP_NOIO,
+ &chunk_io_bioset);
+ next->bi_iter.bi_sector = bio_end_sector(bio);
+ bio_set_flag(next, BIO_FILTERED);
+ bio_chain(bio, next);
+ submit_bio_noacct(bio);
+ bio = next;
+ }
+ return bio;
+}
+
+/**
+ * chunk_load_and_postpone_io() - Start loading a chunk as part of a batch.
+ * @chunk: chunk to load
+ * @chunk_bio: in: last bio of the previous load or NULL; out: last bio of
+ *             this load, still not submitted
+ *
+ * The previous bio, if any, is chained to the new one and submitted. The
+ * final bio has to be passed to chunk_load_and_postpone_io_finish().
+ *
+ * Return: 0 on success or the error returned by __chunk_load().
+ */
+int chunk_load_and_postpone_io(struct chunk *chunk, struct bio **chunk_bio)
+{
+	struct bio *tail = *chunk_bio;
+	struct bio *loaded = __chunk_load(chunk);
+
+	if (IS_ERR(loaded))
+		return PTR_ERR(loaded);
+
+	if (tail) {
+		bio_chain(tail, loaded);
+		submit_bio_noacct(tail);
+	}
+	*chunk_bio = loaded;
+
+	return 0;
+}
+
+/**
+ * chunk_load_and_postpone_io_finish() - Submit the last bio of a load batch.
+ * @chunks: list of the chunks being loaded; moved to the request context
+ * @chunk_bio: last bio produced by chunk_load_and_postpone_io()
+ * @orig_bio: postponed original bio
+ *
+ * After the load has completed, notify_load_and_postpone_io() processes the
+ * chunks and submits @orig_bio.
+ */
+void chunk_load_and_postpone_io_finish(struct list_head *chunks,
+				struct bio *chunk_bio, struct bio *orig_bio)
+{
+	struct chunk_bio *cbio = container_of(chunk_bio, struct chunk_bio, bio);
+	struct chunk *cur;
+
+	INIT_WORK(&cbio->work, notify_load_and_postpone_io);
+	cbio->orig_bio = orig_bio;
+
+	/* Move all chunks, in order, from the caller's list to the request. */
+	INIT_LIST_HEAD(&cbio->chunks);
+	while ((cur = list_first_entry_or_null(chunks, struct chunk, link))) {
+		list_del_init(&cur->link);
+		list_add_tail(&cur->link, &cbio->chunks);
+	}
+
+	chunk_submit_bio(chunk_bio);
+}
+
+/**
+ * chunk_load_and_schedule_io() - Load a chunk and then serve a bio from it.
+ * @chunk: chunk to load
+ * @orig_bio: bio to be served from the chunk buffer; advanced past the part
+ *            that belongs to this chunk
+ *
+ * After the load has completed, notify_load_and_schedule_io() copies the
+ * data and ends @orig_bio once.
+ *
+ * Return: 0 on success or the error returned by __chunk_load().
+ */
+int chunk_load_and_schedule_io(struct chunk *chunk, struct bio *orig_bio)
+{
+	struct bio *load_bio = __chunk_load(chunk);
+	struct chunk_bio *cbio;
+	unsigned int span;
+
+	if (IS_ERR(load_bio))
+		return PTR_ERR(load_bio);
+
+	cbio = container_of(load_bio, struct chunk_bio, bio);
+	INIT_WORK(&cbio->work, notify_load_and_schedule_io);
+	INIT_LIST_HEAD(&cbio->chunks);
+	list_add_tail(&chunk->link, &cbio->chunks);
+
+	/* Remember where this chunk's part starts before moving the bio on. */
+	cbio->orig_bio = orig_bio;
+	cbio->orig_iter = orig_bio->bi_iter;
+
+	span = chunk_limit(chunk, orig_bio);
+	bio_advance_iter_single(orig_bio, &orig_bio->bi_iter, span);
+	bio_inc_remaining(orig_bio);
+
+	chunk_submit_bio(load_bio);
+	return 0;
+}
+
+/**
+ * chunk_init() - Create the bio sets used for chunk I/O.
+ *
+ * chunk_io_bioset reserves room in front of every bio for the rest of
+ * struct chunk_bio; chunk_clone_bioset has no front padding.
+ *
+ * If the second set cannot be created, the first one is torn down again so
+ * that a failed initialization leaves nothing allocated.
+ *
+ * Return: 0 on success or the error returned by bioset_init().
+ */
+int __init chunk_init(void)
+{
+	int ret;
+
+	ret = bioset_init(&chunk_io_bioset, 64,
+			  offsetof(struct chunk_bio, bio),
+			  BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
+	if (ret)
+		return ret;
+
+	ret = bioset_init(&chunk_clone_bioset, 64, 0,
+			  BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
+	if (ret)
+		bioset_exit(&chunk_io_bioset);
+
+	return ret;
+}
+
+/**
+ * chunk_done() - Destroy the bio sets created by chunk_init().
+ */
+void chunk_done(void)
+{
+ bioset_exit(&chunk_io_bioset);
+ bioset_exit(&chunk_clone_bioset);
+}
diff --git a/drivers/block/blksnap/chunk.h b/drivers/block/blksnap/chunk.h
new file mode 100644
index 000000000000..cd119ac729df
--- /dev/null
+++ b/drivers/block/blksnap/chunk.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Veeam Software Group GmbH */
+#ifndef __BLKSNAP_CHUNK_H
+#define __BLKSNAP_CHUNK_H
+
+#include <linux/blk_types.h>
+#include <linux/blkdev.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+#include "diff_area.h"
+
+struct diff_area;
+struct diff_region;
+
+/**
+ * enum chunk_st - Possible states for a chunk.
+ *
+ * @CHUNK_ST_NEW:
+ * No data is associated with the chunk.
+ * @CHUNK_ST_IN_MEMORY:
+ * The data of the chunk is ready to be read from the RAM buffer.
+ * The chunk leaves this state when its data has been written to the
+ * difference storage and its buffer is released.
+ * @CHUNK_ST_STORED:
+ * The data of the chunk has been written to the difference storage.
+ * @CHUNK_ST_FAILED:
+ * An error occurred while processing the chunk data.
+ *
+ * Chunk life cycle:
+ * CHUNK_ST_NEW -> CHUNK_ST_IN_MEMORY <-> CHUNK_ST_STORED
+ *
+ * A chunk in the CHUNK_ST_NEW or CHUNK_ST_IN_MEMORY state is switched to
+ * CHUNK_ST_FAILED by chunk_store_failed().
+ */
+
+enum chunk_st {
+ CHUNK_ST_NEW,
+ CHUNK_ST_IN_MEMORY,
+ CHUNK_ST_STORED,
+ CHUNK_ST_FAILED,
+};
+
+/**
+ * struct chunk - Minimum data storage unit.
+ *
+ * @link:
+ * The list header allows to create queue of chunks.
+ * @number:
+ * Sequential number of the chunk.
+ * @sector_count:
+ * Number of sectors in the current chunk. Stored per chunk because it
+ * can differ between chunks (presumably the last chunk of a device may
+ * be shorter - confirm against the code that creates chunks).
+ * @lock:
+ * Binary semaphore. Syncs access to the chunks fields: state,
+ * diff_buffer and diff_region.
+ * @diff_area:
+ * Pointer to the difference area - the difference storage area for a
+ * specific device. This field is only available when the chunk is locked.
+ * Allows to protect the difference area from early release.
+ * @state:
+ * Defines the state of a chunk.
+ * @diff_buffer:
+ * Pointer to &struct diff_buffer. Describes a buffer in the memory
+ * for storing the chunk data.
+ * @diff_region:
+ * Pointer to &struct diff_region. Describes a copy of the chunk data
+ * on the difference storage.
+ *
+ * This structure describes the block of data that the module operates
+ * with when executing the copy-on-write algorithm and when performing I/O
+ * to snapshot images.
+ *
+ * If the data of the chunk has been changed or has just been read, then
+ * the chunk gets into store queue.
+ *
+ * The semaphore is held while there is no actual data in the buffer, that
+ * is, while a block of data is being read from the original device or
+ * from a diff storage. It must also be held while data is being read from
+ * or written to the diff_buffer.
+ */
+struct chunk {
+ struct list_head link;
+ unsigned long number;
+ sector_t sector_count;
+
+ struct semaphore lock;
+ struct diff_area *diff_area;
+
+ enum chunk_st state;
+ struct diff_buffer *diff_buffer;
+ struct diff_region *diff_region;
+};
+
+/**
+ * chunk_up() - Unlock a chunk and drop its reference to the difference area.
+ * @chunk: locked chunk
+ *
+ * The diff_area pointer is saved and cleared before the semaphore is
+ * released, because the field may only be used while the chunk is locked.
+ * The reference is dropped afterwards through the saved pointer.
+ */
+static inline void chunk_up(struct chunk *chunk)
+{
+	struct diff_area *diff_area = chunk->diff_area;
+
+	chunk->diff_area = NULL;
+	up(&chunk->lock);
+	diff_area_put(diff_area);
+}
+
+/* Mark the chunk as failed, release its buffer and region, and unlock it. */
+void chunk_store_failed(struct chunk *chunk, int error);
+/* Clone @bio for @bdev from the module's clone bio set. */
+struct bio *chunk_alloc_clone(struct block_device *bdev, struct bio *bio);
+
+/* Copy data between @bio (at @iter) and the chunk's in-memory buffer. */
+void chunk_copy_bio(struct chunk *chunk, struct bio *bio,
+ struct bvec_iter *iter);
+/* Submit a clone of @bio, limited to the chunk, to the device holding its data. */
+void chunk_clone_bio(struct chunk *chunk, struct bio *bio);
+/* Start writing the chunk's in-memory buffer to its difference region. */
+void chunk_store(struct chunk *chunk);
+/* Load the chunk, then serve @orig_bio from it and queue the chunk for storing. */
+int chunk_load_and_schedule_io(struct chunk *chunk, struct bio *orig_bio);
+/* Start loading the chunk; *@chunk_bio receives the last, unsubmitted bio. */
+int chunk_load_and_postpone_io(struct chunk *chunk, struct bio **chunk_bio);
+/* Submit the last load bio; @orig_bio is submitted once loading has completed. */
+void chunk_load_and_postpone_io_finish(struct list_head *chunks,
+ struct bio *chunk_bio, struct bio *orig_bio);
+
+/* Create and destroy the bio sets used for chunk I/O. */
+int __init chunk_init(void);
+void chunk_done(void);
+#endif /* __BLKSNAP_CHUNK_H */
--
2.20.1