[RFC PATCH v2 8/9] iov_iter: Add benchmarking kunit tests

From: David Howells
Date: Wed Sep 20 2023 - 09:06:42 EST


Add kunit tests to benchmark 256MiB copies to a KVEC iterator, a BVEC
iterator, an XARRAY iterator and to a loop that allocates 256-page BVECs
and fills them in (similar to a maximal bio struct being set up).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Christoph Hellwig <hch@xxxxxx>
cc: Christian Brauner <brauner@xxxxxxxxxx>
cc: Jens Axboe <axboe@xxxxxxxxx>
cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
cc: David Hildenbrand <david@xxxxxxxxxx>
cc: John Hubbard <jhubbard@xxxxxxxxxx>
cc: Brendan Higgins <brendanhiggins@xxxxxxxxxx>
cc: David Gow <davidgow@xxxxxxxxxx>
cc: linux-kselftest@xxxxxxxxxxxxxxx
cc: kunit-dev@xxxxxxxxxxxxxxxx
cc: linux-fsdevel@xxxxxxxxxxxxxxx
cc: linux-mm@xxxxxxxxx
---
lib/kunit_iov_iter.c | 251 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 251 insertions(+)

diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
index 2994c3f348ab..17f85f24b239 100644
--- a/lib/kunit_iov_iter.c
+++ b/lib/kunit_iov_iter.c
@@ -1261,6 +1261,253 @@ static void __init iov_kunit_extract_pages_xarray(struct kunit *test)
KUNIT_SUCCEED();
}

+static void iov_kunit_free_page(void *data)
+{
+ __free_page(data);
+}
+
+#define IOV_KUNIT_NR_SAMPLES 16
+static void __init iov_kunit_benchmark_print_stats(struct kunit *test,
+ unsigned int *samples)
+{
+ unsigned long long sumsq = 0;
+ unsigned long total = 0, mean, stddev;
+ unsigned int n = IOV_KUNIT_NR_SAMPLES;
+ int i;
+
+ //for (i = 0; i < n; i++)
+ // kunit_info(test, "run %x: %u uS\n", i, samples[i]);
+
+ /* Ignore the 0th sample as that may include extra overhead such as
+ * setting up PTEs.
+ */
+ samples++;
+ n--;
+ for (i = 0; i < n; i++)
+ total += samples[i];
+ mean = total / n;
+
+ for (i = 0; i < n; i++) {
+ long s = samples[i] - mean;
+
+ sumsq += s * s;
+ }
+ stddev = int_sqrt64(sumsq);
+
+ kunit_info(test, "avg %lu uS, stddev %lu uS\n", mean, stddev);
+}
+
+/*
+ * Create a source buffer for benchmarking.
+ */
+static void *__init iov_kunit_create_source(struct kunit *test, size_t npages)
+{
+ struct page *page, **pages;
+ void *scratch;
+ size_t i;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ pages = kunit_kmalloc_array(test, npages, sizeof(pages[0]), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, pages);
+ for (i = 0; i < npages; i++) {
+ pages[i] = page;
+ get_page(page);
+ }
+
+ scratch = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, scratch);
+ kunit_add_action_or_reset(test, iov_kunit_unmap, scratch);
+ return scratch;
+}
+
+/*
+ * Time copying 256MiB through an ITER_KVEC.
+ */
+static void __init iov_kunit_benchmark_kvec(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct kvec kvec[8];
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE, part;
+ void *scratch, *buffer;
+ int i;
+
+ /* Allocate a huge buffer and populate it with pages. */
+ buffer = iov_kunit_create_source(test, npages);
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Split the target over a number of kvecs */
+ copied = 0;
+ for (i = 0; i < ARRAY_SIZE(kvec); i++) {
+ part = size / ARRAY_SIZE(kvec);
+ kvec[i].iov_base = buffer + copied;
+ kvec[i].iov_len = part;
+ copied += part;
+ }
+ kvec[i - 1].iov_len += size - part;
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over KVEC:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ iov_iter_kvec(&iter, ITER_SOURCE, kvec, ARRAY_SIZE(kvec), size);
+
+ a = ktime_get_real();
+ copied = copy_from_iter(scratch, size, &iter);
+ b = ktime_get_real();
+ KUNIT_EXPECT_EQ(test, copied, size);
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
+/*
+ * Time copying 256MiB through an ITER_BVEC.
+ */
+static void __init iov_kunit_benchmark_bvec(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct bio_vec *bvec;
+ struct page *page;
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE;
+ void *scratch;
+ int i;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ bvec = kunit_kmalloc_array(test, npages, sizeof(bvec[0]), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, bvec);
+ for (i = 0; i < npages; i++)
+ bvec_set_page(&bvec[i], page, PAGE_SIZE, 0);
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over BVEC:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ iov_iter_bvec(&iter, ITER_SOURCE, bvec, npages, size);
+ a = ktime_get_real();
+ copied = copy_from_iter(scratch, size, &iter);
+ b = ktime_get_real();
+ KUNIT_EXPECT_EQ(test, copied, size);
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
+/*
+ * Time copying 256MiB through an ITER_BVEC in 256 page chunks.
+ */
+static void __init iov_kunit_benchmark_bvec_split(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct bio_vec *bvec;
+ struct page *page;
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size, npages = 64;
+ void *scratch;
+ int i, j;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over BVEC:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ size = 256 * 1024 * 1024;
+ a = ktime_get_real();
+ do {
+ size_t part = min_t(size_t, size, npages * PAGE_SIZE);
+
+ bvec = kunit_kmalloc_array(test, npages, sizeof(bvec[0]), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, bvec);
+ for (j = 0; j < npages; j++)
+ bvec_set_page(&bvec[j], page, PAGE_SIZE, 0);
+
+ iov_iter_bvec(&iter, ITER_SOURCE, bvec, npages, part);
+ copied = copy_from_iter(scratch, part, &iter);
+ KUNIT_EXPECT_EQ(test, copied, part);
+ size -= part;
+ } while (size > 0);
+ b = ktime_get_real();
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
+/*
+ * Time copying 256MiB through an ITER_XARRAY.
+ */
+static void __init iov_kunit_benchmark_xarray(struct kunit *test)
+{
+ struct iov_iter iter;
+ struct xarray *xarray;
+ struct page *page;
+ unsigned int samples[IOV_KUNIT_NR_SAMPLES];
+ ktime_t a, b;
+ ssize_t copied;
+ size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE;
+ void *scratch;
+ int i;
+
+ /* Allocate a page and tile it repeatedly in the buffer. */
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ kunit_add_action_or_reset(test, iov_kunit_free_page, page);
+
+ xarray = iov_kunit_create_xarray(test);
+
+ for (i = 0; i < npages; i++) {
+ void *x = xa_store(xarray, i, page, GFP_KERNEL);
+
+ KUNIT_ASSERT_FALSE(test, xa_is_err(x));
+ }
+
+ /* Create a single large buffer to copy to/from. */
+ scratch = iov_kunit_create_source(test, npages);
+
+ /* Perform and time a bunch of copies. */
+ kunit_info(test, "Benchmarking copy_to_iter() over XARRAY:\n");
+ for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) {
+ iov_iter_xarray(&iter, ITER_SOURCE, xarray, 0, size);
+ a = ktime_get_real();
+ copied = copy_from_iter(scratch, size, &iter);
+ b = ktime_get_real();
+ KUNIT_EXPECT_EQ(test, copied, size);
+ samples[i] = ktime_to_us(ktime_sub(b, a));
+ }
+
+ iov_kunit_benchmark_print_stats(test, samples);
+ KUNIT_SUCCEED();
+}
+
static struct kunit_case __refdata iov_kunit_cases[] = {
KUNIT_CASE(iov_kunit_copy_to_ubuf),
KUNIT_CASE(iov_kunit_copy_from_ubuf),
@@ -1277,6 +1524,10 @@ static struct kunit_case __refdata iov_kunit_cases[] = {
KUNIT_CASE(iov_kunit_extract_pages_kvec),
KUNIT_CASE(iov_kunit_extract_pages_bvec),
KUNIT_CASE(iov_kunit_extract_pages_xarray),
+ KUNIT_CASE(iov_kunit_benchmark_kvec),
+ KUNIT_CASE(iov_kunit_benchmark_bvec),
+ KUNIT_CASE(iov_kunit_benchmark_bvec_split),
+ KUNIT_CASE(iov_kunit_benchmark_xarray),
{}
};