Re: [PATCH v2] zbud: allow up to PAGE_SIZE allocations

From: Seth Jennings
Date: Tue Sep 22 2015 - 23:23:55 EST


On Tue, Sep 22, 2015 at 02:17:33PM +0200, Vitaly Wool wrote:
> Currently zbud cannot allocate more than
> PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE bytes. This is okay as
> long as only zswap is using it, but other users of zbud may
> (and likely will) want to allocate up to PAGE_SIZE. This patch
> addresses that by skipping the creation of the zbud internal
> structure at the beginning of an allocated page (such pages are
> then called 'headless').

I guess I'm having trouble with this. If you store a PAGE_SIZE
allocation in zbud, then the zpage can only have one allocation as there
is no room for a buddy. Sooooo... we have an allocator for that: the
page allocator.
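
For reference, here is the arithmetic behind the current limit. This is
a minimal userspace sketch, assuming 4K pages and the chunk constants
from mm/zbud.c (illustrative only, not kernel code):

#include <stdio.h>

#define PAGE_SIZE	4096UL			/* assumed 4K page */
#define NCHUNKS_ORDER	6			/* as in mm/zbud.c */
#define CHUNK_SHIFT	(12 - NCHUNKS_ORDER)	/* PAGE_SHIFT - NCHUNKS_ORDER */
#define CHUNK_SIZE	(1UL << CHUNK_SHIFT)	/* 64 bytes */
#define ZHDR_SIZE_ALIGNED CHUNK_SIZE		/* header rounds up to one chunk */

int main(void)
{
        /* current zbud_alloc() limit: keep room for the header plus
         * one chunk beyond the allocation itself
         */
        unsigned long max = PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE;

        printf("max zbud allocation: %lu bytes\n", max);	/* 3968 */
        return 0;
}

With these constants a PAGE_SIZE object is 128 bytes over the limit, so
once such an object is accepted the page holds exactly one allocation
and nothing else.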

zbud doesn't support this by design because, if you are only storing one
allocation per page, you don't gain anything.
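
To make the comparison concrete, a caller that really has a full
PAGE_SIZE worth of data could go to the page allocator directly. A
hedged kernel-style sketch; store_full_page()/free_full_page() are
hypothetical names, not existing functions, and the caller is assumed
not to pass __GFP_HIGHMEM (zbud rejects it too):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/string.h>

static void *store_full_page(const void *src, gfp_t gfp)
{
        struct page *page = alloc_page(gfp);

        if (!page)
                return NULL;
        /* one object per page -- the same density zbud would give */
        memcpy(page_address(page), src, PAGE_SIZE);
        return page_address(page);
}

static void free_full_page(void *addr)
{
        __free_page(virt_to_page(addr));
}

Anything zbud would add on top of this for full-page objects is
bookkeeping (LRU, reclaim callbacks), not space savings.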

This functionality creates many new edge cases for the code.

What is the use case you envision? I think we need to discuss whether
that use case exists and whether it justifies the added complexity.

We are crossing a boundary into zsmalloc style complexity with storing
stuff in the struct page, something I really didn't want to do in zbud.
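
For what it's worth, the kind of struct-page bookkeeping in question
usually looks something like the sketch below: flag bits packed into
page->private via set_page_private()/page_private(). This is only an
illustration of the pattern; the posted patch instead casts
page->private to a struct zbud_page_priv, so the names and layout here
are hypothetical:

#include <linux/bitops.h>
#include <linux/mm.h>

#define ZBUD_PG_UNDER_RECLAIM	BIT(0)	/* page is being reclaimed */
#define ZBUD_PG_HEADLESS	BIT(1)	/* no zbud header at page start */

static void zbud_set_headless(struct page *page)
{
        set_page_private(page, page_private(page) | ZBUD_PG_HEADLESS);
}

static bool zbud_page_headless(struct page *page)
{
        return page_private(page) & ZBUD_PG_HEADLESS;
}

Every such flag is one more piece of per-page state that zbud_free()
and zbud_reclaim_page() have to keep consistent.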

zbud is the simple one; zsmalloc is the complex one. I'd hate to have
two complex ones :-/

Seth

>
> As a zbud page is no longer guaranteed to contain a zbud header, the
> following changes had to be applied throughout the code:
> * page->lru is now used for zbud page lists
> * page->private holds the 'under_reclaim' flag
>
> page->private will also be used to indicate whether the page contains
> a zbud header at the beginning ('headless' flag).
>
> Signed-off-by: Vitaly Wool <vitalywool@xxxxxxxxx>
> ---
> mm/zbud.c | 194 +++++++++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 128 insertions(+), 66 deletions(-)
>
> diff --git a/mm/zbud.c b/mm/zbud.c
> index fa48bcdf..7b51eb6 100644
> --- a/mm/zbud.c
> +++ b/mm/zbud.c
> @@ -105,18 +105,25 @@ struct zbud_pool {
>
> /*
> * struct zbud_header - zbud page metadata occupying the first chunk of each
> - * zbud page.
> + * zbud page, except for HEADLESS pages
> * @buddy: links the zbud page into the unbuddied/buddied lists in the pool
> - * @lru: links the zbud page into the lru list in the pool
> * @first_chunks: the size of the first buddy in chunks, 0 if free
> * @last_chunks: the size of the last buddy in chunks, 0 if free
> */
> struct zbud_header {
> struct list_head buddy;
> - struct list_head lru;
> unsigned int first_chunks;
> unsigned int last_chunks;
> - bool under_reclaim;
> +};
> +
> +/*
> + * struct zbud_page_priv - zbud flags to be stored in page->private
> + * @under_reclaim: if a zbud page is under reclaim
> + * @headless: indicates a page where zbud header didn't fit
> + */
> +struct zbud_page_priv {
> + bool under_reclaim:1;
> + bool headless:1;
> };
>
> /*****************
> @@ -221,6 +228,7 @@ MODULE_ALIAS("zpool-zbud");
> *****************/
> /* Just to make the code easier to read */
> enum buddy {
> + HEADLESS,
> FIRST,
> LAST
> };
> @@ -237,12 +245,15 @@ static int size_to_chunks(size_t size)
> /* Initializes the zbud header of a newly allocated zbud page */
> static struct zbud_header *init_zbud_page(struct page *page)
> {
> + struct zbud_page_priv *ppriv = (struct zbud_page_priv *)page->private;
> struct zbud_header *zhdr = page_address(page);
> +
> + INIT_LIST_HEAD(&page->lru);
> + ppriv->under_reclaim = 0;
> +
> zhdr->first_chunks = 0;
> zhdr->last_chunks = 0;
> INIT_LIST_HEAD(&zhdr->buddy);
> - INIT_LIST_HEAD(&zhdr->lru);
> - zhdr->under_reclaim = 0;
> return zhdr;
> }
>
> @@ -267,11 +278,22 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
> * over the zbud header in the first chunk.
> */
> handle = (unsigned long)zhdr;
> - if (bud == FIRST)
> + switch (bud) {
> + case FIRST:
> /* skip over zbud header */
> handle += ZHDR_SIZE_ALIGNED;
> - else /* bud == LAST */
> + break;
> + case LAST:
> handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
> + break;
> + case HEADLESS:
> + break;
> + default:
> + /* this should never happen */
> + pr_err("zbud: invalid buddy value %d\n", bud);
> + handle = 0;
> + break;
> + }
> return handle;
> }
>
> @@ -287,6 +309,7 @@ static int num_free_chunks(struct zbud_header *zhdr)
> /*
> * Rather than branch for different situations, just use the fact that
> * free buddies have a length of zero to simplify everything.
> + * NB: can't be used with HEADLESS pages.
> */
> return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
> }
> @@ -353,31 +376,40 @@ void zbud_destroy_pool(struct zbud_pool *pool)
> int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
> unsigned long *handle)
> {
> - int chunks, i, freechunks;
> + int chunks = 0, i, freechunks;
> struct zbud_header *zhdr = NULL;
> + struct zbud_page_priv *ppriv;
> enum buddy bud;
> struct page *page;
>
> if (!size || (gfp & __GFP_HIGHMEM))
> return -EINVAL;
> - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
> +
> + if (size > PAGE_SIZE)
> return -ENOSPC;
> - chunks = size_to_chunks(size);
> - spin_lock(&pool->lock);
>
> - /* First, try to find an unbuddied zbud page. */
> - zhdr = NULL;
> - for_each_unbuddied_list(i, chunks) {
> - if (!list_empty(&pool->unbuddied[i])) {
> - zhdr = list_first_entry(&pool->unbuddied[i],
> - struct zbud_header, buddy);
> - list_del(&zhdr->buddy);
> - if (zhdr->first_chunks == 0)
> - bud = FIRST;
> - else
> - bud = LAST;
> - goto found;
> + if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
> + bud = HEADLESS;
> + else {
> + chunks = size_to_chunks(size);
> + spin_lock(&pool->lock);
> +
> + /* First, try to find an unbuddied zbud page. */
> + zhdr = NULL;
> + for_each_unbuddied_list(i, chunks) {
> + if (!list_empty(&pool->unbuddied[i])) {
> + zhdr = list_first_entry(&pool->unbuddied[i],
> + struct zbud_header, buddy);
> + list_del(&zhdr->buddy);
> + page = virt_to_page(zhdr);
> + if (zhdr->first_chunks == 0)
> + bud = FIRST;
> + else
> + bud = LAST;
> + goto found;
> + }
> }
> + bud = FIRST;
> }
>
> /* Couldn't find unbuddied zbud page, create new one */
> @@ -388,27 +420,31 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
> spin_lock(&pool->lock);
> pool->pages_nr++;
> zhdr = init_zbud_page(page);
> - bud = FIRST;
>
> found:
> - if (bud == FIRST)
> - zhdr->first_chunks = chunks;
> - else
> - zhdr->last_chunks = chunks;
> + ppriv = (struct zbud_page_priv *)page->private;
> + if (bud != HEADLESS) {
> + if (bud == FIRST)
> + zhdr->first_chunks = chunks;
> + else
> + zhdr->last_chunks = chunks;
> +
> + if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
> + /* Add to unbuddied list */
> + freechunks = num_free_chunks(zhdr);
> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> + } else {
> + /* Add to buddied list */
> + list_add(&zhdr->buddy, &pool->buddied);
> + }
>
> - if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
> - /* Add to unbuddied list */
> - freechunks = num_free_chunks(zhdr);
> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> - } else {
> - /* Add to buddied list */
> - list_add(&zhdr->buddy, &pool->buddied);
> - }
> + /* Add/move zbud page to beginning of LRU */
> + if (!list_empty(&page->lru))
> + list_del(&page->lru);
> + } else
> + ppriv->headless = true;
>
> - /* Add/move zbud page to beginning of LRU */
> - if (!list_empty(&zhdr->lru))
> - list_del(&zhdr->lru);
> - list_add(&zhdr->lru, &pool->lru);
> + list_add(&page->lru, &pool->lru);
>
> *handle = encode_handle(zhdr, bud);
> spin_unlock(&pool->lock);
> @@ -430,28 +466,41 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
> {
> struct zbud_header *zhdr;
> int freechunks;
> + struct page *page;
> + enum buddy bud;
> + struct zbud_page_priv *ppriv;
>
> spin_lock(&pool->lock);
> zhdr = handle_to_zbud_header(handle);
> + page = virt_to_page(zhdr);
> + ppriv = (struct zbud_page_priv *)page->private;
>
> - /* If first buddy, handle will be page aligned */
> - if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
> + if (!(handle & ~PAGE_MASK)) /* HEADLESS page stored */
> + bud = HEADLESS;
> + else if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) {
> + bud = LAST;
> zhdr->last_chunks = 0;
> - else
> + } else {
> + /* If first buddy, handle will be page aligned */
> + bud = FIRST;
> zhdr->first_chunks = 0;
> + }
>
> - if (zhdr->under_reclaim) {
> + if (ppriv->under_reclaim) {
> /* zbud page is under reclaim, reclaim will free */
> spin_unlock(&pool->lock);
> return;
> }
>
> - /* Remove from existing buddy list */
> - list_del(&zhdr->buddy);
> + if (bud != HEADLESS) {
> + /* Remove from existing buddy list */
> + list_del(&zhdr->buddy);
> + }
>
> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> + if (bud == HEADLESS ||
> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) {
> /* zbud page is empty, free */
> - list_del(&zhdr->lru);
> + list_del(&page->lru);
> free_zbud_page(zhdr);
> pool->pages_nr--;
> } else {
> @@ -505,6 +554,8 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
> {
> int i, ret, freechunks;
> struct zbud_header *zhdr;
> + struct page *page;
> + struct zbud_page_priv *ppriv;
> unsigned long first_handle = 0, last_handle = 0;
>
> spin_lock(&pool->lock);
> @@ -514,21 +565,31 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
> return -EINVAL;
> }
> for (i = 0; i < retries; i++) {
> - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
> - list_del(&zhdr->lru);
> - list_del(&zhdr->buddy);
> + page = list_tail_entry(&pool->lru, struct page, lru);
> + ppriv = (struct zbud_page_priv *)page->private;
> + list_del(&page->lru);
> +
> /* Protect zbud page against free */
> - zhdr->under_reclaim = true;
> - /*
> - * We need encode the handles before unlocking, since we can
> - * race with free that will set (first|last)_chunks to 0
> - */
> - first_handle = 0;
> - last_handle = 0;
> - if (zhdr->first_chunks)
> - first_handle = encode_handle(zhdr, FIRST);
> - if (zhdr->last_chunks)
> - last_handle = encode_handle(zhdr, LAST);
> + ppriv->under_reclaim = true;
> + zhdr = page_address(page);
> + if (!ppriv->headless) {
> + list_del(&zhdr->buddy);
> + /*
> + * We need encode the handles before unlocking, since
> + * we can race with free that will set
> + * (first|last)_chunks to 0
> + */
> + first_handle = 0;
> + last_handle = 0;
> + if (zhdr->first_chunks)
> + first_handle = encode_handle(zhdr, FIRST);
> + if (zhdr->last_chunks)
> + last_handle = encode_handle(zhdr, LAST);
> + } else {
> + first_handle = encode_handle(zhdr, HEADLESS);
> + last_handle = 0;
> + }
> +
> spin_unlock(&pool->lock);
>
> /* Issue the eviction callback(s) */
> @@ -544,8 +605,9 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
> }
> next:
> spin_lock(&pool->lock);
> - zhdr->under_reclaim = false;
> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> + ppriv->under_reclaim = false;
> + if (ppriv->headless ||
> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) {
> /*
> * Both buddies are now free, free the zbud page and
> * return success.
> @@ -565,7 +627,7 @@ next:
> }
>
> /* add to beginning of LRU */
> - list_add(&zhdr->lru, &pool->lru);
> + list_add(&page->lru, &pool->lru);
> }
> spin_unlock(&pool->lock);
> return -EAGAIN;
> --
> 2.4.2
>