[RFC 5/6] zsmalloc: support compaction

From: Minchan Kim
Date: Mon Dec 01 2014 - 21:51:49 EST


This patch adds compaction support to zsmalloc so that users can
trigger it by calling zs_compact(pool).
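
For illustration, a minimal sketch of how a pool owner (e.g. a
zram-style backend) might drive compaction; everything here except
zs_compact() itself is hypothetical and only shows the intended usage:

    #include <linux/printk.h>
    #include <linux/zsmalloc.h>

    /* Hypothetical helper: compact a pool and report how much moved. */
    static void example_compact_pool(struct zs_pool *pool)
    {
            unsigned long nr_migrated;

            /*
             * Walks every size class, migrating objects out of almost
             * empty zspages into fuller ones; returns the number of
             * objects migrated.
             */
            nr_migrated = zs_compact(pool);
            pr_info("zsmalloc: migrated %lu objects\n", nr_migrated);
    }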

The migration policy is as follows (a simplified sketch of the loop
appears after the list):

1. find objects to migrate in ZS_ALMOST_EMPTY zspages
2. find free space in ZS_ALMOST_FULL zspages; if none is found, fall
   back to ZS_ALMOST_EMPTY
3. migrate the objects found in step 1 into the free space found in
   step 2
4. repeat steps 1-3 for every size class
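
A simplified sketch of the per-class loop that __zs_compact() below
implements. The helper names match the patch, but migrate_objects()
stands in for migrate_zspage() (which really takes a
zs_compact_control), and locking, fullness-group bookkeeping and the
retry path are omitted:

    static unsigned long compact_one_class(struct zs_pool *pool,
                                           struct size_class *class)
    {
            unsigned long nr_migrated = 0;
            struct page *src, *dst;

            /* 1. take a source zspage from ZS_ALMOST_EMPTY */
            while ((src = isolate_source_page(class))) {
                    /* 2. grab a destination zspage with free space */
                    dst = alloc_target_page(class);
                    if (!dst)
                            break;
                    /*
                     * 3. copy used objects from src into dst and point
                     * their handles at the new copies
                     */
                    nr_migrated += migrate_objects(pool, class, src, dst);
                    putback_target_page(dst, class);
            }
            return nr_migrated;
    }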

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
include/linux/zsmalloc.h | 1 +
mm/zsmalloc.c | 344 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 330 insertions(+), 15 deletions(-)

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 05c214760977..04ecd3fc4283 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -47,5 +47,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
void zs_unmap_object(struct zs_pool *pool, unsigned long handle);

unsigned long zs_get_total_pages(struct zs_pool *pool);
+unsigned long zs_compact(struct zs_pool *pool);

#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 16c40081c22e..304595d97610 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -227,6 +227,7 @@ struct zs_pool {
struct size_class **size_class;
struct size_class *handle_class;

+ rwlock_t migrate_lock;
gfp_t flags; /* allocation flags used when growing pool */
atomic_long_t pages_allocated;
};
@@ -618,6 +619,24 @@ static unsigned long handle_to_obj(struct zs_pool *pool, unsigned long handle)
return obj;
}

+static unsigned long obj_to_handle(struct zs_pool *pool,
+ struct size_class *class, unsigned long obj)
+{
+ struct page *page;
+ unsigned long obj_idx, off;
+ unsigned long handle;
+ void *addr;
+
+ obj_to_location(obj, &page, &obj_idx);
+ off = obj_idx_to_offset(page, obj_idx, class->size);
+
+ addr = kmap_atomic(page);
+ handle = *(unsigned long *)(addr + off);
+ kunmap_atomic(addr);
+
+ return handle;
+}
+
static unsigned long alloc_handle(struct zs_pool *pool)
{
unsigned long handle;
@@ -1066,6 +1085,8 @@ struct zs_pool *zs_create_pool(gfp_t flags)
if (!pool)
return NULL;

+ rwlock_init(&pool->migrate_lock);
+
if (create_handle_class(pool, ZS_HANDLE_SIZE))
goto err;

@@ -1157,20 +1178,41 @@ void zs_destroy_pool(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_destroy_pool);

-static unsigned long __zs_malloc(struct zs_pool *pool,
- struct size_class *class, gfp_t flags, unsigned long handle)
+static unsigned long __obj_malloc(struct page *first_page,
+ struct size_class *class, unsigned long handle)
{
unsigned long obj;
struct link_free *link;
- struct page *first_page, *m_page;
+ struct page *m_page;
unsigned long m_objidx, m_offset;
void *vaddr;

+ obj = (unsigned long)first_page->freelist;
+ obj_to_location(obj, &m_page, &m_objidx);
+ m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
+
+ vaddr = kmap_atomic(m_page);
+ link = (struct link_free *)vaddr + m_offset / sizeof(*link);
+ first_page->freelist = link->next;
+ link->handle = handle;
+ kunmap_atomic(vaddr);
+
+ first_page->inuse++;
+ return obj;
+}
+
+static unsigned long __zs_malloc(struct zs_pool *pool,
+ struct size_class *class, gfp_t flags, unsigned long handle)
+{
+ struct page *first_page;
+ unsigned long obj;
+
spin_lock(&class->lock);
first_page = find_get_zspage(class);

if (!first_page) {
spin_unlock(&class->lock);
+ read_unlock(&pool->migrate_lock);
first_page = alloc_zspage(class, flags);
if (unlikely(!first_page))
return 0;
@@ -1178,21 +1220,11 @@ static unsigned long __zs_malloc(struct zs_pool *pool,
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
atomic_long_add(class->pages_per_zspage,
&pool->pages_allocated);
+ read_lock(&pool->migrate_lock);
spin_lock(&class->lock);
}

- obj = (unsigned long)first_page->freelist;
- obj_to_location(obj, &m_page, &m_objidx);
- m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
-
- vaddr = kmap_atomic(m_page);
- link = (struct link_free *)vaddr + m_offset / sizeof(*link);
- first_page->freelist = link->next;
- link->handle = handle;
- kunmap_atomic(vaddr);
-
- first_page->inuse++;
-
+ obj = __obj_malloc(first_page, class, handle);
if (handle) {
unsigned long *h_addr;

@@ -1225,6 +1257,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
if (unlikely(!size || (size + ZS_HANDLE_SIZE) > ZS_MAX_ALLOC_SIZE))
return 0;

+ read_lock(&pool->migrate_lock);
/* allocate handle */
handle = alloc_handle(pool);
if (!handle)
@@ -1240,6 +1273,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
goto out;
}
out:
+ read_unlock(&pool->migrate_lock);
return handle;
}
EXPORT_SYMBOL_GPL(zs_malloc);
@@ -1299,6 +1333,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
if (unlikely(!handle))
return;

+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(pool, handle);
/* free handle */
free_handle(pool, handle);
@@ -1311,6 +1346,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
class = pool->size_class[class_idx];

__zs_free(pool, class, obj);
+ read_unlock(&pool->migrate_lock);
}
EXPORT_SYMBOL_GPL(zs_free);

@@ -1343,6 +1379,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,

BUG_ON(!handle);

+ read_lock(&pool->migrate_lock);
/*
* Because we use per-cpu mapping areas shared among the
* pools/users, we can't allow mapping in interrupt context
@@ -1405,6 +1442,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
__zs_unmap_object(area, pages, off, class->size);
}
put_cpu_var(zs_map_area);
+ read_unlock(&pool->migrate_lock);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);

@@ -1414,6 +1452,282 @@ unsigned long zs_get_total_pages(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_get_total_pages);

+static void zs_object_copy(unsigned long src, unsigned long dst,
+ struct size_class *class)
+{
+ struct page *s_page, *d_page;
+ unsigned long s_objidx, d_objidx;
+ unsigned long s_off, d_off;
+ void *s_addr, *d_addr;
+ int s_size, d_size, size;
+ int written = 0;
+
+ s_size = d_size = class->size;
+
+ obj_to_location(src, &s_page, &s_objidx);
+ obj_to_location(dst, &d_page, &d_objidx);
+
+ s_off = obj_idx_to_offset(s_page, s_objidx, class->size);
+ d_off = obj_idx_to_offset(d_page, d_objidx, class->size);
+
+ if (s_off + class->size > PAGE_SIZE)
+ s_size = PAGE_SIZE - s_off;
+
+ if (d_off + class->size > PAGE_SIZE)
+ d_size = PAGE_SIZE - d_off;
+
+ s_addr = kmap_atomic(s_page);
+ d_addr = kmap_atomic(d_page);
+
+ while (1) {
+ size = min(s_size, d_size);
+ memcpy(d_addr + d_off, s_addr + s_off, size);
+ written += size;
+
+ if (written == class->size)
+ break;
+
+ if (s_off + size >= PAGE_SIZE) {
+ kunmap_atomic(s_addr);
+ s_page = get_next_page(s_page);
+ BUG_ON(!s_page);
+ s_addr = kmap_atomic(s_page);
+ s_size = class->size - written;
+ s_off = 0;
+ } else {
+ s_off += size;
+ s_size -= size;
+ }
+
+ if (d_off + size >= PAGE_SIZE) {
+ kunmap_atomic(d_addr);
+ d_page = get_next_page(d_page);
+ BUG_ON(!d_page);
+ d_addr = kmap_atomic(d_page);
+ d_size = class->size - written;
+ d_off = 0;
+ } else {
+ d_off += size;
+ d_size -= size;
+ }
+ }
+
+ kunmap_atomic(s_addr);
+ kunmap_atomic(d_addr);
+}
+
+static unsigned long find_alloced_obj(struct page *page, int index,
+ struct size_class *class)
+{
+ int offset = 0;
+ unsigned long obj = 0;
+ void *addr = kmap_atomic(page);
+
+ if (!is_first_page(page))
+ offset = page->index;
+ offset += class->size * index;
+
+ while (offset < PAGE_SIZE) {
+ if (*(unsigned long *)(addr + offset) & OBJ_ALLOCATED) {
+ obj = (unsigned long)obj_location_to_handle(page,
+ index);
+ break;
+ }
+
+ offset += class->size;
+ index++;
+ }
+
+ kunmap_atomic(addr);
+ return obj;
+}
+
+struct zs_compact_control {
+ struct page *s_page; /* from page for migration */
+ int index; /* start index from @s_page for finding used object */
+ struct page *d_page; /* to page for migration */
+ unsigned long nr_migrated;
+ int nr_to_migrate;
+};
+
+static void migrate_zspage(struct zs_pool *pool, struct zs_compact_control *cc,
+ struct size_class *class)
+{
+ unsigned long used_obj, free_obj;
+ unsigned long handle;
+ struct page *s_page = cc->s_page;
+ unsigned long index = cc->index;
+ struct page *d_page = cc->d_page;
+ unsigned long *h_addr;
+ bool exit = false;
+
+ BUG_ON(!is_first_page(d_page));
+
+ while (1) {
+ used_obj = find_alloced_obj(s_page, index, class);
+ if (!used_obj) {
+ s_page = get_next_page(s_page);
+ if (!s_page)
+ break;
+ index = 0;
+ continue;
+ }
+
+ if (d_page->inuse == d_page->objects)
+ break;
+
+ free_obj = __obj_malloc(d_page, class, 0);
+
+ zs_object_copy(used_obj, free_obj, class);
+
+ obj_to_location(used_obj, &s_page, &index);
+ index++;
+
+ handle = obj_to_handle(pool, class, used_obj);
+ h_addr = handle_to_addr(pool, handle);
+ BUG_ON(*h_addr != used_obj);
+ *h_addr = free_obj;
+ cc->nr_migrated++;
+
+ /* Don't need a class->lock due to migrate_lock */
+ insert_zspage(get_first_page(s_page), class, ZS_ALMOST_EMPTY);
+
+ /*
+ * We don't want __zs_free to return whether it freed the zspage,
+ * so check page->inuse right before calling __zs_free and exit
+ * the loop if this was the last object.
+ */
+ if (get_first_page(s_page)->inuse == 1)
+ exit = true;
+
+ __zs_free(pool, class, used_obj);
+ if (exit)
+ break;
+
+ remove_zspage(get_first_page(s_page), class, ZS_ALMOST_EMPTY);
+ }
+
+ cc->s_page = s_page;
+ cc->index = index;
+}
+
+static struct page *alloc_target_page(struct size_class *class)
+{
+ int i;
+ struct page *page;
+
+ spin_lock(&class->lock);
+ for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
+ page = class->fullness_list[i];
+ if (page) {
+ remove_zspage(page, class, i);
+ break;
+ }
+ }
+ spin_unlock(&class->lock);
+
+ return page;
+}
+
+static void putback_target_page(struct page *page, struct size_class *class)
+{
+ int class_idx;
+ enum fullness_group currfg;
+
+ BUG_ON(!is_first_page(page));
+
+ spin_lock(&class->lock);
+ get_zspage_mapping(page, &class_idx, &currfg);
+ insert_zspage(page, class, currfg);
+ fix_fullness_group(class, page);
+ spin_unlock(&class->lock);
+}
+
+static struct page *isolate_source_page(struct size_class *class)
+{
+ struct page *page;
+
+ spin_lock(&class->lock);
+ page = class->fullness_list[ZS_ALMOST_EMPTY];
+ if (page)
+ remove_zspage(page, class, ZS_ALMOST_EMPTY);
+ spin_unlock(&class->lock);
+
+ return page;
+}
+
+static void putback_source_page(struct page *page, struct size_class *class)
+{
+ spin_lock(&class->lock);
+ insert_zspage(page, class, ZS_ALMOST_EMPTY);
+ fix_fullness_group(class, page);
+ spin_unlock(&class->lock);
+}
+
+static unsigned long __zs_compact(struct zs_pool *pool,
+ struct size_class *class)
+{
+ unsigned long nr_total_migrated = 0;
+ struct page *src_page, *dst_page;
+
+ write_lock(&pool->migrate_lock);
+ while ((src_page = isolate_source_page(class))) {
+ struct zs_compact_control cc;
+
+ BUG_ON(!is_first_page(src_page));
+
+ cc.index = 0;
+ cc.s_page = src_page;
+ cc.nr_to_migrate = src_page->inuse;
+ cc.nr_migrated = 0;
+
+ BUG_ON(0 >= cc.nr_to_migrate);
+retry:
+ dst_page = alloc_target_page(class);
+ if (!dst_page)
+ break;
+ cc.d_page = dst_page;
+
+ migrate_zspage(pool, &cc, class);
+ putback_target_page(cc.d_page, class);
+
+ if (cc.nr_migrated < cc.nr_to_migrate)
+ goto retry;
+
+ write_unlock(&pool->migrate_lock);
+ write_lock(&pool->migrate_lock);
+ nr_total_migrated += cc.nr_migrated;
+ }
+
+ if (src_page)
+ putback_source_page(src_page, class);
+
+ write_unlock(&pool->migrate_lock);
+
+ return nr_total_migrated;
+}
+
+unsigned long zs_compact(struct zs_pool *pool)
+{
+ int i;
+ unsigned long nr_migrated = 0;
+
+ for (i = 0; i < zs_size_classes; i++) {
+ struct size_class *class = pool->size_class[i];
+
+ if (!class)
+ continue;
+
+ if (class->index != i)
+ continue;
+
+ nr_migrated += __zs_compact(pool, class);
+ }
+
+ return nr_migrated;
+}
+EXPORT_SYMBOL_GPL(zs_compact);
+
module_init(zs_init);
module_exit(zs_exit);

--
2.0.0
