Re: [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree

From: Peter Zijlstra
Date: Thu Feb 26 2015 - 09:13:13 EST


On Thu, Feb 26, 2015 at 12:43:09PM +0100, Peter Zijlstra wrote:

> +static struct module *__tree_find(struct rb_root *r, unsigned long addr)
> +{
> + struct rb_node *n = r->rb_node;
> +
> + while (n) {
> + struct module *m = mod_entry(n);
> + int idx = mod_node_idx(m, n);
> +
> + if (idx & 2) { /* core */
> + if (addr < (unsigned long)m->module_core)
> + n = n->rb_left;
> + else if (addr >= (unsigned long)m->module_core + m->core_size)
> + n = n->rb_right;
> + else
> + return m;
> + } else { /* init */
> + if (addr < (unsigned long)m->module_init)
> + n = n->rb_left;
> + else if (addr >= (unsigned long)m->module_init + m->init_size)
> + n = n->rb_right;
> + else
> + return m;
> + }
> + }
> +
> + return NULL;
> +}

Assuming struct module is cacheline aligned, Rusty? from what I can find
its squirreled away in some data structure:

mod = (void *)info->sechdrs[info->index.mod].sh_addr

And I can't seem to quickly find its alignment. If its not cacheline
aligned, can we make it so?

The below patch shuffles things around a bit and relies on the above
assumption.

#pahole -EC module ivb-ep-build/kernel/module.o

struct module {
...

/* --- cacheline 6 boundary (384 bytes) --- */
void * module_init; /* 384 8 */
void * module_core; /* 392 8 */
unsigned int init_size; /* 400 4 */
unsigned int core_size; /* 404 4 */
unsigned int init_text_size; /* 408 4 */
unsigned int core_text_size; /* 412 4 */
struct module_node {
struct module * mod; /* 416 8 */
struct rb_node {
long unsigned int __rb_parent_color; /* 424 8 */
struct rb_node * rb_right; /* 432 8 */
struct rb_node * rb_left; /* 440 8 */
} node; /* 424 24 */
} tree_node[4]; /* 416 128 */

...
};

Which gets module_{init,core} and {init,core}_size on the same cacheline
as tree_node[0].

And given that module loading/unloading is rare, tree modifications will
be rare, and we'll normally always iterate tree latch0. Furthermore,
execution of init code is brief and by switching core to elements 0,2
we've insured to typically only hit tree_node[0].

Therefore the loads in __tree_find() will (typically) hit the same
cacheline and life is good.

---

Index: linux-2.6/include/linux/module.h
===================================================================
--- linux-2.6.orig/include/linux/module.h 2015-02-26 14:59:06.549177534 +0100
+++ linux-2.6/include/linux/module.h 2015-02-26 14:53:27.708162155 +0100
@@ -222,8 +222,6 @@
/* Member of list of modules */
struct list_head list;

- struct module_node tree_node[4];
-
/* Unique handle for this module */
char name[MODULE_NAME_LEN];

@@ -278,7 +276,7 @@
int (*init)(void);

/* If this is non-NULL, vfree after init() returns */
- void *module_init;
+ void *module_init ____cacheline_aligned;

/* Here is the actual code + data, vfree'd on unload. */
void *module_core;
@@ -289,6 +287,8 @@
/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;

+ struct module_node tree_node[4];
+
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;

Index: linux-2.6/kernel/module.c
===================================================================
--- linux-2.6.orig/kernel/module.c 2015-02-26 14:59:06.551177534 +0100
+++ linux-2.6/kernel/module.c 2015-02-26 14:52:04.094158360 +0100
@@ -125,10 +125,10 @@
* module, but other than that its pretty straight forward.
*
* idx
- * 0 - latch 0, init
- * 1 - latch 1, init
- * 2 - latch 0, core
- * 3 - latch 1, core
+ * 0 - latch 0, core
+ * 1 - latch 1, core
+ * 2 - latch 0, init
+ * 3 - latch 1, init
*/

struct latch_tree_root {
@@ -138,10 +138,10 @@

static unsigned long mod_value(struct module *mod, int idx)
{
- if (idx & 2) { /* core */
- return (unsigned long)mod->module_core;
- } else { /* init */
+ if (idx & 2) { /* init */
return (unsigned long)mod->module_init;
+ } else { /* core */
+ return (unsigned long)mod->module_core;
}
}

@@ -207,17 +207,17 @@
struct module *m = mod_entry(n);
int idx = mod_node_idx(m, n);

- if (idx & 2) { /* core */
- if (addr < (unsigned long)m->module_core)
+ if (idx & 2) { /* init */
+ if (addr < (unsigned long)m->module_init)
n = n->rb_left;
- else if (addr >= (unsigned long)m->module_core + m->core_size)
+ else if (addr >= (unsigned long)m->module_init + m->init_size)
n = n->rb_right;
else
return m;
- } else { /* init */
- if (addr < (unsigned long)m->module_init)
+ } else { /* core */
+ if (addr < (unsigned long)m->module_core)
n = n->rb_left;
- else if (addr >= (unsigned long)m->module_init + m->init_size)
+ else if (addr >= (unsigned long)m->module_core + m->core_size)
n = n->rb_right;
else
return m;
@@ -232,35 +232,35 @@
static void mod_tree_insert(struct module *mod)
{
raw_write_seqcount_latch(&mod_tree.seq);
+ __tree_insert(&mod_tree, mod, 0); /* core */
if (mod->init_size)
- __tree_insert(&mod_tree, mod, 0); /* init */
- __tree_insert(&mod_tree, mod, 2); /* core */
+ __tree_insert(&mod_tree, mod, 2); /* init */
raw_write_seqcount_latch(&mod_tree.seq);
+ __tree_insert(&mod_tree, mod, 1); /* core */
if (mod->init_size)
- __tree_insert(&mod_tree, mod, 1); /* init */
- __tree_insert(&mod_tree, mod, 3); /* core */
+ __tree_insert(&mod_tree, mod, 3); /* init */
}

static void mod_tree_remove_init(struct module *mod)
{
raw_write_seqcount_latch(&mod_tree.seq);
if (mod->init_size)
- __tree_remove(&mod_tree, mod, 0); /* init */
+ __tree_remove(&mod_tree, mod, 2); /* init */
raw_write_seqcount_latch(&mod_tree.seq);
if (mod->init_size)
- __tree_remove(&mod_tree, mod, 1); /* init */
+ __tree_remove(&mod_tree, mod, 3); /* init */
}

static void mod_tree_remove(struct module *mod)
{
raw_write_seqcount_latch(&mod_tree.seq);
+ __tree_remove(&mod_tree, mod, 0); /* core */
if (mod->init_size)
- __tree_remove(&mod_tree, mod, 0); /* init */
- __tree_remove(&mod_tree, mod, 2); /* core */
+ __tree_remove(&mod_tree, mod, 2); /* init */
raw_write_seqcount_latch(&mod_tree.seq);
+ __tree_remove(&mod_tree, mod, 1); /* core */
if (mod->init_size)
- __tree_remove(&mod_tree, mod, 1); /* init */
- __tree_remove(&mod_tree, mod, 3); /* core */
+ __tree_remove(&mod_tree, mod, 3); /* init */
}

static struct module *mod_tree_find(unsigned long addr)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/