Re: [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree

From: Peter Zijlstra
Date: Thu Feb 26 2015 - 17:32:35 EST


On Thu, Feb 26, 2015 at 08:45:16PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 26, 2015 at 11:41:44AM -0800, Paul E. McKenney wrote:
> > Or just have the *_ONCE() calls in the single version. I bet that there
> > is no visible performance loss.
>
> I'll go play with that tomorrow, see what GCC makes of it.

Who needs sleep anyhow.. ;-)

21 extra bytes, I've not quite yet figured out what for.

The below changes the insert/erase functions to use WRITE_ONCE() for the
rb_{left,right} assignments and removes one program order loop.

I did make squiggles for all scenarios to see if any of the intermediate
states had loops in, I only found the one case (erase, case 3).

I also added an rb_link_node_rcu() which does the store_release thing --
I didn't want to include rcupdate.h for fear of header insanity.

We need the store_release because of the node setup in there. So I can
forget about my mod_tree_init().

---
include/linux/rbtree.h | 10 ++++++
include/linux/rbtree_augmented.h | 21 ++++++++----
lib/rbtree.c | 71 +++++++++++++++++++++++++++-------------
3 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index fb31765e935a..ae9df86bf9ba 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -31,6 +31,7 @@

#include <linux/kernel.h>
#include <linux/stddef.h>
+#include <asm/barrier.h>

struct rb_node {
unsigned long __rb_parent_color;
@@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
*rb_link = node;
}

+static inline void rb_link_node_rcu(struct rb_node * node, struct rb_node * parent,
+ struct rb_node ** rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ smp_store_release(rb_link, node);
+}
+
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 378c5ee75f78..14d7b831b63a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}

extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -137,7 +137,8 @@ static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;

@@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
@@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
@@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}

- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);

pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c16c81a3d430..0b3ec6a75b76 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,6 +44,25 @@
* parentheses and have some accompanying text comment.
*/

+/*
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
@@ -129,8 +148,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
- parent->rb_right = tmp = node->rb_left;
- node->rb_left = parent;
+ tmp = node->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp);
+ WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -149,8 +169,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* / \
* n U
*/
- gparent->rb_left = tmp; /* == parent->rb_right */
- parent->rb_right = gparent;
+ WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+ WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -171,8 +191,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
- parent->rb_left = tmp = node->rb_right;
- node->rb_right = parent;
+ tmp = node->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp);
+ WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -183,8 +204,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}

/* Case 3 - left rotate at gparent */
- gparent->rb_right = tmp; /* == parent->rb_left */
- parent->rb_left = gparent;
+ WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+ WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -224,8 +245,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* Sl Sr N Sl
*/
- parent->rb_right = tmp1 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp1 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp1);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -275,9 +297,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* \
* Sr
*/
- sibling->rb_left = tmp1 = tmp2->rb_right;
- tmp2->rb_right = sibling;
- parent->rb_right = tmp2;
+ tmp1 = tmp2->rb_right;
+ WRITE_ONCE(sibling->rb_left, tmp1);
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(tmp2->rb_right, sibling);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -297,8 +320,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* (sl) sr N (sl)
*/
- parent->rb_right = tmp2 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp2 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
@@ -310,8 +334,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
- parent->rb_left = tmp1 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp1 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp1);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -336,9 +361,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
break;
}
/* Case 3 - right rotate at sibling */
- sibling->rb_right = tmp1 = tmp2->rb_left;
- tmp2->rb_left = sibling;
- parent->rb_left = tmp2;
+ tmp1 = tmp2->rb_left;
+ WRITE_ONCE(sibling->rb_right, tmp1);
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(tmp2->rb_left, sibling);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -347,8 +373,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = tmp2;
}
/* Case 4 - left rotate at parent + color flips */
- parent->rb_left = tmp2 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp2 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/