Re: [PATCH 3/3] High-res-timers part 3 (posix to hrposix) take 20

From: Andrew Morton (akpm@digeo.com)
Date: Tue Dec 10 2002 - 04:24:33 EST


george anzinger wrote:
>
> ...
> > radix-trees do not currently have a "find next empty slot from this
> > offset" function but that is quite straightforward. Not quite
> > as fast, unless an occupancy bitmap is added to the radix-tree
> > node. That's something whcih I have done before - in fact it was
> > an array of occupancy maps so I could do an efficient in-order
> > gang lookup of "all dirty pages from this offset" and "all locked
> > pages from this offset". It was before its time, and mouldered.
>
> Gosh, I think this is what I have. Is it already in the
> kernel tree somewhere? Oh, I found it. I will look at
> this, tomorrow...
>

A simple way of doing the "find an empty slot" is to descend the
tree, following the trail of nodes which have `count < 64' until
you hit the bottom. At each node you'll need to walk the slots[]
array to locate the first empty one.

That's quite a few cache misses. It can be optimised by adding
a 64-bit DECLARE_BITMAP to struct radix_tree_node. This actually
obsoletes `count', because you can just replace the test for
zero count with a test for `all 64 bits are zero'.

Such a search would be an extension to or variant of radix_tree_gang_lookup.
Something like the (old, untested) code below.

But it's a big job. First thing to do is to write a userspace
test harness for the radix-tree code. That's something I need to
do anyway, because radix_tree_gang_lookup fails for offests beyond
the 8TB mark, and it's such a pita fixing that stuff in-kernel.

Good luck ;)

 include/linux/radix-tree.h | 11 ++
 lib/radix-tree.c | 209 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 191 insertions(+), 29 deletions(-)

--- 2.5.34/lib/radix-tree.c~radix_tree_tagged_lookup Wed Sep 11 11:49:28 2002
+++ 2.5.34-akpm/lib/radix-tree.c Wed Sep 11 11:49:28 2002
@@ -32,9 +32,11 @@
 #define RADIX_TREE_MAP_SHIFT 6
 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
+#define NR_TAGS ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
 struct radix_tree_node {
         unsigned int count;
+ unsigned long tags[NR_TAGS];
         void *slots[RADIX_TREE_MAP_SIZE];
 };
 
@@ -221,15 +223,70 @@ void *radix_tree_lookup(struct radix_tre
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
+/**
+ * radix_tree_tag - tag an existing node
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Tag a path down to a known-to-exist item.
+ */
+void radix_tree_tag(struct radix_tree_root *root, unsigned long index)
+{
+ unsigned int height, shift;
+ struct radix_tree_node **slot;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = &root->rnode;
+ root->tag = 1;
+
+ while (height > 0) {
+ unsigned int offset;
+
+ BUG_ON(*slot == NULL);
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (slot != &root->rnode) {
+ if (!test_bit(offset, (*slot)->tags))
+ set_bit(offset, (*slot)->tags);
+ }
+ slot = (struct radix_tree_node **)((*slot)->slots + offset);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+}
+EXPORT_SYMBOL(radix_tree_tag);
+
+enum tag_mode {
+ TM_NONE,
+ TM_TEST,
+ TM_TEST_CLEAR,
+};
+
+static inline int tags_clear(struct radix_tree_node *node)
+{
+ int i;
+
+ for (i = 0; i < NR_TAGS; i++) {
+ if (node->tags[i])
+ return 0;
+ }
+ return 1;
+}
+
 static /* inline */ unsigned int
 __lookup(struct radix_tree_root *root, void **results, unsigned long index,
         unsigned int max_items, unsigned long *next_index,
- unsigned long max_index)
+ unsigned long max_index, enum tag_mode tag_mode)
 {
         unsigned int nr_found = 0;
         unsigned int shift;
         unsigned int height = root->height;
         struct radix_tree_node *slot;
+ struct radix_tree_node *path[RADIX_TREE_MAX_PATH];
+ struct radix_tree_node **pathp = path;
 
         if (index > max_index)
                 return 0;
@@ -239,8 +296,12 @@ __lookup(struct radix_tree_root *root, v
         while (height > 0) {
                 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
                 for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
- if (slot->slots[i] != NULL)
- break;
+ if (slot->slots[i] != NULL) {
+ if (tag_mode == TM_NONE)
+ break;
+ if (test_bit(i, slot->tags))
+ break;
+ }
                         index &= ~((1 << shift) - 1);
                         index += 1 << shift;
                 }
@@ -248,6 +309,7 @@ __lookup(struct radix_tree_root *root, v
                         goto out;
                 height--;
                 shift -= RADIX_TREE_MAP_SHIFT;
+ *pathp++ = slot;
                 if (height == 0) {
                         /* Bottom level: grab some items */
                         unsigned long j;
@@ -257,36 +319,46 @@ __lookup(struct radix_tree_root *root, v
                         j = index & RADIX_TREE_MAP_MASK;
                         for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
                                 index++;
- if (slot->slots[j]) {
- results[nr_found++] = slot->slots[j];
- if (nr_found == max_items)
- goto out;
+ if (!slot->slots[j])
+ continue;
+ if (tag_mode == TM_TEST) {
+ if (!test_bit(j, slot->tags))
+ continue;
+ }
+ if (tag_mode == TM_TEST_CLEAR) {
+ if (!test_and_clear_bit(j, slot->tags))
+ continue;
                                 }
+ results[nr_found++] = slot->slots[j];
+ if (nr_found == max_items)
+ goto out;
                         }
                 }
                 slot = slot->slots[i];
         }
 out:
+ if (tag_mode == TM_TEST_CLEAR) {
+ while (pathp > path) {
+ if (tags_clear(pathp[1])) {
+ unsigned int offset;
+
+ offset = (void **)pathp[1] - pathp[0]->slots;
+ BUG_ON(offset >= RADIX_TREE_MAP_SIZE);
+ clear_bit(offset, pathp[0]->tags);
+ } else {
+ break;
+ }
+ }
+ }
         *next_index = index;
         return nr_found;
         
 }
-/**
- * radix_tree_gang_lookup - perform multiple lookup on a radix tree
- * @root: radix tree root
- * @results: where the results of the lookup are placed
- * @first_index: start the lookup from this key
- * @max_items: place up to this many items at *results
- *
- * Performs an index-ascending scan of the tree for present items. Places
- * them at *@results and returns the number of items which were placed at
- * *@results.
- *
- * The implementation is naive.
- */
-unsigned int
-radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
- unsigned long first_index, unsigned int max_items)
+
+static unsigned int
+gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ enum tag_mode tag_mode)
 {
         const unsigned long max_index = radix_tree_maxindex(root->height);
         unsigned long cur_index = first_index;
@@ -297,18 +369,37 @@ radix_tree_gang_lookup(struct radix_tree
         if (max_index == 0) { /* Bah. Special case */
                 if (first_index == 0) {
                         if (max_items > 0) {
- *results = root->rnode;
- ret = 1;
+ switch (tag_mode) {
+ case TM_NONE:
+ *results = root->rnode;
+ ret = 1;
+ break;
+ case TM_TEST:
+ if (root->tag) {
+ *results = root->rnode;
+ ret = 1;
+ }
+ break;
+ case TM_TEST_CLEAR:
+ if (root->tag) {
+ *results = root->rnode;
+ ret = 1;
+ root->tag = 0;
+ }
+ break;
+ }
                         }
                 }
                 goto out;
         }
+
         while (ret < max_items) {
                 unsigned int nr_found;
                 unsigned long next_index; /* Index of next search */
 
                 nr_found = __lookup(root, results + ret, cur_index,
- max_items - ret, &next_index, max_index);
+ max_items - ret, &next_index,
+ max_index, tag_mode);
                 if (nr_found == 0)
                         break;
                 ret += nr_found;
@@ -317,9 +408,70 @@ radix_tree_gang_lookup(struct radix_tree
 out:
         return ret;
 }
+
+/**
+ * radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items. Places them
+ * at *@results and returns the number of items which were placed at *@results.
+ *
+ * The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ return gang_lookup(root, results, first_index, max_items, TM_NONE);
+}
 EXPORT_SYMBOL(radix_tree_gang_lookup);
 
 /**
+ * radix_tree_test_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items which are
+ * tagged. Places them at *@results and returns the number of items which were
+ * placed at *@results.
+ */
+unsigned int
+radix_tree_test_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ return gang_lookup(root, results, first_index, max_items, TM_TEST);
+}
+EXPORT_SYMBOL(radix_tree_test_gang_lookup);
+
+/**
+ * radix_tree_test_clear_gang_lookup - perform multiple lookup on a radix tree,
+ * clearing its tag tree.
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items which are
+ * tagged. Places them at *@results and returns the number of items which were
+ * placed at *@results.
+ *
+ * The tags are cleared on the path back up from the found items.
+ */
+unsigned int
+radix_tree_test_clear_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ return gang_lookup(root, results, first_index,
+ max_items, TM_TEST_CLEAR);
+}
+EXPORT_SYMBOL(radix_tree_test_clear_gang_lookup);
+
+/**
  * radix_tree_delete - delete an item from a radix tree
  * @root: radix tree root
  * @index: index key
@@ -366,7 +518,8 @@ int radix_tree_delete(struct radix_tree_
 
 EXPORT_SYMBOL(radix_tree_delete);
 
-static void radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
+static void
+radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
 {
         memset(node, 0, sizeof(struct radix_tree_node));
 }
@@ -390,7 +543,7 @@ void __init radix_tree_init(void)
 {
         radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
                         sizeof(struct radix_tree_node), 0,
- SLAB_HWCACHE_ALIGN, radix_tree_node_ctor, NULL);
+ 0, radix_tree_node_ctor, NULL);
         if (!radix_tree_node_cachep)
                 panic ("Failed to create radix_tree_node cache\n");
         radix_tree_node_pool = mempool_create(512, radix_tree_node_pool_alloc,
--- 2.5.34/include/linux/radix-tree.h~radix_tree_tagged_lookup Wed Sep 11 11:49:28 2002
+++ 2.5.34-akpm/include/linux/radix-tree.h Wed Sep 11 11:49:28 2002
@@ -26,10 +26,11 @@ struct radix_tree_node;
 struct radix_tree_root {
         unsigned int height;
         int gfp_mask;
+ int tag; /* ugh. dirtiness of the top node */
         struct radix_tree_node *rnode;
 };
 
-#define RADIX_TREE_INIT(mask) {0, (mask), NULL}
+#define RADIX_TREE_INIT(mask) {0, (mask), 0, NULL}
 
 #define RADIX_TREE(name, mask) \
         struct radix_tree_root name = RADIX_TREE_INIT(mask)
@@ -38,6 +39,7 @@ struct radix_tree_root {
 do { \
         (root)->height = 0; \
         (root)->gfp_mask = (mask); \
+ (root)->tag = 0; \
         (root)->rnode = NULL; \
 } while (0)
 
@@ -48,5 +50,12 @@ extern int radix_tree_delete(struct radi
 extern unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                         unsigned long first_index, unsigned int max_items);
+extern unsigned int
+radix_tree_test_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items);
+extern unsigned int
+radix_tree_test_clear_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items);
+void radix_tree_tag(struct radix_tree_root *root, unsigned long index);
 
 #endif /* _LINUX_RADIX_TREE_H */

.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Dec 15 2002 - 22:00:17 EST