Re: [PATCH] Linux-2.5 fix/improve get_pid()

From: Andrew Morton (akpm@zip.com.au)
Date: Wed Aug 07 2002 - 18:06:05 EST


Paul Larson wrote:
>
> This patch provides an improved version of get_pid() while also taking
> care of the bug that causes the machine to hang when you hit PID_MAX.

Has this been evaluated against Bill Irwin's constant-time
allocator? Bill says it has slightly worse normal-case and
vastly improved worst-case performance over the stock allocator.
Not sure how it stacks up against this one. Plus it's much nicer
looking code.

He's shy, so.....

#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/init.h>

/*
 * pid allocator
 * (C) 2002 William Irwin, IBM
 *
 * The strategy is to maintain a tower of bitmaps where a bitmap above
 * another in each bit accounts whether any pid's are available in the
 * space tracked by BITS_PER_LONG bits of the level below. The bitmaps
 * must be marked on allocation and also release, hence some
 * infrastructure for detecting when the last user of a pid releases it
 * must be in place.
 *
 * This general strategy is simple in concept and enforces highly
 * deterministic bounds on the search time for the next pid.
 */

/* One past the highest allocatable pid; pids range over [0, PID_MAX). */
#define PID_MAX 0x8000
/* When the allocator wraps around, it restarts above this value. */
#define RESERVED_PIDS 300

/*
 * Level 0 holds one bit per pid; each higher level holds one bit per
 * word of the level below, so each level is BITS_PER_LONG times smaller.
 */
#define MAP0_SIZE (PID_MAX >> BITS_PER_LONG_SHIFT)
#define MAP1_SIZE (MAP0_SIZE >> BITS_PER_LONG_SHIFT)
#define MAP2_SIZE (MAP1_SIZE >> BITS_PER_LONG_SHIFT)

/* Right-shift converting a pid to its word index at each level. */
#define MAP0_SHIFT BITS_PER_LONG_SHIFT
#define MAP1_SHIFT (2*BITS_PER_LONG_SHIFT)
#define MAP2_SHIFT (3*BITS_PER_LONG_SHIFT)

/* Mask selecting the bit offset within a single bitmap word. */
#define PID_MAP_MASK (BITS_PER_LONG - 1)

/* Number of real levels in the tower (the trailing NULL is excluded). */
#define PID_MAP_DEPTH (ARRAY_SIZE(pid_map) - 1)

static unsigned long pid_map0[MAP0_SIZE];
static unsigned long pid_map1[MAP1_SIZE];
static unsigned long pid_map2[MAP2_SIZE];

/* The tower, finest (per-pid) level first, NULL-terminated. */
static unsigned long * pid_map[] = { pid_map0, pid_map1, pid_map2, NULL, };

/* Most recently allocated pid; updated under pid_lock in alloc_pid(). */
unsigned long last_pid = 0;
/* Count of pids currently allocated; updated under pid_lock. */
unsigned long npids = 0;

/*
 * map_shifts[d+1] is the total number of pid bits consumed below
 * level d; accessed through pid_map_shift().
 */
static const int map_shifts[] =
        { 0,
                BITS_PER_LONG_SHIFT,
                BITS_PER_LONG_SHIFT*2,
                BITS_PER_LONG_SHIFT*3,
                BITS_PER_LONG_SHIFT*4,
        };

/*
 * Number of low-order pid bits consumed below map level @depth;
 * shifting a pid right by this yields its word index at that level.
 */
static inline int pid_map_shift(int depth)
{
        int idx = depth + 1;

        return map_shifts[idx];
}

/* Serializes all bitmap updates and the last_pid/npids accounting. */
static spinlock_t pid_lock = SPIN_LOCK_UNLOCKED;

void free_pid(unsigned long pid)
{
        unsigned long **map = pid_map;

        spin_lock(&pid_lock);
        while (*map) {
                int bit = pid & PID_MAP_MASK;
                pid >>= BITS_PER_LONG_SHIFT;
                __clear_bit(bit, &(*map)[pid]);
                ++map;
        }
        --npids;
        spin_unlock(&pid_lock);
}

/*
 * True when the word at @level covering @pid has every bit set,
 * i.e. no free pid remains anywhere in that block.
 */
static inline int whole_block_used(int level, unsigned long pid)
{
        unsigned long word = pid_map[level][pid >> pid_map_shift(level)];

        return !(~word);
}

/*
 * Mark @pid as in use and bump the allocated count. Called under
 * pid_lock from alloc_pid() (pid_init() calls it before SMP is up).
 *
 * Level 0 always gets its bit set; a bit at a higher level is only
 * set when the word it summarizes at the level below has just become
 * completely full, in which case propagation continues upward.
 * Otherwise the walk stops early.
 */
static inline void mark_pid(unsigned long pid)
{
        int level;
        for (level = 0; level < PID_MAP_DEPTH; ++level) {
                int shift, bit;
                unsigned long entry;

                /* entry: word index at this level; bit: offset within it */
                shift = pid_map_shift(level);
                entry = pid >> shift;
                bit = (pid >> (shift - BITS_PER_LONG_SHIFT)) & PID_MAP_MASK;
                /* propagate only while the level below just filled up */
                if (level == 0 || whole_block_used(level - 1, pid))
                        __set_bit(bit, &pid_map[level][entry]);
                else
                        break;
        }
        ++npids;
}

/*
 * Number of words at map level @depth, i.e. the exclusive upper
 * bound on word indices when walking that level.
 */
static inline int pid_map_limit(int depth)
{
        int shift = pid_map_shift(depth);

        return PID_MAX >> shift;
}

#ifdef PID_ALLOC_EXAMPLE
/*
 * the pid allocation traverses the bitmaps by recursively ffz'ing
 * through down the tower of maps. Some additional logic is required
 * to enforce lower limits, but the following example of how one
 * would perform this search without the lower limit may well prove
 * enlightening for those interested in the mechanics of the algorithm.
 *
 * Returns the lowest free pid, or ~0UL when the entire space is full.
 */
static long alloc_pid_from_zero(void)
{
        unsigned long pid = 0;
        int level;

        for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
                unsigned long entry = pid_map[level][pid];

                /* every pid tracked by this word is taken */
                if (unlikely(entry == ~0UL))
                        return ~0UL;

                /* reuse the word already loaded instead of re-reading the map */
                pid = (pid << BITS_PER_LONG_SHIFT) + ffz(entry);
        }
        return pid;
}
#endif /* PID_ALLOC_EXAMPLE */

/*
 * pid_max_levels[d]: exclusive upper bound on word indices at map
 * level d; alloc_pid_after() uses it to detect running off the end
 * of the pid space.
 */
static const unsigned long pid_max_levels[] =
        { PID_MAX >> BITS_PER_LONG_SHIFT,
                PID_MAX >> (BITS_PER_LONG_SHIFT*2),
                PID_MAX >> (BITS_PER_LONG_SHIFT*3),
                PID_MAX >> (BITS_PER_LONG_SHIFT*4),
        };

/*
 * Extract the base-BITS_PER_LONG "digit" of @limit corresponding to
 * map level @level.
 */
static inline unsigned long pid_map_digit(int level, unsigned long limit)
{
        int shift = pid_map_shift(level - 1);

        return (limit >> shift) & PID_MAP_MASK;
}

/*
 * Scratch space for the base-BITS_PER_LONG digits of the limit,
 * filled in by alloc_pid_after(). All accesses are protected by the
 * pid_lock, which is why a single static buffer suffices.
 */
static unsigned long limit_digits[4];

/*
 * This is not a high-performance implementation. alloc_pid_after()
 * can be made highly compact with some effort, but this is instead
 * meant to be clear. As the cost of fork() is dominated by much
 * more expensive operations and the cost of this is constant-bounded
 * by a very low constant, the gains from manual optimization here
 * are marginal.
 *
 * Returns the lowest free pid strictly greater than @limit, or ~0UL
 * when no such pid exists below PID_MAX. Caller must hold pid_lock
 * (limit_digits is shared scratch).
 *
 * NOTE(review): the "bit = digit" clamp below never verifies that
 * bit `digit' is actually clear in the current word. If ffz() finds
 * a zero below the digit while the digit's own bit is set, the walk
 * proceeds into (or terminates on) an occupied slot — at level 0
 * that looks like it would return an already-allocated pid, and at a
 * higher level it would descend into a full block, making the next
 * ffz() operate on ~0UL (undefined). Plausible after a wraparound,
 * when pids above RESERVED_PIDS are sparsely occupied. A correct fix
 * needs either masking of the low bits before ffz() plus backtracking
 * across levels — please confirm against the allocator's intended
 * invariants before merging.
 */
static long alloc_pid_after(unsigned long limit)
{
        unsigned long pid = 0;
        int level;

        /*
         * The limit passed to us is a strict lower limit. It is more
         * convenient to work with <= constraints.
         */
        ++limit;
        if (unlikely(limit == PID_MAX))
                return ~0UL;

        /* decompose the limit into one digit per map level */
        for (level = 0; level < PID_MAP_DEPTH; ++level) {
                limit_digits[level] = limit & PID_MAP_MASK;
                limit >>= BITS_PER_LONG_SHIFT;
        }

        /*
         * Now the lowest available pid number above limit is
         * reconstructed by ffz'ing down the bitmap and checking
         * each digit against the digits of the limit for
         * dictionary ordering. If the check should fail, it's
         * fixed up by using the maximum of the two digits. The
         * dictionary ordering on digits also means that a
         * greater significant digit found in the bitmap
         * invalidates all further comparisons, which requires
         * fallback to the pure recursive ffz algorithm outlined
         * above in order to be handled.
         */
        for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
                unsigned long bit, digit;

                /* word index ran past the end of this level's map */
                if (unlikely(pid >= pid_max_levels[level]))
                        return ~0UL;

                bit = ffz(pid_map[level][pid]);
                digit = limit_digits[level];

                /* stay at or above the limit's digit at this level */
                if (unlikely(bit < digit))
                        bit = digit;

                pid = (pid << BITS_PER_LONG_SHIFT) + bit;

                /*
                 * This is not an optimization; if this check
                 * should succeed the digit comparisons above
                 * are no longer valid and (pessimistically)
                 * incorrect first available pid's are found.
                 *
                 */
                if (likely(bit > digit)) {
                        --level;
                        goto finish_just_ffz;
                }
        }
out:
        if (pid < PID_MAX)
                return pid;
        else
                return ~0UL;

finish_just_ffz:
        /*
         * Now revert to the pure recursive ffz algorithm with
         * the slight variation of not beginning at a fixed level,
         * because it is no longer valid to perform comparisons
         * of the digit obtained by ffz'ing the bitmap against the
         * digits of the limit.
         */
        while (level >= 0) {
                unsigned long bit;

                if (unlikely(pid >= pid_max_levels[level]))
                        return ~0UL;

                bit = ffz(pid_map[level][pid]);
                pid = (pid << BITS_PER_LONG_SHIFT) + bit;
                --level;
        }

        goto out;
}

/*
 * Allocate the lowest free pid above last_pid, wrapping back to just
 * above RESERVED_PIDS when the top of the space is exhausted.
 * Returns the new pid, or -1 (~0UL truncated to int) when no pid is
 * free at all.
 */
int alloc_pid(void)
{
        unsigned long pid;

        spin_lock(&pid_lock);
        BUG_ON(last_pid >= PID_MAX);
        pid = alloc_pid_after(last_pid);
        if (pid != ~0UL) {
                /* fast path: found a pid strictly above last_pid */
                BUG_ON(pid <= last_pid);
        } else {
                /* wraparound: retry above the reserved region */
                pid = alloc_pid_after(RESERVED_PIDS);
                if (pid == ~0UL) {
                        /* pid space exhausted; leave state untouched */
                        spin_unlock(&pid_lock);
                        return (int)pid;
                }
                BUG_ON(pid < RESERVED_PIDS);
        }
        last_pid = pid;
        mark_pid(pid);
        spin_unlock(&pid_lock);
        return (int)pid;
}

/*
 * Boot-time setup: record pid 0 as allocated in the bitmaps (and set
 * npids to 1) so the allocator's accounting starts consistent.
 */
void __init pid_init(void)
{
        mark_pid(0);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Wed Aug 07 2002 - 22:00:38 EST