PID hashing patch [was: Re: sys_kill() performance patch]

Ingo Molnar (mingo@pc5829.hil.siemens.at)
Fri, 28 Mar 1997 18:23:31 +0100 (MET)


On Tue, 18 Mar 1997, David S. Miller wrote:

> For other Posix-ish features like PID->task relations we could use hash
> tables ... i'm not sure yet how important this is ... if you've got an
> application that uses this heavily, tell me. [or if you know some part of
> the kernel that frequently uses it]

> This seems to suggest that we should hash processes when they are
> created based upon pid, and perhaps hashed in other tables as well
> based upon pgrp etc.
>
> That would be the real fix to these problems.

yup, here is PID hashing implemented for 2.1.30.

trace of kill() from lmbench lat_sig without the patch [166MHz P5]:

------------------------------------------->
Trace: c011bb97 <*SYS_ENTRY*+17/20> (0.72)
Trace: c0118693 <sys_kill+13/ac> (0.64)
Trace: c0118632 <kill_proc+e/5c> (16.45) <---- walks full task list
Trace: c01181cf <send_sig+13/16c> (1.39)
Trace: c011bba8 <*SYS_EXIT*+8/10> (0.88)
<------------------------------------------

with the patch:

------------------------------------------->
Trace: c011bbc7 <*SYS_ENTRY*+17/20> (0.13)
Trace: c01187cf <sys_kill+13/ac> (0.11)
Trace: c011877f <kill_proc+f/4c> (0.10)
Trace: c01181d9 <lookup_pid+d/30> (0.22) <---- doesnt have to walk
Trace: c0118303 <send_sig+13/174> (0.49)
Trace: c011bbd8 <*SYS_EXIT*+8/10> (1.44)
<------------------------------------------

lookup_pid() is used in kill() and fork() currently, but there might be
other places too which could use it.

Comments, reports welcome.

-- mingo

--- linux-2.1.30_vanilla/include/linux/pidhash.h Fri Mar 28 16:25:57 1997
+++ linux/include/linux/pidhash.h Fri Mar 28 17:09:35 1997
@@ -0,0 +1,18 @@
+#ifndef _LINUX_PIDHASH_H
+#define _LINUX_PIDHASH_H
+#ifdef __KERNEL__
+
+/*
+ * Guessomatic hashtable-size:
+ */
+#define PID_HASH_SIZE (NR_TASKS/4)
+
+extern struct task_struct * pid_hash[PID_HASH_SIZE];
+
+extern void add_to_pidhash (struct task_struct * p);
+extern void remove_from_pidhash (struct task_struct * p);
+struct task_struct * lookup_pid (const int pid);
+
+#endif /* __KERNEL__ */
+#endif
+
--- linux-2.1.30_vanilla/include/linux/sched.h Fri Mar 28 12:32:24 1997
+++ linux/include/linux/sched.h Fri Mar 28 18:16:04 1997
@@ -63,7 +64,8 @@
#define CT_TO_SECS(x) ((x) / HZ)
#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)

-extern int nr_running, nr_tasks;
+extern int nr_running (void);
+extern int nr_tasks;
extern int last_pid;

#define FIRST_TASK task[0]
@@ -187,6 +189,7 @@
struct linux_binfmt *binfmt;
struct task_struct *next_task, *prev_task;
struct task_struct *next_run, *prev_run;
+ struct task_struct *next_pidhash, *prev_pidhash;
unsigned long saved_kernel_stack;
unsigned long kernel_stack_page;
int exit_code, exit_signal;
@@ -288,7 +294,7 @@
/* debugregs */ { 0, }, \
/* exec domain */&default_exec_domain, \
/* binfmt */ NULL, \
-/* schedlink */ &init_task,&init_task, &init_task, &init_task, \
+/* schedlink */ &init_task,&init_task, &init_task, &init_task, NULL, NULL, \
/* stack */ 0,(unsigned long) &init_kernel_stack, \
/* ec,brk... */ 0,0,0,0,0, \
/* pid etc.. */ 0,0,0,0,0, \
--- linux-2.1.30_vanilla/kernel/exit.c Fri Mar 28 12:32:33 1997
+++ linux/kernel/exit.c Fri Mar 28 18:00:28 1997
@@ -20,6 +20,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
+#include <linux/pidhash.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -29,6 +30,108 @@
extern void acct_process (long exitcode);
extern void kerneld_exit(void);

+/*
+ * A simple hash-array to look up PIDs.
+ */
+struct task_struct * pid_hash[PID_HASH_SIZE];
+
+#define pid_hashfn(pid) pid_hash[(pid) & (PID_HASH_SIZE-1)]
+
+#undef DEBUG_PIDHASH
+
+#ifdef DEBUG_PIDHASH
+#define MUST_BE(c) do { if (!(c)) { printk("pid hash problem in line %d.\n", __LINE__); panic("ouch\n"); }; } while (0)
+
+#define DEBUG_MESSAGE(t) do { printk("pid hash, line %d, %s.\n", __LINE__, t); } while(0)
+#else
+#define MUST_BE(c)
+#define DEBUG_MESSAGE(t)
+#endif
+
+static int debughash=1;
+
+struct task_struct * lookup_pid (const int pid)
+{
+ struct task_struct * p;
+
+ if (!debughash)
+ DEBUG_MESSAGE("nondebugging add_to_pidhash()");
+
+ for (p=pid_hashfn(pid); p && (p->pid != pid); p=p->next_pidhash) {
+ if (p->prev_pidhash)
+ MUST_BE(p->prev_pidhash->next_pidhash == p);
+ if (p->next_pidhash)
+ MUST_BE(p->next_pidhash->prev_pidhash == p);
+ }
+
+ return p;
+}
+
+#ifdef DEBUG_PIDHASH
+void debug_hashtable (void)
+{
+ int i;
+
+ debughash = 1;
+ for (i=65536; i<65536+PID_HASH_SIZE; i++)
+ lookup_pid(i);
+ debughash = 0;
+
+ DEBUG_MESSAGE("debugging finished");
+}
+#else
+#define debug_hashtable()
+#endif
+
+void add_to_pidhash (struct task_struct * p)
+{
+ struct task_struct * p2 = pid_hashfn(p->pid);
+
+ DEBUG_MESSAGE("add_to_pidhash()");
+
+ debug_hashtable();
+
+ p->next_pidhash=p2;
+ p->prev_pidhash=NULL;
+
+ if (p2)
+ p2->prev_pidhash = p;
+
+ pid_hashfn(p->pid)=p;
+ DEBUG_MESSAGE("add_to_pidhash() #1");
+ debug_hashtable();
+ DEBUG_MESSAGE("add_to_pidhash() #2");
+}
+
+void remove_from_pidhash (struct task_struct * p)
+{
+ struct task_struct * prev = p->prev_pidhash,
+ * hash = pid_hashfn(p->pid),
+ * next = p->next_pidhash;
+
+ DEBUG_MESSAGE("remove_from_pidhash()");
+
+ debug_hashtable();
+
+ if (prev)
+ prev->next_pidhash = next;
+ if (next)
+ next->prev_pidhash = prev;
+
+ if (hash==p) {
+ MUST_BE(prev==NULL);
+ pid_hashfn(p->pid) = next;
+ }
+
+ p->next_pidhash=NULL;
+ p->prev_pidhash=NULL;
+
+ DEBUG_MESSAGE("remove_from_pidhash() done #1");
+ debug_hashtable();
+ DEBUG_MESSAGE("remove_from_pidhash() done #2");
+}
+
+
int getrusage(struct task_struct *, int, struct rusage *);

static inline void generate(unsigned long sig, struct task_struct * p)
@@ -128,8 +231,9 @@
nr_tasks--;
task[i] = NULL;
REMOVE_LINKS(p);
+ remove_from_pidhash(p);
release_thread(p);
- if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
+ if (KSTACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
free_kernel_stack(p->kernel_stack_page);
current->cmin_flt += p->min_flt + p->cmin_flt;
@@ -309,10 +413,12 @@

if (sig<0 || sig>32)
return -EINVAL;
- for_each_task(p) {
- if (p && p->pid == pid)
- return send_sig(sig,p,priv);
- }
+
+ p = lookup_pid(pid);
+
+ if (p)
+ return send_sig(sig,p,priv);
+
return(-ESRCH);
}

@@ -692,6 +798,7 @@
retval = p->pid;
if (p->p_opptr != p->p_pptr) {
REMOVE_LINKS(p);
+ remove_from_pidhash(p);
p->p_pptr = p->p_opptr;
SET_LINKS(p);
notify_parent(p);
--- linux-2.1.30_vanilla/kernel/fork.c Sun Jan 26 12:40:46 1997
+++ linux/kernel/fork.c Fri Mar 28 18:14:47 1997
@@ -11,6 +11,7 @@
* management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
*/

+#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -22,6 +23,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
+#include <linux/pidhash.h>

#include <asm/system.h>
#include <asm/pgtable.h>
@@ -29,7 +31,6 @@
#include <asm/uaccess.h>

int nr_tasks=1;
-int nr_running=1;
unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
int last_pid=0;

@@ -70,12 +71,10 @@
repeat:
if ((++last_pid) & 0xffff8000)
last_pid=1;
- for_each_task (p) {
- if (p->pid == last_pid ||
- p->pgrp == last_pid ||
- p->session == last_pid)
- goto repeat;
- }
+
+ if ((p=lookup_pid(last_pid)))
+ goto repeat;
+
return last_pid;
}

@@ -246,6 +245,7 @@
p->state = TASK_UNINTERRUPTIBLE;
p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
p->flags |= PF_FORKNOEXEC;
p->pid = get_pid(clone_flags);
+ add_to_pidhash(p);
p->next_run = NULL;
p->prev_run = NULL;
p->p_pptr = p->p_opptr = current;
@@ -316,6 +322,7 @@
__MOD_DEC_USE_COUNT(p->binfmt->module);
task[nr] = NULL;
REMOVE_LINKS(p);
+ remove_from_pidhash(p);
nr_tasks--;
bad_fork_free_stack:
free_kernel_stack(new_stack);
--- linux-2.1.30_vanilla/arch/i386/kernel/ptrace.c Sun Jan 26 11:07:04 1997
+++ linux/arch/i386/kernel/ptrace.c Fri Mar 28 17:26:15 1997
@@ -12,6 +12,7 @@
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/debugreg.h>
+#include <linux/pidhash.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -401,6 +402,7 @@
child->flags |= PF_PTRACED;
if (child->p_pptr != current) {
REMOVE_LINKS(child);
+ remove_from_pidhash(child);
child->p_pptr = current;
SET_LINKS(child);
}
@@ -564,6 +566,7 @@
wake_up_process(child);
child->exit_code = data;
REMOVE_LINKS(child);
+ remove_from_pidhash(child);
child->p_pptr = child->p_opptr;
SET_LINKS(child);
/* make sure the single step bit is not set. */