[PATCH v2 2/2] pid: Remove pidhash

From: Gargi Sharma
Date: Wed Sep 27 2017 - 01:07:07 EST


pidhash is no longer required, as all the information
can be looked up from the idr tree. nr_hashed represented
the number of pids that had been hashed. Since the names nr_hashed
and PIDNS_HASH_ADDING are no longer relevant, they have been renamed
to pid_allocated and PIDNS_ADDING respectively.

Signed-off-by: Gargi Sharma <gs051095@xxxxxxxxx>
Reviewed-by: Rik van Riel <riel@xxxxxxxxxx>
---
include/linux/init_task.h | 1 -
include/linux/pid.h | 2 --
include/linux/pid_namespace.h | 4 ++--
init/main.c | 1 -
kernel/fork.c | 2 +-
kernel/pid.c | 52 ++++++++-----------------------------------
kernel/pid_namespace.c | 7 +++---
7 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3c07ace..cc45798 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -104,7 +104,6 @@ extern struct group_info init_groups;
.numbers = { { \
.nr = 0, \
.ns = &init_pid_ns, \
- .pid_chain = { .next = NULL, .pprev = NULL }, \
}, } \
}

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7195827..3915664 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -50,10 +50,8 @@ enum pid_type
*/

struct upid {
- /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
- struct hlist_node pid_chain;
};

struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f4db4a7..7911b58 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -24,7 +24,7 @@ struct pid_namespace {
struct kref kref;
struct idr idr;
struct rcu_head rcu;
- unsigned int nr_hashed;
+ unsigned int pid_allocated;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
@@ -48,7 +48,7 @@ struct pid_namespace {

extern struct pid_namespace init_pid_ns;

-#define PIDNS_HASH_ADDING (1U << 31)
+#define PIDNS_ADDING (1U << 31)

#ifdef CONFIG_PID_NS
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
diff --git a/init/main.c b/init/main.c
index 9f4db20..c17a21b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -562,7 +562,6 @@ asmlinkage __visible void __init start_kernel(void)
* kmem_cache_init()
*/
setup_log_buf(0);
- pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
diff --git a/kernel/fork.c b/kernel/fork.c
index 1064618..c3518b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1853,7 +1853,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
- if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 207c49a..ae31f7d1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,10 +41,6 @@
#include <linux/sched/task.h>
#include <linux/idr.h>

-#define pid_hashfn(nr, ns) \
- hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
-static struct hlist_head *pid_hash;
-static unsigned int pidhash_shift = 4;
struct pid init_struct_pid = INIT_STRUCT_PID;

int pid_max = PID_MAX_DEFAULT;
@@ -54,9 +50,6 @@ int pid_max = PID_MAX_DEFAULT;
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

-#define find_next_offset(map, off) \
- find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
-
/*
* PID-map pages start out as NULL, they get allocated upon
* first use and are never deallocated. This way a low pid_max
@@ -66,7 +59,7 @@ int pid_max_max = PID_MAX_LIMIT;
struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
.idr = IDR_INIT,
- .nr_hashed = PIDNS_HASH_ADDING,
+ .pid_allocated = PIDNS_ADDING,
.level = 0,
.child_reaper = &init_task,
.user_ns = &init_user_ns,
@@ -125,8 +118,7 @@ void free_pid(struct pid *pid)
for (i = 0; i <= pid->level; i++) {
struct upid *upid = pid->numbers + i;
struct pid_namespace *ns = upid->ns;
- hlist_del_rcu(&upid->pid_chain);
- switch(--ns->nr_hashed) {
+ switch(--ns->pid_allocated) {
case 2:
case 1:
/* When all that is left in the pid namespace
@@ -135,10 +127,10 @@ void free_pid(struct pid *pid)
*/
wake_up_process(ns->child_reaper);
break;
- case PIDNS_HASH_ADDING:
+ case PIDNS_ADDING:
/* Handle a fork failure of the first process */
WARN_ON(ns->child_reaper);
- ns->nr_hashed = 0;
+ ns->pid_allocated = 0;
/* fall through */
case 0:
schedule_work(&ns->proc_work);
@@ -208,12 +200,10 @@ struct pid *alloc_pid(struct pid_namespace *ns)

upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
+ if (!(ns->pid_allocated & PIDNS_ADDING))
goto out_unlock;
for ( ; upid >= pid->numbers; --upid) {
- hlist_add_head_rcu(&upid->pid_chain,
- &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
- upid->ns->nr_hashed++;
+ upid->ns->pid_allocated++;
}
spin_unlock_irq(&pidmap_lock);

@@ -236,21 +226,13 @@ struct pid *alloc_pid(struct pid_namespace *ns)
void disable_pid_allocation(struct pid_namespace *ns)
{
spin_lock_irq(&pidmap_lock);
- ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+ ns->pid_allocated &= ~PIDNS_ADDING;
spin_unlock_irq(&pidmap_lock);
}

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
- struct upid *pnr;
-
- hlist_for_each_entry_rcu(pnr,
- &pid_hash[pid_hashfn(nr, ns)], pid_chain)
- if (pnr->nr == nr && pnr->ns == ns)
- return container_of(pnr, struct pid,
- numbers[ns->level]);
-
- return NULL;
+ return idr_find(&ns->idr, nr);
}
EXPORT_SYMBOL_GPL(find_pid_ns);

@@ -432,26 +414,10 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}

-/*
- * The pid hash table is scaled according to the amount of memory in the
- * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
- * more.
- */
-void __init pidhash_init(void)
-{
- unsigned int pidhash_size;
-
- pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
- HASH_EARLY | HASH_SMALL | HASH_ZERO,
- &pidhash_shift, NULL,
- 0, 4096);
- pidhash_size = 1U << pidhash_shift;
-}
-
void __init pid_idr_init(void)
{
/* Verify no one has done anything silly: */
- BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);

/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 9af3625..dcfcd99 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -133,7 +133,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
- ns->nr_hashed = PIDNS_HASH_ADDING;
+ ns->pid_allocated = PIDNS_ADDING;
+
INIT_WORK(&ns->proc_work, proc_cleanup_work);

return ns;
@@ -259,7 +260,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
* sys_wait4() above can't reap the EXIT_DEAD children but we do not
* really care, we could reparent them to the global init. We could
* exit and reap ->child_reaper even if it is not the last thread in
- * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
+ * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(),
* pid_ns can not go away until proc_kill_sb() drops the reference.
*
* But this ns can also have other tasks injected by setns()+fork().
@@ -273,7 +274,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
*/
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (pid_ns->nr_hashed == init_pids)
+ if (pid_ns->pid_allocated == init_pids)
break;
schedule();
}
--
2.7.4