[PATCH 3/3] timer: Reduce unnecessary sighand lock contention

From: Jason Low
Date: Tue Aug 25 2015 - 23:19:00 EST


It was found while running a database workload on large systems that
significant time was spent trying to acquire the sighand lock.

The issue was that whenever an itimer expired, many threads ended up
simultaneously trying to send the signal. Most of the time, nothing
happened after acquiring the sighand lock because another thread
had already sent the signal and updated the "next expire" time. The
fastpath_timer_check() didn't help much since the "next expire" time
was only updated later, at the end of check_process_timers(), so the
other threads still passed the fastpath check in the meantime.

This patch addresses this by having the thread_group_cputimer structure
maintain a flag (checking_timer) that signifies when a thread in the
group is already checking for process-wide timers, and by adding extra
logic in the fastpath to check that flag.

Signed-off-by: Jason Low <jason.low2@xxxxxx>
---
include/linux/init_task.h | 1 +
include/linux/sched.h | 3 +++
kernel/time/posix-cpu-timers.c | 19 +++++++++++++++++--
3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d0b380e..3350c77 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -53,6 +53,7 @@ extern struct fs_struct init_fs;
.cputimer = { \
.cputime_atomic = INIT_CPUTIME_ATOMIC, \
.running = 0, \
+ .checking_timer = 0, \
}, \
INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 119823d..a6c8334 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -619,6 +619,8 @@ struct task_cputime_atomic {
* @cputime_atomic: atomic thread group interval timers.
* @running: non-zero when there are timers running and
* @cputime receives updates.
+ * @checking_timer: non-zero when a thread is in the process of
+ * checking for thread group timers.
*
* This structure contains the version of task_cputime, above, that is
* used for thread group CPU timer calculations.
@@ -626,6 +628,7 @@ struct task_cputime_atomic {
struct thread_group_cputimer {
struct task_cputime_atomic cputime_atomic;
int running;
+ int checking_timer;
};

#include <linux/rwsem.h>
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 535bef5..f3ddf0e 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -962,6 +962,14 @@ static void check_process_timers(struct task_struct *tsk,
unsigned long soft;

/*
+ * Signify that a thread is checking for process timers.
+ * The checking_timer field is only modified in this function,
+ * which is called with the sighand lock held. Thus, we can
+ * just use WRITE_ONCE() without any further locking.
+ */
+ WRITE_ONCE(sig->cputimer.checking_timer, 1);
+
+ /*
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
@@ -1015,6 +1023,8 @@ static void check_process_timers(struct task_struct *tsk,
sig->cputime_expires.sched_exp = sched_expires;
if (task_cputime_zero(&sig->cputime_expires))
stop_process_timers(sig);
+
+ WRITE_ONCE(sig->cputimer.checking_timer, 0);
}

/*
@@ -1132,8 +1142,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
}

sig = tsk->signal;
- /* Check if cputimer is running. This is accessed without locking. */
- if (READ_ONCE(sig->cputimer.running)) {
+ /*
+ * Check whether thread group timers have expired, but only when
+ * the cputimer is running and no other thread in the group is
+ * already checking for thread group cputimers.
+ */
+ if (READ_ONCE(sig->cputimer.running) &&
+ !READ_ONCE(sig->cputimer.checking_timer)) {
struct task_cputime group_sample;

sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
--
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/