[patch 10/16] sched: throttle entities exceeding their allowed bandwidth

From: Paul Turner
Date: Tue Jun 21 2011 - 03:22:01 EST


Add conditional checks in put_prev_entity() and enqueue_entity() to detect
when an active entity has exceeded its allowed bandwidth and requires
throttling.

Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>

---
kernel/sched_fair.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 53 insertions(+), 2 deletions(-)
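
For illustration only (not part of the patch): below is a toy userspace model
of the two decision points this change adds. The toy_* names are hypothetical
and merely mirror the kernel helpers and cfs_rq fields; everything else is
simplified.

/*
 * Toy model of the two throttle decision points; not kernel code.
 * The struct fields stand in for the relevant cfs_rq state.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_cfs_rq {
	bool runtime_enabled;	/* bandwidth limiting active for this group */
	long runtime_remaining;	/* locally assigned runtime left */
	bool has_curr;		/* stand-in for cfs_rq->curr != NULL */
	bool throttled;		/* stand-in for cfs_rq_throttled() */
};

static void toy_throttle(struct toy_cfs_rq *cfs_rq)
{
	cfs_rq->throttled = true;
}

/* enqueue-time: a waking group may have exhausted its quota while idle */
static void toy_check_enqueue_throttle(struct toy_cfs_rq *cfs_rq)
{
	/* an active group is handled by the put_prev path instead */
	if (!cfs_rq->runtime_enabled || cfs_rq->has_curr)
		return;
	if (cfs_rq->throttled)
		return;
	if (cfs_rq->runtime_remaining <= 0)
		toy_throttle(cfs_rq);
}

/* put-time: runtime has just been accounted by update_curr() */
static void toy_check_cfs_rq_runtime(struct toy_cfs_rq *cfs_rq)
{
	if (!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)
		return;
	/* may already be throttled, e.g. after moving onto a throttled group */
	if (cfs_rq->throttled)
		return;
	toy_throttle(cfs_rq);
}

int main(void)
{
	struct toy_cfs_rq rq = {
		.runtime_enabled = true,
		.runtime_remaining = -1,	/* quota already exceeded */
	};

	toy_check_enqueue_throttle(&rq);
	printf("throttled after enqueue check: %d\n", rq.throttled);

	return 0;
}

In the real code the put-time check depends on update_curr() having just
accounted runtime, which is why check_cfs_rq_runtime() sits directly after it
in put_prev_entity() in the diff below.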

Index: tip/kernel/sched_fair.c
===================================================================
--- tip.orig/kernel/sched_fair.c
+++ tip/kernel/sched_fair.c
@@ -987,6 +987,8 @@ place_entity(struct cfs_rq *cfs_rq, stru
se->vruntime = vruntime;
}

+static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
@@ -1016,8 +1018,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;

- if (cfs_rq->nr_running == 1)
+ if (cfs_rq->nr_running == 1) {
list_add_leaf_cfs_rq(cfs_rq);
+ check_enqueue_throttle(cfs_rq);
+ }
}

static void __clear_buddies_last(struct sched_entity *se)
@@ -1222,6 +1226,8 @@ static struct sched_entity *pick_next_en
return se;
}

+static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
{
/*
@@ -1231,6 +1237,9 @@ static void put_prev_entity(struct cfs_r
if (prev->on_rq)
update_curr(cfs_rq);

+ /* throttle cfs_rqs exceeding runtime */
+ check_cfs_rq_runtime(cfs_rq);
+
check_spread(cfs_rq, prev);
if (prev->on_rq) {
update_stats_wait_start(cfs_rq, prev);
@@ -1403,7 +1412,7 @@ static void account_cfs_rq_runtime(struc
* if we're unable to extend our runtime we resched so that the active
* hierarchy can be throttled
*/
- if (!assign_cfs_rq_runtime(cfs_rq))
+ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
resched_task(rq_of(cfs_rq)->curr);
}

@@ -1448,6 +1457,46 @@ static void throttle_cfs_rq(struct cfs_r
raw_spin_unlock(&cfs_b->lock);
}

+/*
+ * When a group wakes up we want to make sure that its quota is not already
+ * expired/exceeded, otherwise it may be allowed to steal additional ticks of
+ * runtime as update_curr() throttling can not trigger until it's on-rq.
+ */
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
+{
+ /* an active group must be handled by the update_curr()->put() path */
+ if (!cfs_rq->runtime_enabled || cfs_rq->curr)
+ return;
+
+ /* ensure the group is not already throttled */
+ if (cfs_rq_throttled(cfs_rq))
+ return;
+
+ /* update runtime allocation */
+ account_cfs_rq_runtime(cfs_rq, 0);
+ if (cfs_rq->runtime_remaining <= 0)
+ throttle_cfs_rq(cfs_rq);
+}
+
+/* conditionally throttle active cfs_rq's from put_prev_entity() */
+static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+ if (!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)
+ return;
+
+ /*
+ * as the alignment of the last vestiges of per-cpu quota is not
+ * controllable it's possible that active load-balance will force a
+ * thread belonging to an unthrottled cfs_rq on cpu A into a running
+ * state on a throttled cfs_rq on cpu B. In this case we're already
+ * throttled.
+ */
+ if (cfs_rq_throttled(cfs_rq))
+ return;
+
+ throttle_cfs_rq(cfs_rq);
+}
+
static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
@@ -1586,6 +1635,8 @@ out_unlock:
#else
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
unsigned long delta_exec) {}
+static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}

static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{

