Re: [git] CFS-devel, group scheduler, fixes

From: Mike Galbraith
Date: Sun Sep 23 2007 - 07:38:08 EST


Hi,

I haven't chased down the exact scenario, but using a min_vruntime which
is about to change definitely seems to be what's causing my latency
woes. Does the below cure your fairness woes as well?

(first bit is just some debug format changes for your convenience if you
try it)

diff -uprNX /root/dontdiff git/linux-2.6.sched-devel/kernel/sched_debug.c linux-2.6.23-rc7.d/kernel/sched_debug.c
--- git/linux-2.6.sched-devel/kernel/sched_debug.c 2007-09-22 13:37:32.000000000 +0200
+++ linux-2.6.23-rc7.d/kernel/sched_debug.c 2007-09-21 12:12:27.000000000 +0200
@@ -67,7 +67,7 @@ print_task(struct seq_file *m, struct rq
(long long)(p->nvcsw + p->nivcsw),
p->prio);
#ifdef CONFIG_SCHEDSTATS
- SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n",
+ SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld\n",
SPLIT_NS(p->se.vruntime),
SPLIT_NS(p->se.sum_exec_runtime),
SPLIT_NS(p->se.sum_sleep_runtime));
@@ -83,10 +83,10 @@ static void print_rq(struct seq_file *m,

SEQ_printf(m,
"\nrunnable tasks:\n"
- " task PID tree-key switches prio"
- " exec-runtime sum-exec sum-sleep\n"
+ " task PID tree-key switches prio"
+ " exec-runtime sum-exec sum-sleep\n"
"------------------------------------------------------"
- "------------------------------------------------");
+ "----------------------------------------------------\n");

read_lock_irq(&tasklist_lock);

@@ -134,7 +134,7 @@ void print_cfs_rq(struct seq_file *m, in
spread0 = min_vruntime - rq0_min_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
SPLIT_NS(spread0));
- SEQ_printf(m, " .%-30s: %ld\n", "spread0",
+ SEQ_printf(m, " .%-30s: %ld\n", "nr_sync_min_vruntime",
cfs_rq->nr_sync_min_vruntime);
}

diff -uprNX /root/dontdiff git/linux-2.6.sched-devel/kernel/sched_fair.c linux-2.6.23-rc7.d/kernel/sched_fair.c
--- git/linux-2.6.sched-devel/kernel/sched_fair.c 2007-09-22 13:37:32.000000000 +0200
+++ linux-2.6.23-rc7.d/kernel/sched_fair.c 2007-09-23 12:53:11.000000000 +0200
@@ -290,14 +290,19 @@ __update_curr(struct cfs_rq *cfs_rq, str
static void sync_vruntime(struct cfs_rq *cfs_rq)
{
struct rq *rq;
- int cpu;
+ int cpu, wrote = 0;

for_each_online_cpu(cpu) {
rq = &per_cpu(runqueues, cpu);
+ if (!spin_trylock(&rq->lock))
+ continue;
cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime,
rq->cfs.min_vruntime);
+ spin_unlock(&rq->lock);
+ wrote++;
}
- schedstat_inc(cfs_rq, nr_sync_min_vruntime);
+ if (wrote)
+ schedstat_inc(cfs_rq, nr_sync_min_vruntime);
}

static void update_curr(struct cfs_rq *cfs_rq)
@@ -306,8 +311,10 @@ static void update_curr(struct cfs_rq *c
u64 now = rq_of(cfs_rq)->clock;
unsigned long delta_exec;

- if (unlikely(!curr))
+ if (unlikely(!cfs_rq->nr_running))
return sync_vruntime(cfs_rq);
+ if (unlikely(!curr))
+ return;

/*
* Get the amount of time the current task was running


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/