--- linux-2.5.69.virgin/include/linux/sched.h.org	Fri May 23 07:14:23 2003
+++ linux-2.5.69.virgin/include/linux/sched.h	Tue May 27 09:30:51 2003
@@ -328,7 +328,9 @@
 	prio_array_t *array;
 
 	unsigned long sleep_avg;
-	unsigned long last_run;
+	unsigned long long last_run;
+	unsigned int run_nsecs;
+	unsigned int sleep_nsecs;
 
 	unsigned long policy;
 	unsigned long cpus_allowed;
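
A note on the field widths above: once timestamps are kept in nanoseconds, last_run can no longer be a 32-bit quantity, because a 32-bit nanosecond counter wraps in under five seconds, while run_nsecs and sleep_nsecs only carry sub-tick remainders and fit comfortably in an unsigned int. A standalone user-space sketch of that arithmetic, not part of the patch (HZ=100 assumed):

#include <stdio.h>

#define HZ 100

int main(void)
{
	unsigned long long wrap_ns = 1ULL << 32;	/* range of a 32-bit ns counter */
	unsigned long long tick_ns = 1000000000ULL / HZ;

	printf("32-bit nanosecond counter wraps after %.2f s\n",
	       wrap_ns / 1e9);			/* ~4.29 s: far too short for last_run */
	printf("sub-tick remainders stay below %llu ns\n",
	       tick_ns);			/* easily fits in an unsigned int */
	return 0;
}
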
--- linux-2.5.69.virgin/kernel/sched.c.org	Sun May 25 06:05:42 2003
+++ linux-2.5.69.virgin/kernel/sched.c	Tue May 27 12:58:02 2003
@@ -74,6 +74,12 @@
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
+#define SCHED_NANOSECOND	1
+#define SCHED_SECOND		(1000000000 * SCHED_NANOSECOND)
+#define SCHED_TICK		(SCHED_SECOND / HZ)
+#define TICKS_PER_SECOND	(SCHED_SECOND / SCHED_TICK)
+
+extern unsigned long long monotonic_clock(void);
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
@@ -342,10 +348,23 @@
  */
 static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
-	int requeue_waker = 0;
+	unsigned long long now = monotonic_clock();
+	long long sleep = now - p->last_run + p->sleep_nsecs;
+	int ticks = 0, requeue_waker = 0;
+
+	if (sleep >= SCHED_TICK) {
+		while (sleep >= SCHED_SECOND) {
+			sleep -= SCHED_SECOND;
+			ticks += TICKS_PER_SECOND;
+		}
+		while (sleep >= SCHED_TICK) {
+			sleep -= SCHED_TICK;
+			ticks++;
+		}
+	}
+	p->sleep_nsecs = sleep;
 
-	if (sleep_time > 0) {
+	if (ticks > 0) {
 		int sleep_avg;
 
 		/*
@@ -356,7 +375,7 @@
 		 * spends sleeping, the higher the average gets - and the
 		 * higher the priority boost gets as well.
 		 */
-		sleep_avg = p->sleep_avg + sleep_time;
+		sleep_avg = p->sleep_avg + ticks;
 
 		/*
@@ -364,8 +383,10 @@
 		 * 'Overflow' bonus ticks go to the waker as well, so the
 		 * boosting tasks that are related to maximum-interactive
 		 * tasks.
 		 */
-		if (sleep_avg > MAX_SLEEP_AVG)
+		if (sleep_avg > MAX_SLEEP_AVG) {
 			sleep_avg = MAX_SLEEP_AVG;
+			p->sleep_nsecs = 0;
+		}
 
 		if (p->sleep_avg != sleep_avg) {
 			p->sleep_avg = sleep_avg;
 			p->prio = effective_prio(p);
@@ -571,6 +592,8 @@
 	current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
 	p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
 	p->prio = effective_prio(p);
+	p->run_nsecs = 0;
+	p->sleep_nsecs = 0;
 	set_task_cpu(p, smp_processor_id());
 
 	if (unlikely(!current->array))
@@ -1170,6 +1193,49 @@
 	(jiffies - (rq)->expired_timestamp >= \
 		STARVATION_LIMIT * ((rq)->nr_running) + 1)))
 
+inline void __scheduler_tick(runqueue_t *rq, task_t *p)
+{
+	unsigned long long now = monotonic_clock();
+	prio_array_t *array = rq->active;
+	int ticks;
+
+	p->run_nsecs += now - p->last_run;
+	/* Task might have expired already, but not scheduled off yet */
+	if (p->array != array) {
+		set_tsk_need_resched(p);
+		goto abort;
+	}
+	if (p->run_nsecs < SCHED_TICK || p->policy == SCHED_FIFO)
+		goto abort;
+
+	for (ticks = 0; p->run_nsecs >= SCHED_TICK; ticks++)
+		p->run_nsecs -= SCHED_TICK;
+	if (ticks > p->time_slice)
+		show_task(p);
+	if (p->sleep_avg > ticks)
+		p->sleep_avg -= ticks;
+	else
+		p->sleep_avg = 0;
+	p->time_slice -= ticks;
+
+	if (p->time_slice <= 0) {
+		dequeue_task(p, p->array);
+		p->prio = effective_prio(p);
+		p->time_slice = task_timeslice(p);
+		p->first_time_slice = 0;
+		set_tsk_need_resched(p);
+		if ((EXPIRED_STARVING(rq) && !rt_task(p)) ||
+				!TASK_INTERACTIVE(p)) {
+			array = rq->expired;
+			if (!rq->expired_timestamp)
+				rq->expired_timestamp = jiffies;
+		}
+		enqueue_task(p, array);
+	}
+abort:
+	p->last_run = monotonic_clock();
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -1182,11 +1248,12 @@
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	int idle = p == rq->idle;
 
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
-	if (p == rq->idle) {
+	if (idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
 			kstat_cpu(cpu).cpustat.system += sys_ticks;
@@ -1194,8 +1261,7 @@
 			kstat_cpu(cpu).cpustat.iowait += sys_ticks;
 		else
 			kstat_cpu(cpu).cpustat.idle += sys_ticks;
-		rebalance_tick(rq, 1);
-		return;
+		goto out;
 	}
 	if (TASK_NICE(p) > 0)
 		kstat_cpu(cpu).cpustat.nice += user_ticks;
@@ -1203,11 +1269,6 @@
 		kstat_cpu(cpu).cpustat.user += user_ticks;
 	kstat_cpu(cpu).cpustat.system += sys_ticks;
 
-	/* Task might have expired already, but not scheduled off yet */
-	if (p->array != rq->active) {
-		set_tsk_need_resched(p);
-		return;
-	}
 	spin_lock(&rq->lock);
 	/*
 	 * The task was running during this tick - update the
@@ -1217,41 +1278,10 @@
 	 * it possible for interactive tasks to use up their
 	 * timeslices at their highest priority levels.
 	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
-	if (unlikely(rt_task(p))) {
-		/*
-		 * RR tasks need a special form of timeslice management.
-		 * FIFO tasks have no timeslices.
-		 */
-		if ((p->policy == SCHED_RR) && !--p->time_slice) {
-			p->time_slice = task_timeslice(p);
-			p->first_time_slice = 0;
-			set_tsk_need_resched(p);
-
-			/* put it at the end of the queue: */
-			dequeue_task(p, rq->active);
-			enqueue_task(p, rq->active);
-		}
-		goto out;
-	}
-	if (!--p->time_slice) {
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		p->time_slice = task_timeslice(p);
-		p->first_time_slice = 0;
-
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
-			if (!rq->expired_timestamp)
-				rq->expired_timestamp = jiffies;
-			enqueue_task(p, rq->expired);
-		} else
-			enqueue_task(p, rq->active);
-	}
-out:
+	__scheduler_tick(rq, p);
 	spin_unlock(&rq->lock);
-	rebalance_tick(rq, 0);
+out:
+	rebalance_tick(rq, idle);
 }
 
 void scheduling_functions_start_here(void) { }
@@ -1286,8 +1316,8 @@
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
+	__scheduler_tick(rq, prev);
 
 	/*
 	 * if entering off of a kernel preemption go straight
@@ -1342,6 +1372,7 @@
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		next->last_run = prev->last_run;
 
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
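
The heart of the sched.c changes is the loop that converts an accumulated nanosecond delta into whole scheduler ticks and carries the sub-tick residue forward: in sleep_nsecs across wakeups, in run_nsecs across timer ticks. (The original posting had `p->sleep_nsecs += sleep` in an else branch of activate_task(); since `sleep` already includes the carried `p->sleep_nsecs`, that double-counted the remainder, so the assignment is hoisted out of the branch above.) Below is a minimal user-space sketch of the accounting; accumulate_ticks() is a hypothetical stand-in for the loops in activate_task() and __scheduler_tick(), and HZ=100 is assumed:

#include <stdio.h>

#define HZ			100
#define SCHED_NANOSECOND	1ULL
#define SCHED_SECOND		(1000000000ULL * SCHED_NANOSECOND)
#define SCHED_TICK		(SCHED_SECOND / HZ)
#define TICKS_PER_SECOND	(SCHED_SECOND / SCHED_TICK)

/* Split a nanosecond delta plus the carried remainder into whole
 * ticks; the new sub-tick remainder is left in *carry, mirroring
 * what the patch keeps in ->sleep_nsecs / ->run_nsecs. */
static unsigned long accumulate_ticks(unsigned long long delta,
				      unsigned long long *carry)
{
	unsigned long long ns = delta + *carry;
	unsigned long ticks = 0;

	while (ns >= SCHED_SECOND) {	/* peel off whole seconds first, so  */
		ns -= SCHED_SECOND;	/* the per-tick loop runs < HZ times */
		ticks += TICKS_PER_SECOND;
	}
	while (ns >= SCHED_TICK) {
		ns -= SCHED_TICK;
		ticks++;
	}
	*carry = ns;
	return ticks;
}

int main(void)
{
	/* 3.5 ms and 7 ms are each below one 10 ms tick; only their
	 * carried sum crosses a tick boundary.  2.04 s yields 204. */
	unsigned long long deltas[] = { 3500000ULL, 7000000ULL, 2040000000ULL };
	unsigned long long carry = 0;
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long ticks = accumulate_ticks(deltas[i], &carry);

		printf("delta %10llu ns -> %3lu ticks, carry %7llu ns\n",
		       deltas[i], ticks, carry);
	}
	return 0;
}

Note how the first two sleeps are each shorter than a tick and only produce a tick of credit once the carried remainder crosses the 10 ms boundary; under pure jiffy accounting both would have been lost, which is precisely what the carry fields are there to prevent.
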
--- linux-2.5.69.virgin/arch/i386/kernel/timers/timer_tsc.c.org	Sat May 24 08:55:05 2003
+++ linux-2.5.69.virgin/arch/i386/kernel/timers/timer_tsc.c	Sat May 24 09:26:09 2003
@@ -102,12 +102,13 @@
 static unsigned long long monotonic_clock_tsc(void)
 {
 	unsigned long long last_offset, this_offset, base;
+	unsigned long flags;
 
 	/* atomically read monotonic base & last_offset */
-	read_lock_irq(&monotonic_lock);
+	read_lock_irqsave(&monotonic_lock, flags);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	base = monotonic_base;
-	read_unlock_irq(&monotonic_lock);
+	read_unlock_irqrestore(&monotonic_lock, flags);
 
 	/* Read the Time Stamp Counter */
 	rdtscll(this_offset);
--- linux-2.5.69.virgin/kernel/printk.c.org	Sun May 25 09:10:38 2003
+++ linux-2.5.69.virgin/kernel/printk.c	Sun May 25 09:11:04 2003
@@ -510,8 +510,10 @@
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
+#if 0
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
 		wake_up_interruptible(&log_wait);
+#endif
 }
 
 /**
  * console_conditional_schedule - yield the CPU if required
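
Two notes on the last two files. monotonic_clock_tsc() can now be reached from scheduler paths that already hold rq->lock with interrupts disabled, so the plain read_lock_irq()/read_unlock_irq() pair, whose unlock unconditionally re-enables interrupts, is replaced with the irqsave/irqrestore variants that restore the caller's interrupt state. The printk.c #if 0 simply suppresses the klogd wakeup, presumably to keep wake_up_interruptible() out of the picture while these scheduler paths are being debugged. A user-space analogue of the save/restore pattern (all names here are hypothetical, for illustration only):

#include <stdio.h>
#include <stdbool.h>

static bool irqs_enabled = true;	/* stand-in for the CPU interrupt flag */

static bool lock_irqsave(void)		/* save current state, then "disable" */
{
	bool old = irqs_enabled;
	irqs_enabled = false;
	return old;
}

static void unlock_irqrestore(bool old)	/* put back whatever the caller had */
{
	irqs_enabled = old;
}

static void unlock_irq(void)		/* what read_unlock_irq() does */
{
	irqs_enabled = true;		/* unconditionally re-enables */
}

int main(void)
{
	bool outer = lock_irqsave();	/* e.g. rq->lock taken with irqs off */
	bool inner = lock_irqsave();	/* nested: monotonic_clock()'s read lock */

	unlock_irqrestore(inner);
	printf("irqsave/irqrestore: %s\n",
	       irqs_enabled ? "enabled (would be a bug here)"
			    : "still disabled (correct)");

	unlock_irq();			/* the old code path, for contrast */
	printf("plain unlock_irq:   %s\n",
	       irqs_enabled ? "enabled inside a critical section (bug)"
			    : "disabled");

	unlock_irqrestore(outer);
	return 0;
}

The saved flag is what makes nesting safe: the inner unlock leaves interrupts exactly as the outer critical section expects to find them.
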