--- linux-2.5.69.virgin/include/linux/sched.h.org	Fri May 23 07:14:23 2003
+++ linux-2.5.69.virgin/include/linux/sched.h	Sat May 24 08:03:40 2003
@@ -328,7 +328,8 @@
 	prio_array_t *array;
 
 	unsigned long sleep_avg;
-	unsigned long last_run;
+	unsigned long long last_run;
+	unsigned int used_nsecs;
 
 	unsigned long policy;
 	unsigned long cpus_allowed;
--- linux-2.5.69.virgin/kernel/sched.c.org	Sun May 25 06:05:42 2003
+++ linux-2.5.69.virgin/kernel/sched.c	Sun May 25 10:04:14 2003
@@ -74,6 +74,10 @@
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
+#define SCHED_NANOSECOND	1
+#define SCHED_MILLISECOND	(1000000 * SCHED_NANOSECOND)
+
+extern unsigned long long monotonic_clock(void);
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
@@ -342,7 +346,8 @@
  */
 static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	unsigned long long now = monotonic_clock();
+	long sleep_time = (long)(now - p->last_run) / SCHED_MILLISECOND;
 	int requeue_waker = 0;
 
 	if (sleep_time > 0) {
@@ -514,7 +519,9 @@
 			__activate_task(p, rq);
 		else {
 			requeue_waker = activate_task(p, rq);
-			if (p->prio < rq->curr->prio)
+			if (p->prio < rq->curr->prio ||
+				(0 && p->prio == rq->curr->prio &&
+				 p->time_slice > rq->curr->time_slice))
 				resched_task(rq->curr);
 		}
 		success = 1;
@@ -1182,11 +1189,12 @@
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	int idle = p == rq->idle, used_msecs;
 
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
-	if (p == rq->idle) {
+	if (idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
 			kstat_cpu(cpu).cpustat.system += sys_ticks;
@@ -1194,8 +1202,7 @@
 			kstat_cpu(cpu).cpustat.iowait += sys_ticks;
 		else
 			kstat_cpu(cpu).cpustat.idle += sys_ticks;
-		rebalance_tick(rq, 1);
-		return;
+		goto out;
 	}
 	if (TASK_NICE(p) > 0)
 		kstat_cpu(cpu).cpustat.nice += user_ticks;
@@ -1203,12 +1210,12 @@
 		kstat_cpu(cpu).cpustat.user += user_ticks;
 	kstat_cpu(cpu).cpustat.system += sys_ticks;
 
+	spin_lock(&rq->lock);
 	/* Task might have expired already, but not scheduled off yet */
 	if (p->array != rq->active) {
 		set_tsk_need_resched(p);
-		return;
+		goto out_unlock;
 	}
-	spin_lock(&rq->lock);
 	/*
 	 * The task was running during this tick - update the
 	 * time slice counter and the sleep average. Note: we
@@ -1217,8 +1224,15 @@
 	 * it possible for interactive tasks to use up their
 	 * timeslices at their highest priority levels.
	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
+	if ((used_msecs = p->used_nsecs / SCHED_MILLISECOND)) {
+		p->used_nsecs -= used_msecs * SCHED_MILLISECOND;
+		if (p->sleep_avg >= used_msecs)
+			p->sleep_avg -= used_msecs;
+		else p->sleep_avg = 0;
+		if (p->policy != SCHED_FIFO && used_msecs > p->time_slice)
+			printk(KERN_DEBUG "ROADRUNNER! %d:%d stole %d msecs.\n",
+				p->pid, p->prio-100, used_msecs - p->time_slice);
+	}
 	if (unlikely(rt_task(p))) {
 		/*
 		 * RR tasks need a special form of timeslice management.
@@ -1233,7 +1247,7 @@
 			dequeue_task(p, rq->active);
 			enqueue_task(p, rq->active);
 		}
-		goto out;
+		goto out_unlock;
 	}
 	if (!--p->time_slice) {
 		dequeue_task(p, rq->active);
@@ -1249,9 +1263,10 @@
 		} else
 			enqueue_task(p, rq->active);
 	}
-out:
+out_unlock:
 	spin_unlock(&rq->lock);
-	rebalance_tick(rq, 0);
+out:
+	rebalance_tick(rq, idle);
 }
 
 void scheduling_functions_start_here(void) { }
@@ -1264,8 +1279,9 @@
 	task_t *prev, *next;
 	runqueue_t *rq;
 	prio_array_t *array;
-	struct list_head *queue;
+	struct list_head *head, *curr;
 	int idx;
+	static int last_pid, last_msused;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -1286,7 +1302,6 @@
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
 
 	/*
@@ -1331,8 +1346,32 @@
 	}
 
 	idx = sched_find_first_bit(array->bitmap);
-	queue = array->queue + idx;
-	next = list_entry(queue->next, task_t, run_list);
+next_queue:
+	head = array->queue + idx;
+	curr = head->next;
+	next = list_entry(curr, task_t, run_list);
+	curr = curr->next;
+	/*
+	 * If we are about to wrap back to the head of the queue,
+	 * give a lower priority queue a chance to sneak one in.
+	 */
+	if (0 && idx == prev->prio && curr == head && array->nr_active > 1) {
+		int tmp = find_next_bit(array->bitmap, MAX_PRIO, ++idx);
+		if (tmp < MAX_PRIO) {
+			idx = tmp;
+			goto next_queue;
+		}
+	}
+	if (prev != rq->idle && prev->array == rq->active &&
+			prev->used_nsecs / SCHED_MILLISECOND >= 1) {
+		if (last_pid != prev->pid ||
+				last_msused != prev->used_nsecs / SCHED_MILLISECOND)
+			printk(KERN_DEBUG "COYOTE! %d:%d not ticked for %d msecs.\n",
+				prev->pid, prev->prio-100,
+				prev->used_nsecs / SCHED_MILLISECOND);
+		last_pid = prev->pid;
+		last_msused = prev->used_nsecs / SCHED_MILLISECOND;
+	}
 
 switch_tasks:
 	prefetch(next);
@@ -1340,8 +1379,11 @@
 	RCU_qsctr(prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
+		unsigned long long now = monotonic_clock();
 		rq->nr_switches++;
 		rq->curr = next;
+		prev->used_nsecs += now - prev->last_run;
+		prev->last_run = next->last_run = now;
 
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
--- linux-2.5.69.virgin/arch/i386/kernel/timers/timer_tsc.c.org	Sat May 24 08:55:05 2003
+++ linux-2.5.69.virgin/arch/i386/kernel/timers/timer_tsc.c	Sat May 24 09:26:09 2003
@@ -102,12 +102,13 @@
 static unsigned long long monotonic_clock_tsc(void)
 {
 	unsigned long long last_offset, this_offset, base;
+	unsigned long flags;
 
 	/* atomically read monotonic base & last_offset */
-	read_lock_irq(&monotonic_lock);
+	read_lock_irqsave(&monotonic_lock, flags);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	base = monotonic_base;
-	read_unlock_irq(&monotonic_lock);
+	read_unlock_irqrestore(&monotonic_lock, flags);
 
 	/* Read the Time Stamp Counter */
 	rdtscll(this_offset);
--- linux-2.5.69.virgin/kernel/printk.c.org	Sun May 25 09:10:38 2003
+++ linux-2.5.69.virgin/kernel/printk.c	Sun May 25 09:11:04 2003
@@ -510,8 +510,10 @@
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
+#if 0
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
 		wake_up_interruptible(&log_wait);
+#endif
 }
 
 /** console_conditional_schedule - yield the CPU if required
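
For anyone who wants to poke at the accounting outside the kernel, here is a minimal, standalone userspace sketch of the scheme the patch uses: accumulate CPU time in nanoseconds when a task is switched out, then drain whole milliseconds from sleep_avg at tick time and carry the sub-millisecond remainder forward. The struct demo_task, demo_switch_out() and demo_tick() names are hypothetical and exist only for illustration; SCHED_NANOSECOND/SCHED_MILLISECOND mirror the definitions added to kernel/sched.c above, and none of the locking or runqueue handling of the real patch is modelled.

/*
 * Standalone illustration (not kernel code): nanosecond accounting with
 * millisecond draining, as in the scheduler_tick()/schedule() changes above.
 * All demo_* names are made up.
 */
#include <stdio.h>

#define SCHED_NANOSECOND	1ULL
#define SCHED_MILLISECOND	(1000000ULL * SCHED_NANOSECOND)

struct demo_task {
	unsigned long long last_run;	/* monotonic time of last switch-in, ns */
	unsigned long long used_nsecs;	/* CPU time consumed, not yet charged */
	unsigned long sleep_avg;	/* interactivity credit, in ms */
};

/* At context-switch time: charge the outgoing task its CPU use in ns. */
static void demo_switch_out(struct demo_task *p, unsigned long long now)
{
	p->used_nsecs += now - p->last_run;
	p->last_run = now;
}

/* At timer-tick time: convert whole milliseconds of use into charges. */
static void demo_tick(struct demo_task *p)
{
	unsigned int used_msecs = (unsigned int)(p->used_nsecs / SCHED_MILLISECOND);

	if (!used_msecs)
		return;			/* less than a full millisecond so far */
	p->used_nsecs -= (unsigned long long)used_msecs * SCHED_MILLISECOND;
	if (p->sleep_avg >= used_msecs)
		p->sleep_avg -= used_msecs;
	else
		p->sleep_avg = 0;
	printf("charged %u ms, sleep_avg now %lu\n", used_msecs, p->sleep_avg);
}

int main(void)
{
	struct demo_task t = { .last_run = 0, .used_nsecs = 0, .sleep_avg = 10 };

	demo_switch_out(&t, 2500000ULL);	/* ran 2.5 ms before switching out */
	demo_tick(&t);				/* charges 2 ms, 0.5 ms carried over */
	demo_switch_out(&t, 3200000ULL);	/* another 0.7 ms of CPU time */
	demo_tick(&t);				/* 1.2 ms pending: charges 1 ms */
	return 0;
}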