SCHED_RR is broken + patch

From: Christian Ehrhardt (ehrhardt@mathematik.uni-ulm.de)
Date: Tue Apr 25 2000 - 07:15:48 EST


Hi,

[[ I sent a similar mail to Alan Cox some time ago but I didn't get a
   response and the fix isn't included in 2.2.16pre1 either. ]]

I think I found a Bug in the linux scheduler: A running SCHED_RR
process is not preempted when its time slice is exhausted but the process
is still running. The following program demonstrates the problem:
In theory the child should be preempted when the time slice is exhausted
and the parent should be allowed to run. Unfortunately this doesn't work.

#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>

int main () {
        int pid, count=0;
        struct sched_param p;
        struct timespec t;

        p.sched_priority = 1;
        if ((pid = fork ())) {
                if (sched_setscheduler (0, SCHED_RR, &p) < 0) {
                        perror ("setsched parent: ");
                }
                if (sched_setscheduler (pid, SCHED_RR, &p) < 0) {
                        perror ("setsched child: ");
                }
                if (sched_rr_get_interval (0, &t) <0) {
                        perror ("get_interval parent");
                }
                printf ("Parent slice: %ld %ld\n", t.tv_sec, t.tv_nsec);
                if (sched_rr_get_interval (pid, &t) <0) {
                        perror ("get_interval child");
                }
                printf ("Child slice: %ld %ld\n", t.tv_sec, t.tv_nsec);
                count = 0;
                sched_yield ();
                for (count = 0; count < 10; count ++) {
                        if (sched_yield () < 0) {
                                perror ("yield: ");
                        }
                }
                printf ("Killing\n");
                kill (pid, SIGKILL);
        } else { /* child */
                printf ("Child start:\n");
                sched_yield ();
                while (1) { ; }
        }
        return 0;
}

The reason for this behaviour is the advantage of the currently running
process over other processes with the same goodness. This make an already
running SCHED_RR process more desirable than any other SCHED_RR with the
same priority. Here's a patch against 2.2.15pre17 that cured the problem
for me. The patch also includes a fix that makes sched_rr_get_interval
return the correct values.

--- kernel/sched.c.orig Sun Apr 2 19:44:28 2000
+++ kernel/sched.c Sun Apr 2 20:28:15 2000
@@ -686,7 +686,7 @@
 asmlinkage void schedule(void)
 {
         struct schedule_data * sched_data;
- struct task_struct *prev, *next, *p;
+ struct task_struct *prev, *next, *p, *prev2;
         int this_cpu, c;
 
         if (tq_scheduler)
@@ -718,7 +718,9 @@
         if (prev->policy == SCHED_RR)
                 goto move_rr_last;
 move_rr_back:
-
+ prev2 = NULL;
+ if (prev->policy != SCHED_OTHER)
+ prev2 = prev;
         switch (prev->state) {
                 case TASK_INTERRUPTIBLE:
                         if (signal_pending(prev)) {
@@ -741,7 +743,7 @@
         /* Default process to select.. */
         next = idle_task(this_cpu);
         c = -1000;
- if (prev->state == TASK_RUNNING)
+ if ((prev->state == TASK_RUNNING) && !prev2)
                 goto still_running;
 still_running_back:
 
@@ -761,7 +763,7 @@
  * list, so our list starting at "p" is essentially fixed.
  */
         while (p != &init_task) {
- if (can_schedule(p)) {
+ if (can_schedule(p) || (p == prev2)) {
                         int weight = goodness(prev, p, this_cpu);
                         if (weight > c)
                                 c = weight, next = p;
@@ -1892,9 +1894,22 @@
 asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
 {
         struct timespec t;
+ struct task_struct * p;
+ int val;
 
+ val = current->priority;
+ if (pid && (pid != current->pid)) {
+ read_lock (&tasklist_lock);
+ p = find_task_by_pid (pid);
+ if (p)
+ val = p->priority;
+ read_unlock (&tasklist_lock);
+ if (!p)
+ return -ESRCH;
+ }
+
         t.tv_sec = 0;
- t.tv_nsec = 150000;
+ t.tv_nsec = (val+1) * 1000000000 / HZ;
         if (copy_to_user(interval, &t, sizeof(struct timespec)))
                 return -EFAULT;
         return 0;

    greetings Christian

-- 
****************************************************************************
** Christian Ehrhardt  **  e-Mail: ehrhardt@mathematik.uni-ulm.de  *********
****************************************************************************
THAT'S ALL FOLKS!

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Apr 30 2000 - 21:00:09 EST