Hi,
[[ I sent a similar mail to Alan Cox some time ago but I didn't get a
response and the fix isn't included in 2.2.16pre1 either. ]]
I think I found a Bug in the linux scheduler: A running SCHED_RR
process is not preempted when its time slice is exhausted but the process
is still running. The following program demonstrates the problem:
In theory the child should be preempted when the time slice is exhausted
and the parent should be allowed to run. Unfortunately this doesn't work.
#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
int main () {
int pid, count=0;
struct sched_param p;
struct timespec t;
p.sched_priority = 1;
if ((pid = fork ())) {
if (sched_setscheduler (0, SCHED_RR, &p) < 0) {
perror ("setsched parent: ");
}
if (sched_setscheduler (pid, SCHED_RR, &p) < 0) {
perror ("setsched child: ");
}
if (sched_rr_get_interval (0, &t) <0) {
perror ("get_interval parent");
}
printf ("Parent slice: %ld %ld\n", t.tv_sec, t.tv_nsec);
if (sched_rr_get_interval (pid, &t) <0) {
perror ("get_interval child");
}
printf ("Child slice: %ld %ld\n", t.tv_sec, t.tv_nsec);
count = 0;
sched_yield ();
for (count = 0; count < 10; count ++) {
if (sched_yield () < 0) {
perror ("yield: ");
}
}
printf ("Killing\n");
kill (pid, SIGKILL);
} else { /* child */
printf ("Child start:\n");
sched_yield ();
while (1) { ; }
}
return 0;
}
The reason for this behaviour is the advantage of the currently running
process over other processes with the same goodness. This make an already
running SCHED_RR process more desirable than any other SCHED_RR with the
same priority. Here's a patch against 2.2.15pre17 that cured the problem
for me. The patch also includes a fix that makes sched_rr_get_interval
return the correct values.
--- kernel/sched.c.orig Sun Apr 2 19:44:28 2000
+++ kernel/sched.c Sun Apr 2 20:28:15 2000
@@ -686,7 +686,7 @@
asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
- struct task_struct *prev, *next, *p;
+ struct task_struct *prev, *next, *p, *prev2;
int this_cpu, c;
if (tq_scheduler)
@@ -718,7 +718,9 @@
if (prev->policy == SCHED_RR)
goto move_rr_last;
move_rr_back:
-
+ prev2 = NULL;
+ if (prev->policy != SCHED_OTHER)
+ prev2 = prev;
switch (prev->state) {
case TASK_INTERRUPTIBLE:
if (signal_pending(prev)) {
@@ -741,7 +743,7 @@
/* Default process to select.. */
next = idle_task(this_cpu);
c = -1000;
- if (prev->state == TASK_RUNNING)
+ if ((prev->state == TASK_RUNNING) && !prev2)
goto still_running;
still_running_back:
@@ -761,7 +763,7 @@
* list, so our list starting at "p" is essentially fixed.
*/
while (p != &init_task) {
- if (can_schedule(p)) {
+ if (can_schedule(p) || (p == prev2)) {
int weight = goodness(prev, p, this_cpu);
if (weight > c)
c = weight, next = p;
@@ -1892,9 +1894,22 @@
asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
struct timespec t;
+ struct task_struct * p;
+ int val;
+ val = current->priority;
+ if (pid && (pid != current->pid)) {
+ read_lock (&tasklist_lock);
+ p = find_task_by_pid (pid);
+ if (p)
+ val = p->priority;
+ read_unlock (&tasklist_lock);
+ if (!p)
+ return -ESRCH;
+ }
+
t.tv_sec = 0;
- t.tv_nsec = 150000;
+ t.tv_nsec = (val+1) * 1000000000 / HZ;
if (copy_to_user(interval, &t, sizeof(struct timespec)))
return -EFAULT;
return 0;
greetings Christian
-- **************************************************************************** ** Christian Ehrhardt ** e-Mail: ehrhardt@mathematik.uni-ulm.de ********* **************************************************************************** THAT'S ALL FOLKS!- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sun Apr 30 2000 - 21:00:09 EST