Re: [RFC][PATCH] SCHED_ISO for interactivity

From: Con Kolivas (kernel@kolivas.org)
Date: Sun Jul 13 2003 - 23:05:35 EST


On Mon, 14 Jul 2003 00:54, Guillaume Chazarain wrote:
> 13/07/03 14:53:12, Con Kolivas <kernel@kolivas.org> wrote:
> >On Sun, 13 Jul 2003 20:41, Guillaume Chazarain wrote:
> Good, with ISO_PENALTY == 2, I can smoothly move big windows (with
> ISO_PENALTY == 5 it was smooth only with very small windows), but it lets
> me move them smoothly for a shorter time than stock :(

I think I know what you mean now. Expiring X hurts. With a penalty of only 2
it should be unnecessary to expire iso tasks. Addressed below.
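
(Some rough numbers on what the penalty means: task_timeslice() in the patch
below gives iso tasks BASE_TIMESLICE(p) / ISO_PENALTY, clamped at
MIN_TIMESLICE, so assuming the usual 10ms/200ms min/max timeslices a nice 0
task drops from roughly 100ms to about 50ms with a penalty of 2, where a
penalty of 5 cut it to around 20ms.)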

> >The logical conclusion of this idea where there is a dynamic policy
> > assigned to interactive tasks is a dynamic policy assigned to non
> > interactive tasks that get treated in the opposite way. I'll code
> > something for that soon, now that I've had more feedback on the first
> > part.
>
> Interesting, let's see :)
> But as the interactive bonus can already be negative, I wonder what use
> another variable will have.

The added feature of expiring them every time they use up their timeslice
should help.
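
If anyone wants to push an existing task into the new policies by hand for
testing, a quick hack along these lines should do it. This isn't part of the
patch; the policy numbers are just the ones added to include/linux/sched.h,
hard-coded here because glibc knows nothing about them:

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#define SCHED_BATCH     3       /* from the patched include/linux/sched.h */
#define SCHED_ISO       4

int main(int argc, char **argv)
{
        /* sched_priority must be 0, same as for SCHED_NORMAL */
        struct sched_param sp = { .sched_priority = 0 };
        pid_t pid;
        int policy;

        if (argc < 3) {
                fprintf(stderr, "usage: %s <pid> <policy (3=batch, 4=iso)>\n",
                        argv[0]);
                return 1;
        }
        pid = atoi(argv[1]);
        policy = atoi(argv[2]);

        if (sched_setscheduler(pid, policy, &sp) == -1) {
                perror("sched_setscheduler");
                return 1;
        }
        return 0;
}

The priority has to be 0 for all three non-realtime policies (setscheduler()
in the patch rejects anything else), and bear in mind the scheduler will keep
promoting and demoting tasks between these policies on its own as their
sleep_avg changes, so this is only for forcing a task one way to see what
happens.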

An updated patch-SI-0307141335 against 2.5.75-mm1 incorporating these
changes and more tweaks is here:
http://kernel.kolivas.org/2.5/

and here:
patch-SI-0307141335
--------------------------------
diff -Naurp linux-2.5.75-mm1/include/linux/sched.h linux-2.5.75-test/include/linux/sched.h
--- linux-2.5.75-mm1/include/linux/sched.h 2003-07-13 00:21:30.000000000 +1000
+++ linux-2.5.75-test/include/linux/sched.h 2003-07-14 13:50:01.000000000 +1000
@@ -125,6 +125,8 @@ extern unsigned long nr_iowait(void);
 #define SCHED_NORMAL 0
 #define SCHED_FIFO 1
 #define SCHED_RR 2
+#define SCHED_BATCH 3
+#define SCHED_ISO 4
 
 struct sched_param {
         int sched_priority;
diff -Naurp linux-2.5.75-mm1/kernel/exit.c linux-2.5.75-test/kernel/exit.c
--- linux-2.5.75-mm1/kernel/exit.c 2003-07-13 00:21:30.000000000 +1000
+++ linux-2.5.75-test/kernel/exit.c 2003-07-14 13:33:42.000000000 +1000
@@ -223,7 +223,7 @@ void reparent_to_init(void)
         /* Set the exit signal to SIGCHLD so we signal init on exit */
         current->exit_signal = SIGCHLD;
 
-        if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
+        if ((current->policy == SCHED_NORMAL || current->policy == SCHED_ISO || current->policy == SCHED_BATCH) && (task_nice(current) < 0))
                 set_user_nice(current, 0);
         /* cpus_allowed? */
         /* rt_priority? */
diff -Naurp linux-2.5.75-mm1/kernel/sched.c linux-2.5.75-test/kernel/sched.c
--- linux-2.5.75-mm1/kernel/sched.c 2003-07-13 00:21:30.000000000 +1000
+++ linux-2.5.75-test/kernel/sched.c 2003-07-14 13:41:55.000000000 +1000
@@ -74,12 +74,12 @@
 #define PRIO_BONUS_RATIO 25
 #define INTERACTIVE_DELTA 2
 #define MIN_SLEEP_AVG (HZ)
-#define MAX_SLEEP_AVG (10*HZ)
-#define STARVATION_LIMIT (10*HZ)
-#define SLEEP_BUFFER (HZ/20)
+#define MAX_SLEEP_AVG (5*HZ)
+#define STARVATION_LIMIT (5*HZ)
+#define ISO_PENALTY (2)
 #define NODE_THRESHOLD 125
 #define MAX_BONUS ((MAX_USER_PRIO - MAX_RT_PRIO) * PRIO_BONUS_RATIO / 100)
-
+#define JUST_INTERACTIVE (MAX_BONUS - INTERACTIVE_DELTA) / MAX_BONUS
 /*
  * If a task is 'interactive' then we reinsert it in the active
  * array after it has expired its current timeslice. (it will not
@@ -118,6 +118,10 @@
 #define TASK_INTERACTIVE(p) \
         ((p)->prio <= (p)->static_prio - DELTA(p))
 
+#define normal_task(p) ((p)->policy == SCHED_NORMAL)
+#define iso_task(p) ((p)->policy == SCHED_ISO)
+#define batch_task(p) ((p)->policy == SCHED_BATCH)
+
 /*
  * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
  * to time slice values.
@@ -134,7 +138,16 @@
 
 static inline unsigned int task_timeslice(task_t *p)
 {
-        return BASE_TIMESLICE(p);
+        if (!iso_task(p))
+                return (BASE_TIMESLICE(p));
+        else {
+                int timeslice = BASE_TIMESLICE(p) / ISO_PENALTY;
+
+                if (timeslice < MIN_TIMESLICE)
+                        timeslice = MIN_TIMESLICE;
+
+                return timeslice;
+        }
 }
 
 /*
@@ -319,6 +332,14 @@ static inline void normalise_sleep(task_
 
         p->sleep_avg = p->sleep_avg * MIN_SLEEP_AVG / old_avg_time;
         p->avg_start = jiffies - MIN_SLEEP_AVG;
+
+        /*
+         * New children and their parents are not allowed to
+         * be SCHED_ISO or SCHED_BATCH.
+         */
+        if (iso_task(p) || batch_task(p))
+                p->policy = SCHED_NORMAL;
+
 }
 
 /*
@@ -343,26 +364,38 @@ static int effective_prio(task_t *p)
         if (rt_task(p))
                 return p->prio;
 
-        sleep_period = jiffies - p->avg_start;
+        /*
+         * SCHED_BATCH tasks end up getting the maximum penalty
+         */
+        bonus = - MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;

-        if (unlikely(!sleep_period))
-                return p->static_prio;
+        if (normal_task(p)){
+                sleep_period = jiffies - p->avg_start;

-        if (sleep_period > MAX_SLEEP_AVG)
-                sleep_period = MAX_SLEEP_AVG;
+                if (unlikely(!sleep_period))
+                        return p->static_prio;

-        if (p->sleep_avg > sleep_period)
-                sleep_period = p->sleep_avg;
+                if (sleep_period > MAX_SLEEP_AVG)
+                        sleep_period = MAX_SLEEP_AVG;

-        /*
-         * The bonus is determined according to the accumulated
-         * sleep avg over the duration the task has been running
-         * until it reaches MAX_SLEEP_AVG. -ck
-         */
-        bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/sleep_period/100 -
-                MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
+                if (p->sleep_avg > sleep_period)
+                        sleep_period = p->sleep_avg;
+
+                /*
+                 * The bonus is determined according to the accumulated
+                 * sleep avg over the duration the task has been running
+                 * until it reaches MAX_SLEEP_AVG. -ck
+                 */
+                bonus += MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/sleep_period/100;
+
+        } else if (iso_task(p))
+                /*
+                 * SCHED_ISO tasks get the maximum possible bonus
+                 */
+                bonus += MAX_USER_PRIO*PRIO_BONUS_RATIO/100;
 
         prio = p->static_prio - bonus;
+
         if (prio < MAX_RT_PRIO)
                 prio = MAX_RT_PRIO;
         if (prio > MAX_PRIO-1)
@@ -398,6 +431,11 @@ static inline void activate_task(task_t
                  * to allow them to become interactive or non-interactive rapidly
                  */
                 if (sleep_time > MIN_SLEEP_AVG){
+                        /*
+                         * Idle tasks can not be SCHED_ISO or SCHED_BATCH
+                         */
+                        if (iso_task(p) || batch_task(p))
+                                p->policy = SCHED_NORMAL;
                         p->avg_start = jiffies - MIN_SLEEP_AVG;
                         p->sleep_avg = MIN_SLEEP_AVG * (MAX_BONUS - INTERACTIVE_DELTA - 1) /
                                 MAX_BONUS;
@@ -417,25 +455,45 @@ static inline void activate_task(task_t
                          * the problem of the denominator in the bonus equation
                          * from continually getting larger.
                          */
-                        if ((runtime - MIN_SLEEP_AVG) < MAX_SLEEP_AVG)
-                                p->sleep_avg += (runtime - p->sleep_avg) *
-                                        (MAX_SLEEP_AVG + MIN_SLEEP_AVG - runtime) *
-                                        (MAX_BONUS - INTERACTIVE_DELTA) / MAX_BONUS / MAX_SLEEP_AVG;
+
+                        if ((runtime - MIN_SLEEP_AVG < MAX_SLEEP_AVG) && (runtime * JUST_INTERACTIVE > p->sleep_avg))
+                                p->sleep_avg += (runtime * JUST_INTERACTIVE - p->sleep_avg) *
+                                        (MAX_SLEEP_AVG + MIN_SLEEP_AVG - runtime) / MAX_SLEEP_AVG;
+
+                        if (p->sleep_avg > MAX_SLEEP_AVG){
+                                /*
+                                 * Tasks that have slept more than MAX_SLEEP_AVG
+                                 * become SCHED_ISO tasks.
+                                 */
+                                if (normal_task(p))
+                                        p->policy = SCHED_ISO;
+                                else if (unlikely(batch_task(p)))
+                                        p->policy = SCHED_NORMAL;
+
+                                p->sleep_avg = MAX_SLEEP_AVG;
+                        }
 
                         /*
-                         * Keep a small buffer of SLEEP_BUFFER sleep_avg to
-                         * prevent fully interactive tasks from becoming
-                         * lower priority with small bursts of cpu usage.
+                         * Just in case a SCHED_ISO task has become a complete
+                         * cpu hog revert it to SCHED_NORMAL
                          */
-                        if (p->sleep_avg > (MAX_SLEEP_AVG + SLEEP_BUFFER))
-                                p->sleep_avg = MAX_SLEEP_AVG + SLEEP_BUFFER;
+                        if (unlikely(!p->sleep_avg && iso_task(p))){
+                                p->policy = SCHED_NORMAL;
+                                p->avg_start = jiffies;
+                        }
                 }
 
                 if (unlikely(p->avg_start > jiffies)){
                         p->avg_start = jiffies;
                         p->sleep_avg = 0;
                 }
-        }
+        /*
+         * SCHED_NORMAL tasks that have used up all their sleep avg
+         * get demoted to SCHED_BATCH
+         */
+        } else if (!p->sleep_avg && normal_task(p))
+                p->policy = SCHED_BATCH;
+
         p->prio = effective_prio(p);
         __activate_task(p, rq);
 }
@@ -1309,13 +1367,20 @@ void scheduler_tick(int user_ticks, int
                 p->time_slice = task_timeslice(p);
                 p->first_time_slice = 0;
 
-                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+                /*
+                 * SCHED_BATCH tasks always get expired if they use up their
+                 * timeslice.
+                 * If SCHED_ISO tasks are using too much cpu time they
+                 * enter the expired array.
+                 */
+                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq) || batch_task(p)) {
                         if (!rq->expired_timestamp)
                                 rq->expired_timestamp = jiffies;
                         enqueue_task(p, rq->expired);
                 } else
                         enqueue_task(p, rq->active);
         }
+
 out_unlock:
         spin_unlock(&rq->lock);
 out:
@@ -1818,8 +1883,8 @@ static int setscheduler(pid_t pid, int p
                 policy = p->policy;
         else {
                 retval = -EINVAL;
-                if (policy != SCHED_FIFO && policy != SCHED_RR &&
-                                policy != SCHED_NORMAL)
+                if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_BATCH &&
+                                policy != SCHED_NORMAL && policy != SCHED_ISO)
                         goto out_unlock;
         }
 
@@ -1830,7 +1895,7 @@ static int setscheduler(pid_t pid, int p
         retval = -EINVAL;
         if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
                 goto out_unlock;
-        if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
+        if ((policy == SCHED_NORMAL || policy == SCHED_ISO || policy == SCHED_BATCH) != (lp.sched_priority == 0))
                 goto out_unlock;
 
         retval = -EPERM;
@@ -1852,7 +1917,7 @@ static int setscheduler(pid_t pid, int p
         p->policy = policy;
         p->rt_priority = lp.sched_priority;
         oldprio = p->prio;
-        if (policy != SCHED_NORMAL)
+        if (policy == SCHED_FIFO || policy == SCHED_RR)
                 p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
         else
                 p->prio = p->static_prio;
@@ -2151,6 +2216,8 @@ asmlinkage long sys_sched_get_priority_m
                 ret = MAX_USER_RT_PRIO-1;
                 break;
         case SCHED_NORMAL:
+        case SCHED_ISO:
+        case SCHED_BATCH:
                 ret = 0;
                 break;
         }
@@ -2174,6 +2241,8 @@ asmlinkage long sys_sched_get_priority_m
                 ret = 1;
                 break;
         case SCHED_NORMAL:
+        case SCHED_ISO:
+        case SCHED_BATCH:
                 ret = 0;
         }
         return ret;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


