[RFC v5 1/6] sched/core: Add manual background task classification using sched_setattr syscall

From: Parth Shah
Date: Mon Oct 07 2019 - 04:31:09 EST


Small background tasks typically perform housekeeping work and are
less important in the overall scheme of load balancing and scheduling.

So provide a way to mark tasks which are small background noise by
adding a flag to the existing task attributes. Also provide an
interface from userspace, using the sched_setattr syscall, to mark such
tasks.

The scheduler may use this as a hint to pack such tasks onto a smaller
number of cores.

Signed-off-by: Parth Shah <parth@xxxxxxxxxxxxx>
---
include/linux/sched.h | 1 +
include/uapi/linux/sched.h | 4 +++-
kernel/sched/core.c | 9 +++++++++
3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1113dd4706ae..e03b85166e34 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1463,6 +1463,7 @@ extern struct pid *cad_pid;
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
+#define PF_CAN_BE_PACKED 0x20000000 /* Provide hints to the scheduler to pack such tasks */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */

diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 617bb59aa8ba..fccb1c57d037 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -55,6 +55,7 @@
#define SCHED_FLAG_KEEP_PARAMS 0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+#define SCHED_FLAG_TASK_PACKING 0x80

#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
@@ -66,6 +67,7 @@
SCHED_FLAG_RECLAIM | \
SCHED_FLAG_DL_OVERRUN | \
SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
+ SCHED_FLAG_UTIL_CLAMP | \
+ SCHED_FLAG_TASK_PACKING)

#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fa43ce3962e7..e7cda4aa8696 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4498,6 +4498,8 @@ static void __setscheduler_params(struct task_struct *p,
p->rt_priority = attr->sched_priority;
p->normal_prio = normal_prio(p);
set_load_weight(p, true);
+ if (attr->sched_flags & SCHED_FLAG_TASK_PACKING)
+ p->flags |= PF_CAN_BE_PACKED;
}

/* Actually do priority change: must hold pi & rq lock. */
@@ -4557,6 +4559,8 @@ static int __sched_setscheduler(struct task_struct *p,
struct rq_flags rf;
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ unsigned long long task_packing_flag =
+ attr->sched_flags & SCHED_FLAG_TASK_PACKING;
struct rq *rq;

/* The pi code expects interrupts enabled */
@@ -4686,6 +4690,8 @@ static int __sched_setscheduler(struct task_struct *p,
goto change;
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
goto change;
+ if (task_packing_flag)
+ goto change;

p->sched_reset_on_fork = reset_on_fork;
task_rq_unlock(rq, p, &rf);
@@ -5181,6 +5187,9 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
#endif

+ if (p->flags & PF_CAN_BE_PACKED)
+ attr.sched_flags |= SCHED_FLAG_TASK_PACKING;
+
rcu_read_unlock();

retval = sched_read_attr(uattr, &attr, size);
--
2.17.1