[RFC PATCH v0.1 1/9] sched/umcg: add UMCG syscall stubs and CONFIG_UMCG

From: Peter Oskolkov
Date: Thu May 20 2021 - 14:36:27 EST


User Managed Concurrency Groups (UMCG) is a fast context-switching and
in-process userspace scheduling framework.

Two main use cases are security sandboxes and userspace scheduling.

Security sandboxes: fast cross-process context switching will open up a
number of lightweight security tools, e.g. gVisor or the Tor Project's
Shadow simulator, to more use cases.

In-process userspace scheduling is used extensively at Google to provide
latency control and isolation guarantees for diverse workloads while
maintaining high CPU utilization.

Signed-off-by: Peter Oskolkov <posk@xxxxxxxxxx>
---
arch/x86/entry/syscalls/syscall_64.tbl | 11 +++++++++++
include/uapi/asm-generic/unistd.h | 25 ++++++++++++++++++++++++-
init/Kconfig | 10 ++++++++++
kernel/sys_ni.c | 13 +++++++++++++
4 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..2e984a77eb23 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -368,6 +368,17 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
+447 common umcg_api_version sys_umcg_api_version
+448 common umcg_register_task sys_umcg_register_task
+449 common umcg_unregister_task sys_umcg_unregister_task
+450 common umcg_wait sys_umcg_wait
+451 common umcg_wake sys_umcg_wake
+452 common umcg_swap sys_umcg_swap
+453 common umcg_create_group sys_umcg_create_group
+454 common umcg_destroy_group sys_umcg_destroy_group
+455 common umcg_poll_worker sys_umcg_poll_worker
+456 common umcg_run_worker sys_umcg_run_worker
+457 common umcg_preempt_worker sys_umcg_preempt_worker

#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b..cb8504e7ae07 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -873,8 +873,31 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
#define __NR_landlock_restrict_self 446
__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)

+#define __NR_umcg_api_version 447
+__SYSCALL(__NR_umcg_api_version, sys_umcg_api_version)
+#define __NR_umcg_register_task 448
+__SYSCALL(__NR_umcg_register_task, sys_umcg_register_task)
+#define __NR_umcg_unregister_task 449
+__SYSCALL(__NR_umcg_unregister_task, sys_umcg_unregister_task)
+#define __NR_umcg_wait 450
+__SYSCALL(__NR_umcg_wait, sys_umcg_wait)
+#define __NR_umcg_wake 451
+__SYSCALL(__NR_umcg_wake, sys_umcg_wake)
+#define __NR_umcg_swap 452
+__SYSCALL(__NR_umcg_swap, sys_umcg_swap)
+#define __NR_umcg_create_group 453
+__SYSCALL(__NR_umcg_create_group, sys_umcg_create_group)
+#define __NR_umcg_destroy_group 454
+__SYSCALL(__NR_umcg_destroy_group, sys_umcg_destroy_group)
+#define __NR_umcg_poll_worker 455
+__SYSCALL(__NR_umcg_poll_worker, sys_umcg_poll_worker)
+#define __NR_umcg_run_worker 456
+__SYSCALL(__NR_umcg_run_worker, sys_umcg_run_worker)
+#define __NR_umcg_preempt_worker 457
+__SYSCALL(__NR_umcg_preempt_worker, sys_umcg_preempt_worker)
+
#undef __NR_syscalls
-#define __NR_syscalls 447
+#define __NR_syscalls 458

/*
* 32 bit systems traditionally used different
diff --git a/init/Kconfig b/init/Kconfig
index 1ea12c64e4c9..bfac88dd5d73 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1661,6 +1661,16 @@ config MEMBARRIER

If unsure, say Y.

+config UMCG
+ bool "Enable User Managed Concurrency Groups API"
+ default n
+ help
+ Enable UMCG core wait/wake/swap operations as well as UMCG
+ group/server/worker API. The core API is useful for fast IPC
+ and context switching, while the group/server/worker API, together
+ with the core API, forms the basis for an in-process M:N userspace
+ scheduling framework implemented in lib/umcg.
+
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0ea8128468c3..fea55aa0222a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -272,6 +272,19 @@ COND_SYSCALL(landlock_create_ruleset);
COND_SYSCALL(landlock_add_rule);
COND_SYSCALL(landlock_restrict_self);

+/* kernel/sched/umcg.c */
+COND_SYSCALL(umcg_api_version);
+COND_SYSCALL(umcg_register_task);
+COND_SYSCALL(umcg_unregister_task);
+COND_SYSCALL(umcg_wait);
+COND_SYSCALL(umcg_wake);
+COND_SYSCALL(umcg_swap);
+COND_SYSCALL(umcg_create_group);
+COND_SYSCALL(umcg_destroy_group);
+COND_SYSCALL(umcg_poll_worker);
+COND_SYSCALL(umcg_run_worker);
+COND_SYSCALL(umcg_preempt_worker);
+
/* arch/example/kernel/sys_example.c */

/* mm/fadvise.c */
--
2.31.1.818.g46aad6cb9e-goog