[PATCH v2 08/11] mm/mempolicy: add set_mempolicy2 syscall

From: Gregory Price
Date: Sat Dec 09 2023 - 02:00:52 EST


set_mempolicy2 is an extensible set_mempolicy interface which allows
a user to set the per-task memory policy.

Defined as:

set_mempolicy2(struct mpol_args *args, size_t size, unsigned long flags);

relevant mpol_args fields include the following:

mode: The MPOL_* policy (DEFAULT, INTERLEAVE, etc.)
mode_flags: The MPOL_F_* flags that were previously passed in or'd
into the mode. This was split to hopefully allow future
extensions additional mode/flag space.
pol_nodes: the nodemask to apply for the memory policy
pol_maxnodes: The max number of nodes described by pol_nodes

The usize arg is intended for the user to pass in sizeof(mpol_args)
to allow forward/backward compatibility whenever possible.

The flags argument is intended to future proof the syscall against
future extensions which may require interpreting the arguments in
the structure differently.

Semantics of `set_mempolicy` are otherwise the same as `set_mempolicy`
as of this patch.

Suggested-by: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx>
---
.../admin-guide/mm/numa_memory_policy.rst | 10 ++++++
arch/alpha/kernel/syscalls/syscall.tbl | 1 +
arch/arm/tools/syscall.tbl | 1 +
arch/m68k/kernel/syscalls/syscall.tbl | 1 +
arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 1 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 1 +
arch/parisc/kernel/syscalls/syscall.tbl | 1 +
arch/powerpc/kernel/syscalls/syscall.tbl | 1 +
arch/s390/kernel/syscalls/syscall.tbl | 1 +
arch/sh/kernel/syscalls/syscall.tbl | 1 +
arch/sparc/kernel/syscalls/syscall.tbl | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
arch/xtensa/kernel/syscalls/syscall.tbl | 1 +
include/linux/syscalls.h | 2 ++
include/uapi/asm-generic/unistd.h | 4 ++-
mm/mempolicy.c | 36 +++++++++++++++++++
18 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 64c5804dc40f..aabc24db92d3 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -432,6 +432,8 @@ Set [Task] Memory Policy::

long set_mempolicy(int mode, const unsigned long *nmask,
unsigned long maxnode);
+ long set_mempolicy2(struct mpol_args args, size_t size,
+ unsigned long flags);

Set's the calling task's "task/process memory policy" to mode
specified by the 'mode' argument and the set of nodes defined by
@@ -440,6 +442,12 @@ specified by the 'mode' argument and the set of nodes defined by
'mode' argument with the flag (for example: MPOL_INTERLEAVE |
MPOL_F_STATIC_NODES).

+set_mempolicy2() is an extended version of set_mempolicy() capable
+of setting a mempolicy which requires more information than can be
+passed via get_mempolicy(). For example, weighted interleave with
+task-local weights requires a weight array to be passed via the
+'mpol_args->il_weights' argument in the 'struct mpol_args' arg.
+
See the set_mempolicy(2) man page for more details


@@ -498,6 +506,8 @@ Extended Mempolicy Arguments::
The extended mempolicy argument structure is defined to allow the mempolicy
interfaces future extensibility without the need for additional system calls.

+Extended interfaces (set_mempolicy2) use this argument structure.
+
The core arguments (mode, mode_flags, pol_nodes, and pol_maxnodes) apply to
all interfaces relative to their non-extended counterparts. Each additional
field may only apply to specific extended interfaces. See the respective
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 18c842ca6c32..0dc288a1118a 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -496,3 +496,4 @@
564 common futex_wake sys_futex_wake
565 common futex_wait sys_futex_wait
566 common futex_requeue sys_futex_requeue
+567 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 584f9528c996..50172ec0e1f5 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -470,3 +470,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7a4b780e82cb..839d90c535f2 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -456,3 +456,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 5b6a0b02b7de..567c8b883735 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -462,3 +462,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index a842b41c8e06..cc0640e16f2f 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -395,3 +395,4 @@
454 n32 futex_wake sys_futex_wake
455 n32 futex_wait sys_futex_wait
456 n32 futex_requeue sys_futex_requeue
+457 n32 set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 525cc54bc63b..f7262fde98d9 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -444,3 +444,4 @@
454 o32 futex_wake sys_futex_wake
455 o32 futex_wait sys_futex_wait
456 o32 futex_requeue sys_futex_requeue
+457 o32 set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a47798fed54e..e10f0e8bd064 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -455,3 +455,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7fab411378f2..4f03f5f42b78 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -543,3 +543,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 86fec9b080f6..f98dadc2e9df 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -459,3 +459,4 @@
454 common futex_wake sys_futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 363fae0fe9bf..f47ba9f2d05d 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -459,3 +459,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 7bcaa3d5ea44..53fb16616728 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -502,3 +502,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c8fac5205803..4b4dc41b24ee 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -461,3 +461,4 @@
454 i386 futex_wake sys_futex_wake
455 i386 futex_wait sys_futex_wait
456 i386 futex_requeue sys_futex_requeue
+457 i386 set_mempolicy2 sys_set_mempolicy2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8cb8bf68721c..1bc2190bec27 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -378,6 +378,7 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2

#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 06eefa9c1458..e26dc89399eb 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -427,3 +427,4 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common set_mempolicy2 sys_set_mempolicy2
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fd9d12de7e92..3244cd990858 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -822,6 +822,8 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
unsigned long addr, unsigned long flags);
asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask,
unsigned long maxnode);
+asmlinkage long sys_set_mempolicy2(struct mpol_args *args, size_t size,
+ unsigned long flags);
asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
const unsigned long __user *from,
const unsigned long __user *to);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 756b013fb832..55486aba099f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -828,9 +828,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_set_mempolicy2 457
+__SYSCALL(__NR_set_mempolicy2, sys_set_mempolicy2)

#undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 458

/*
* 32 bit systems traditionally used different
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 446167dcebdc..a56ff02f780e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1633,6 +1633,42 @@ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
return kernel_set_mempolicy(mode, nmask, maxnode);
}

+SYSCALL_DEFINE3(set_mempolicy2, struct mpol_args __user *, uargs, size_t, usize,
+ unsigned long, flags)
+{
+ struct mpol_args kargs;
+ struct mempolicy_args margs;
+ int err;
+ nodemask_t policy_nodemask;
+ unsigned long __user *nodes_ptr;
+
+ if (flags)
+ return -EINVAL;
+
+ err = copy_struct_from_user(&kargs, sizeof(kargs), uargs, usize);
+ if (err)
+ return err;
+
+ err = validate_mpol_flags(kargs.mode, &kargs.mode_flags);
+ if (err)
+ return err;
+
+ memset(&margs, '\0', sizeof(margs));
+ margs.mode = kargs.mode;
+ margs.mode_flags = kargs.mode_flags;
+ if (kargs.pol_nodes) {
+ nodes_ptr = u64_to_user_ptr(kargs.pol_nodes);
+ err = get_nodes(&policy_nodemask, nodes_ptr,
+ kargs.pol_maxnodes);
+ if (err)
+ return err;
+ margs.policy_nodes = &policy_nodemask;
+ } else
+ margs.policy_nodes = NULL;
+
+ return do_set_mempolicy(&margs);
+}
+
static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
const unsigned long __user *old_nodes,
const unsigned long __user *new_nodes)
--
2.39.1