[RFC PATCH 11/11] fs/proc: Add mempolicy attribute to allow read/write of task mempolicy

From: Gregory Price
Date: Wed Nov 22 2023 - 16:13:16 EST


Expose a task's mempolicy via /proc/<pid>/mempolicy, and use the existing
mpol_parse_str format to allow one task to change the policy of another.

mpol_parse_str format:
<mode>[=<flags>][:<nodelist>]

valid settings:
"prefer" (without a nodemask, aliases to 'local')
"prefer:node"
"interleave:nodelist"
"local"
"default"
"prefer (many):nodelist"
"bind:nodelist"

flags are either "=static" or "=relative". They cannot be used with a bare
"prefer" (no nodelist) or with "local"; "prefer=flag:nodelist" is valid.

Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx>
---
fs/proc/Makefile | 1 +
fs/proc/base.c | 1 +
fs/proc/internal.h | 1 +
fs/proc/mempolicy.c | 117 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 120 insertions(+)
create mode 100644 fs/proc/mempolicy.c

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8ba..272d22d9022f 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -27,6 +27,7 @@ proc-y += softirqs.o
proc-y += namespaces.o
proc-y += self.o
proc-y += thread_self.o
+proc-y += mempolicy.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd31e3b6bf77..3eb3d6d81a8e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3279,6 +3279,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
+ REG("mempolicy", S_IRUSR|S_IWUSR, proc_mempolicy_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9a8f32f21ff5..e8e81629a8d8 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_mempolicy_operations;

extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/mempolicy.c b/fs/proc/mempolicy.c
new file mode 100644
index 000000000000..417c2c8046d9
--- /dev/null
+++ b/fs/proc/mempolicy.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef CONFIG_NUMA
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/mempolicy.h>
+#include <linux/uaccess.h>
+#include <linux/nodemask.h>
+
+#include "internal.h"
+
+#define MPOL_STR_SIZE 4096
+/*
+ * Read handler: render the task's mempolicy with mpol_to_str() and copy it,
+ * newline-terminated, to userspace. Returns -ENOMEM/-ESRCH on early failure.
+ */
+static ssize_t mempolicy_read_proc(struct file *file, char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct task_struct *tsk;
+	struct mempolicy *pol;
+	ssize_t ret = 0;
+	size_t len;
+	char *str;
+
+	str = kzalloc(MPOL_STR_SIZE, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	tsk = get_proc_task(file_inode(file));
+	if (!tsk) {
+		ret = -ESRCH;
+		goto out_free;
+	}
+
+	task_lock(tsk);
+	pol = get_task_policy(tsk);
+	mpol_get(pol);
+	task_unlock(tsk);
+	if (pol) {
+		mpol_to_str(str, MPOL_STR_SIZE, pol);
+		str[MPOL_STR_SIZE - 1] = '\0';
+		len = strlen(str);
+		/* Append '\n' only when it and the terminating NUL still fit. */
+		if (len < MPOL_STR_SIZE - 1) {
+			str[len++] = '\n';
+			str[len] = '\0';
+		}
+		ret = simple_read_from_buffer(buf, count, ppos, str, len);
+		mpol_put(pol);
+	}
+	put_task_struct(tsk);
+out_free:
+	kfree(str);
+	return ret;
+}
+
+/*
+ * Write handler: parse a "<mode>[=<flags>][:<nodelist>]" string from
+ * userspace with mpol_parse_str() and hand the result to replace_mempolicy()
+ * for the target task. Returns @count on success, else the failing step's error.
+ */
+static ssize_t mempolicy_write_proc(struct file *file, const char __user *buf,
+				    size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	struct mempolicy *new_policy = NULL;
+	char *mempolicy_str, *nl;
+	nodemask_t nodes;
+	ssize_t ret;
+
+	/* Bound the user-controlled allocation to what the read side emits. */
+	if (count >= MPOL_STR_SIZE)
+		return -EINVAL;
+
+	mempolicy_str = memdup_user_nul(buf, count);
+	if (IS_ERR(mempolicy_str))
+		return PTR_ERR(mempolicy_str);
+
+	/* Cut the string at the first CR or LF so the parser never sees them. */
+	nl = strpbrk(mempolicy_str, "\r\n");
+	if (nl)
+		*nl = '\0';
+
+	ret = mpol_parse_str(mempolicy_str, &new_policy);
+	if (ret)
+		goto out_free;
+
+	/* If no error and no policy, it was 'default', clear node list */
+	if (new_policy)
+		nodes = new_policy->nodes;
+	else
+		nodes_clear(nodes);
+
+	task = get_proc_task(file_inode(file));
+	if (!task) {
+		mpol_put(new_policy);
+		ret = -ESRCH;
+		goto out_free;
+	}
+
+	/* NOTE(review): confirm who drops new_policy if this call fails. */
+	ret = replace_mempolicy(task, new_policy, &nodes);
+	put_task_struct(task);
+	if (!ret)
+		ret = count;
+out_free:
+	kfree(mempolicy_str);
+	return ret;
+}
+
+const struct file_operations proc_mempolicy_operations = {
+	.llseek	= noop_llseek,
+	.read	= mempolicy_read_proc,	/* emits the task's policy string */
+	.write	= mempolicy_write_proc,	/* parses and applies a new policy */
+};
+#endif /* CONFIG_NUMA */
--
2.39.1