[PATCH] ipc/mqueue: introduce msg cache

From: Roman Gushchin
Date: Mon Dec 05 2022 - 21:13:15 EST


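Add a small per-CPU cache of message buffers to each mqueue inode.
free_msg() parks a message in the current CPU's slot when that slot is
empty, and alloc_msg() reuses the parked message when the same task asks
for a message of the same length, returning before kmalloc() is reached.
ipc/msg.c passes a NULL cache everywhere, so it never uses the cache.

Signed-off-by: Roman Gushchin <roman.gushchin@xxxxxxxxx>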
---
ipc/mqueue.c | 20 ++++++++++---
ipc/msg.c | 12 ++++----
ipc/msgutil.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
ipc/util.h | 8 ++++--
4 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 467a194b8a2e..5c6fec8e9701 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -131,6 +131,11 @@ struct ext_wait_queue { /* queue of sleeping tasks */
int state; /* one of STATE_* values */
};

+/*
+ * Per-queue cache of message buffers. Only a pointer to the per-CPU state
+ * is kept here; struct pcpu_msg_cache is declared but not defined in this
+ * file.
+ *
+ * NOTE(review): ipc/msgutil.c carries its own, identical definition of
+ * struct msg_cache. The two must stay in sync; consider moving the
+ * definition to ipc/util.h so there is a single copy.
+ */
+struct pcpu_msg_cache;
+struct msg_cache {
+ struct pcpu_msg_cache __percpu *pcpu_cache;
+};
+
struct mqueue_inode_info {
spinlock_t lock;
struct inode vfs_inode;
@@ -152,6 +157,8 @@ struct mqueue_inode_info {
/* for tasks waiting for free space and messages, respectively */
struct ext_wait_queue e_wait_q[2];

+ struct msg_cache msg_cache;
+
unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};

@@ -368,6 +375,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
if (mq_bytes + mq_treesize < mq_bytes)
goto out_inode;
+ ret = init_msg_cache(&info->msg_cache);
+ if (ret)
+ goto out_inode;
mq_bytes += mq_treesize;
info->ucounts = get_ucounts(current_ucounts());
if (info->ucounts) {
@@ -531,9 +541,11 @@ static void mqueue_evict_inode(struct inode *inode)

list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
list_del(&msg->m_list);
- free_msg(msg);
+ free_msg(msg, &info->msg_cache);
}

+ free_msg_cache(&info->msg_cache);
+
if (info->ucounts) {
unsigned long mq_bytes, mq_treesize;

@@ -1108,7 +1120,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,

/* First try to allocate memory, before doing anything with
* existing queues. */
- msg_ptr = load_msg(u_msg_ptr, msg_len);
+ msg_ptr = load_msg(u_msg_ptr, msg_len, &info->msg_cache);
if (IS_ERR(msg_ptr)) {
ret = PTR_ERR(msg_ptr);
goto out_fput;
@@ -1170,7 +1182,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
wake_up_q(&wake_q);
out_free:
if (ret)
- free_msg(msg_ptr);
+ free_msg(msg_ptr, &info->msg_cache);
out_fput:
fdput(f);
out:
@@ -1273,7 +1285,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
ret = -EFAULT;
}
- free_msg(msg_ptr);
+ free_msg(msg_ptr, &info->msg_cache);
}
out_fput:
fdput(f);
diff --git a/ipc/msg.c b/ipc/msg.c
index fd08b3cb36d7..fcc09f848490 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -287,7 +287,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
- free_msg(msg);
+ free_msg(msg, NULL);
}
percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
@@ -861,7 +861,7 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
if (mtype < 1)
return -EINVAL;

- msg = load_msg(mtext, msgsz);
+ msg = load_msg(mtext, msgsz, NULL);
if (IS_ERR(msg))
return PTR_ERR(msg);

@@ -954,7 +954,7 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
out_unlock1:
rcu_read_unlock();
if (msg != NULL)
- free_msg(msg);
+ free_msg(msg, NULL);
return err;
}

@@ -1049,7 +1049,7 @@ static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
/*
* Create dummy message to copy real message to.
*/
- copy = load_msg(buf, bufsz);
+ copy = load_msg(buf, bufsz, NULL);
if (!IS_ERR(copy))
copy->m_ts = bufsz;
return copy;
@@ -1058,7 +1058,7 @@ static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
static inline void free_copy(struct msg_msg *copy)
{
if (copy)
- free_msg(copy);
+ free_msg(copy, NULL);
}
#else
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
@@ -1256,7 +1256,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
}

bufsz = msg_handler(buf, msg, bufsz);
- free_msg(msg);
+ free_msg(msg, NULL);

return bufsz;
}
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index d0a0e877cadd..8fe64bb3f48d 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -39,16 +39,65 @@ struct msg_msgseg {
/* the next part of the message follows immediately */
};

+/* One per-CPU slot of a message cache; it holds at most one parked message. */
+struct pcpu_msg_cache {
+ struct msg_msg *msg; /* parked message, NULL when the slot is empty */
+ struct task_struct *curr; /* task that parked msg, recorded by free_msg() */
+ size_t len; /* msg->m_ts at the time msg was parked */
+};
+
+/*
+ * Handle to a message cache. pcpu_cache is allocated by init_msg_cache()
+ * and released by free_msg_cache().
+ *
+ * NOTE(review): ipc/mqueue.c defines this struct a second time; keep both
+ * definitions identical.
+ */
+struct msg_cache {
+ struct pcpu_msg_cache __percpu *pcpu_cache;
+};
+
#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))

+/*
+ * init_msg_cache - allocate the per-CPU slots of a message cache
+ * @cache: cache to set up
+ *
+ * Returns 0 on success, or -ENOMEM if the per-CPU allocation fails.
+ */
+int init_msg_cache(struct msg_cache *cache)
+{
+ cache->pcpu_cache = alloc_percpu(struct pcpu_msg_cache);
+ if (!cache->pcpu_cache)
+ return -ENOMEM;

-static struct msg_msg *alloc_msg(size_t len)
+ return 0;
+}
+
+/*
+ * free_msg_cache - release a message cache and every message parked in it
+ * @cache: cache whose per-CPU slots are to be drained and freed
+ *
+ * Does nothing when the per-CPU area is absent (pcpu_cache == NULL). The
+ * pointer is cleared once the area has been released, so a repeated call
+ * on the same cache is harmless instead of walking freed per-CPU memory.
+ */
+void free_msg_cache(struct msg_cache *cache)
+{
+ int cpu;
+
+ if (!cache->pcpu_cache)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct pcpu_msg_cache *pc = per_cpu_ptr(cache->pcpu_cache, cpu);
+
+ if (pc->msg) {
+ /* NULL cache: really free it instead of parking it again. */
+ free_msg(pc->msg, NULL);
+ pc->msg = NULL;
+ }
+ }
+
+ free_percpu(cache->pcpu_cache);
+ /* Do not leave a dangling pointer behind for a later call. */
+ cache->pcpu_cache = NULL;
+}
+
+/*
+ * alloc_msg - get a message buffer for a payload of @len bytes
+ * @len: payload length
+ * @cache: optional message cache; NULL bypasses caching
+ *
+ * Returns the message, or NULL on failure.
+ */
+static struct msg_msg *alloc_msg(size_t len, struct msg_cache *cache)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
size_t alen;

+ if (cache) {
+ struct pcpu_msg_cache *pc;
+
+ /*
+ * Fast path: take the message parked on this CPU, but only if the
+ * current task parked it and it was parked with the same length.
+ */
+ msg = NULL;
+ pc = get_cpu_ptr(cache->pcpu_cache);
+ if (pc->msg && pc->curr == get_current() && pc->len == len) {
+ msg = pc->msg;
+ pc->msg = NULL;
+ }
+ put_cpu_ptr(cache->pcpu_cache);
+ if (msg)
+ return msg;
+ }
+
alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
if (msg == NULL)
@@ -77,18 +126,19 @@ static struct msg_msg *alloc_msg(size_t len)
return msg;

out_err:
- free_msg(msg);
+ /*
+ * Never park a message whose construction failed. free_msg() would
+ * file it under msg->m_ts, which is only meaningful for a fully built
+ * message, and a later cache hit could hand out a wrongly sized buffer.
+ */
+ free_msg(msg, NULL);
return NULL;
}

+/*
+ * load_msg - obtain a message via alloc_msg() and fill it from user space
+ * @src: user-space source buffer
+ * @len: payload length
+ * @cache: optional message cache handed to alloc_msg(); NULL bypasses it
+ *
+ * Returns the message on success, ERR_PTR(-ENOMEM) if no message could be
+ * obtained, or ERR_PTR(err) for a later failure (err starts as -EFAULT).
+ */
-struct msg_msg *load_msg(const void __user *src, size_t len)
+struct msg_msg *load_msg(const void __user *src, size_t len,
+ struct msg_cache *cache)
{
struct msg_msg *msg;
struct msg_msgseg *seg;
int err = -EFAULT;
size_t alen;

- msg = alloc_msg(len);
+ msg = alloc_msg(len, cache);
if (msg == NULL)
return ERR_PTR(-ENOMEM);

@@ -111,7 +161,7 @@ struct msg_msg *load_msg(const void __user *src, size_t len)
return msg;

out_err:
- free_msg(msg);
+ /*
+ * Bypass the cache on failure. free_msg() parks a message under
+ * msg->m_ts, but callers assign m_ts only after load_msg() has
+ * succeeded, so a failed message must be freed for real.
+ */
+ free_msg(msg, NULL);
return ERR_PTR(err);
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -166,10 +216,27 @@ int store_msg(void __user *dest, struct msg_msg *msg, size_t len)
return 0;
}

-void free_msg(struct msg_msg *msg)
+void free_msg(struct msg_msg *msg, struct msg_cache *cache)
{
struct msg_msgseg *seg;

+ if (cache) {
+ struct pcpu_msg_cache *pc;
+ bool cached = false;
+
+ pc = get_cpu_ptr(cache->pcpu_cache);
+ if (!pc->msg) {
+ pc->curr = get_current();
+ pc->len = msg->m_ts;
+ pc->msg = msg;
+ cached = true;
+ }
+ put_cpu_ptr(cache->pcpu_cache);
+
+ if (cached)
+ return;
+ }
+
security_msg_msg_free(msg);

seg = msg->next;
diff --git a/ipc/util.h b/ipc/util.h
index b2906e366539..a2da266386aa 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -196,8 +196,12 @@ static inline void ipc_update_pid(struct pid **pos, struct pid *pid)
int ipc_parse_version(int *cmd);
#endif

-extern void free_msg(struct msg_msg *msg);
-extern struct msg_msg *load_msg(const void __user *src, size_t len);
+/*
+ * Message cache helpers, implemented in ipc/msgutil.c. free_msg() and
+ * load_msg() accept a NULL cache, in which case no caching is attempted.
+ */
+struct msg_cache;
+extern int init_msg_cache(struct msg_cache *cache);
+extern void free_msg_cache(struct msg_cache *cache);
+extern void free_msg(struct msg_msg *msg, struct msg_cache *cache);
+extern struct msg_msg *load_msg(const void __user *src, size_t len,
+ struct msg_cache *cache);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);

--
2.38.1