[RFC 08/14] fork: separate vmap stack allocation and free calls

From: Pasha Tatashin
Date: Mon Mar 11 2024 - 12:48:27 EST


In preparation for dynamic stacks, separate the __vmalloc_node_range()
and vfree() calls out of the vmap-based stack allocation and free paths
into new alloc_vmap_stack() and free_vmap_stack() helpers. The dynamic
stacks will use their own variants of these functions.

Signed-off-by: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx>
---
kernel/fork.c | 53 ++++++++++++++++++++++++++++++---------------------
1 file changed, 31 insertions(+), 22 deletions(-)
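
[ Not part of the patch: a minimal sketch of how a dynamic-stacks backend
  could later hang off these helpers. dynamic_stack_enabled() and
  alloc_dynamic_stack() are hypothetical placeholder names used only for
  illustration; they are not introduced by this series. ]

	/*
	 * Hypothetical dispatch: call sites keep using a single helper
	 * while the backend is swapped between the vmap implementation
	 * added here and a future dynamic-stack one.
	 */
	static struct vm_struct *alloc_thread_stack_vm_area(int node)
	{
		if (dynamic_stack_enabled())			/* placeholder */
			return alloc_dynamic_stack(node);	/* placeholder */

		return alloc_vmap_stack(node);	/* added by this patch */
	}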

diff --git a/kernel/fork.c b/kernel/fork.c
index 3004e6ce6c65..bbae5f705773 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -204,6 +204,29 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
 	return false;
 }
 
+static inline struct vm_struct *alloc_vmap_stack(int node)
+{
+	void *stack;
+
+	/*
+	 * Allocated stacks are cached and later reused by new threads,
+	 * so memcg accounting is performed manually on assigning/releasing
+	 * stacks to tasks. Drop __GFP_ACCOUNT.
+	 */
+	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
+				     VMALLOC_START, VMALLOC_END,
+				     THREADINFO_GFP & ~__GFP_ACCOUNT,
+				     PAGE_KERNEL,
+				     0, node, __builtin_return_address(0));
+
+	return (stack) ? find_vm_area(stack) : NULL;
+}
+
+static inline void free_vmap_stack(struct vm_struct *vm_area)
+{
+	vfree(vm_area->addr);
+}
+
 static void thread_stack_free_rcu(struct rcu_head *rh)
 {
 	struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
@@ -212,7 +235,7 @@ static void thread_stack_free_rcu(struct rcu_head *rh)
 	if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
 		return;
 
-	vfree(vm_area->addr);
+	free_vmap_stack(vm_area);
 }
 
 static void thread_stack_delayed_free(struct task_struct *tsk)
@@ -235,7 +258,7 @@ static int free_vm_stack_cache(unsigned int cpu)
 		if (!vm_area)
 			continue;
 
-		vfree(vm_area->addr);
+		free_vmap_stack(vm_area);
 		cached_vm_stacks[i] = NULL;
 	}
 
@@ -265,7 +288,6 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	struct vm_struct *vm_area;
 	int i, j, nr_pages;
-	void *stack;
 
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
 		vm_area = this_cpu_xchg(cached_stacks[i], NULL);
@@ -273,14 +295,13 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 			continue;
 
 		if (memcg_charge_kernel_stack(vm_area)) {
-			vfree(vm_area->addr);
+			free_vmap_stack(vm_area);
 			return -ENOMEM;
 		}
 
 		/* Reset stack metadata. */
 		kasan_unpoison_range(vm_area->addr, THREAD_SIZE);
-
-		stack = kasan_reset_tag(vm_area->addr);
+		tsk->stack = kasan_reset_tag(vm_area->addr);
 
 		/* Clear stale pointers from reused stack. */
 		nr_pages = vm_area->nr_pages;
@@ -288,26 +309,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 			clear_page(page_address(vm_area->pages[j]));
 
 		tsk->stack_vm_area = vm_area;
-		tsk->stack = stack;
 		return 0;
 	}
 
-	/*
-	 * Allocated stacks are cached and later reused by new threads,
-	 * so memcg accounting is performed manually on assigning/releasing
-	 * stacks to tasks. Drop __GFP_ACCOUNT.
-	 */
-	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
-				     VMALLOC_START, VMALLOC_END,
-				     THREADINFO_GFP & ~__GFP_ACCOUNT,
-				     PAGE_KERNEL,
-				     0, node, __builtin_return_address(0));
-	if (!stack)
+	vm_area = alloc_vmap_stack(node);
+	if (!vm_area)
 		return -ENOMEM;
 
-	vm_area = find_vm_area(stack);
 	if (memcg_charge_kernel_stack(vm_area)) {
-		vfree(stack);
+		free_vmap_stack(vm_area);
 		return -ENOMEM;
 	}
 	/*
@@ -316,8 +326,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	 * so cache the vm_struct.
 	 */
 	tsk->stack_vm_area = vm_area;
-	stack = kasan_reset_tag(stack);
-	tsk->stack = stack;
+	tsk->stack = kasan_reset_tag(vm_area->addr);
 	return 0;
 }

--
2.44.0.278.ge034bb2e1d-goog