Re: [PATCH v2 03/16] skbuff: Proactively round up to kmalloc bucket size

From: Kees Cook
Date: Sat Sep 24 2022 - 05:14:22 EST


On Fri, Sep 23, 2022 at 01:28:09PM -0700, Kees Cook wrote:
> Instead of discovering the kmalloc bucket size _after_ allocation, round
> up proactively so the allocation is explicitly made for the full size,
> allowing the compiler to correctly reason about the resulting size of
> the buffer through the existing __alloc_size() hint.
>
> This will allow for kernels built with CONFIG_UBSAN_BOUNDS or the
> coming dynamic bounds checking under CONFIG_FORTIFY_SOURCE to gain
> back the __alloc_size() hints that were temporarily reverted in commit
> 93dd04ab0b2b ("slab: remove __alloc_size attribute from __kmalloc_track_caller").
>
> Additionally tries to normalize size variables from int to u32. Most
> interfaces are using "int", but notably __alloc_skb uses unsigned int.
>
> Also fix some reverse Christmas tree and comments while touching nearby
> code.

Something in this patch is breaking things -- I've refactored it again
to avoid overwriting the incoming size argument, adding a dedicated
outgoing size variable instead. Here's what will be v3 ...
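
For the callers, the difference is that the rounded-up allocation size
now comes back through the new out-parameter instead of from ksize() on
the returned pointer, so the call pattern ends up looking roughly like
this (see the hunks below for the real thing):

	size_t alloc_size;

	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc, &alloc_size);
	if (unlikely(!data))
		goto nodata;
	/* usable room is the bucket size minus the skb_shared_info overhead */
	size = SKB_WITH_OVERHEAD(alloc_size);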

---
net/core/skbuff.c | 41 ++++++++++++++++++++++-------------------
1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..9b5a9fb69d9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -346,11 +346,12 @@ EXPORT_SYMBOL(napi_build_skb);
  * memory is free
  */
 static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
-			     bool *pfmemalloc)
+			     bool *pfmemalloc, size_t *alloc_size)
 {
 	void *obj;
 	bool ret_pfmemalloc = false;
 
+	size = kmalloc_size_roundup(size);
 	/*
 	 * Try a regular allocation, when that fails and we're not entitled
 	 * to the reserves, fail.
@@ -369,6 +370,7 @@ static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
 	if (pfmemalloc)
 		*pfmemalloc = ret_pfmemalloc;
 
+	*alloc_size = size;
 	return obj;
 }
 
@@ -400,7 +402,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 {
 	struct kmem_cache *cache;
 	struct sk_buff *skb;
-	unsigned int osize;
+	size_t alloc_size;
 	bool pfmemalloc;
 	u8 *data;
 
@@ -427,15 +429,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
-	if (unlikely(!data))
-		goto nodata;
-	/* kmalloc(size) might give us more room than requested.
+	/* kmalloc(size) might give us more room than requested, so
+	 * allocate the true bucket size up front.
 	 * Put skb_shared_info exactly at the end of allocated zone,
 	 * to allow max possible filling before reallocation.
 	 */
-	osize = ksize(data);
-	size = SKB_WITH_OVERHEAD(osize);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc, &alloc_size);
+	if (unlikely(!data))
+		goto nodata;
+	size = SKB_WITH_OVERHEAD(alloc_size);
 	prefetchw(data + size);
 
 	/*
@@ -444,7 +446,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * the tail pointer in struct sk_buff!
 	 */
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	__build_skb_around(skb, data, osize);
+	__build_skb_around(skb, data, alloc_size);
 	skb->pfmemalloc = pfmemalloc;
 
 	if (flags & SKB_ALLOC_FCLONE) {
@@ -1709,6 +1711,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
 	int i, osize = skb_end_offset(skb);
 	int size = osize + nhead + ntail;
+	size_t alloc_size;
 	long off;
 	u8 *data;
 
@@ -1723,10 +1726,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-			       gfp_mask, NUMA_NO_NODE, NULL);
+			       gfp_mask, NUMA_NO_NODE, NULL, &alloc_size);
 	if (!data)
 		goto nodata;
-	size = SKB_WITH_OVERHEAD(ksize(data));
+	size = SKB_WITH_OVERHEAD(alloc_size);
 
 	/* Copy only real data... and, alas, header. This should be
 	 * optimized for the cases when header is void.
@@ -6063,19 +6066,19 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	int i;
 	int size = skb_end_offset(skb);
 	int new_hlen = headlen - off;
+	size_t alloc_size;
 	u8 *data;
 
 	size = SKB_DATA_ALIGN(size);
 
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
-	data = kmalloc_reserve(size +
-			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-			       gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL, &alloc_size);
 	if (!data)
 		return -ENOMEM;
 
-	size = SKB_WITH_OVERHEAD(ksize(data));
+	size = SKB_WITH_OVERHEAD(alloc_size);
 
 	/* Copy real data, and all frags */
 	skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
@@ -6184,18 +6187,18 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 	u8 *data;
 	const int nfrags = skb_shinfo(skb)->nr_frags;
 	struct skb_shared_info *shinfo;
+	size_t alloc_size;
 
 	size = SKB_DATA_ALIGN(size);
 
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
-	data = kmalloc_reserve(size +
-			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-			       gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL, &alloc_size);
 	if (!data)
 		return -ENOMEM;
 
-	size = SKB_WITH_OVERHEAD(ksize(data));
+	size = SKB_WITH_OVERHEAD(alloc_size);
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
--
2.34.1


--
Kees Cook