Re: [PATCH] kasan: fix the missing underflow in memmove and memcpy with CONFIG_KASAN_GENERIC=y

From: Dmitry Vyukov
Date: Mon Oct 07 2019 - 03:30:01 EST


On Mon, Oct 7, 2019 at 5:23 AM Walter Wu <walter-zh.wu@xxxxxxxxxxxx> wrote:
> > > > > > "out-of-bounds" is the _least_ frequent KASAN bug type. So saying
> > > > > > "out-of-bounds" has downsides of both approaches and won't prevent
> > > > > > duplicate reports by syzbot...
> > > > > >
> > > > > maybe i should add your comment into the comment in get_bug_type?
> > > >
> > > > Yes, that's exactly what I meant above:
> > > >
> > > > "I would change get_bug_type() to return "slab-out-of-bounds" (as the
> > > > most common OOB) in such case (with a comment)."
> > > >
> > > > ;)
> > >
> > >
> > > The patchset helps to produce a KASAN report when a negative size is
> > > passed to a memory operation function. It helps programmers to solve the
> > > undefined behavior issue. Patch 1 is based on Dmitry's suggestion and
> > > review; patch 2 is a test to verify patch 1.
> > >
> > > [1]https://bugzilla.kernel.org/show_bug.cgi?id=199341
> > > [2]https://lore.kernel.org/linux-arm-kernel/20190927034338.15813-1-walter-zh.wu@xxxxxxxxxxxx/
> > >
> > > Walter Wu (2):
> > > kasan: detect invalid size in memory operation function
> > > kasan: add test for invalid size in memmove
> > >
> > > lib/test_kasan.c | 18 ++++++++++++++++++
> > > mm/kasan/common.c | 13 ++++++++-----
> > > mm/kasan/generic.c | 5 +++++
> > > mm/kasan/generic_report.c | 10 ++++++++++
> > > mm/kasan/tags.c | 5 +++++
> > > mm/kasan/tags_report.c | 10 ++++++++++
> > > 6 files changed, 56 insertions(+), 5 deletions(-)
> > >
> > >
> > >
> > >
> > > commit 0bc50c759a425fa0aafb7ef623aa1598b3542c67
> > > Author: Walter Wu <walter-zh.wu@xxxxxxxxxxxx>
> > > Date: Fri Oct 4 18:38:31 2019 +0800
> > >
> > > kasan: detect invalid size in memory operation function
> > >
> > > It is undefined behavior to pass a negative value to
> > > memset()/memcpy()/memmove(),
> > > so it needs to be detected by KASAN.
> > >
> > > If the size is a negative value, then it will be larger than ULONG_MAX/2,
> > > so we qualify it as an out-of-bounds issue.
> > >
> > > KASAN report:
> > >
> > > BUG: KASAN: out-of-bounds in kmalloc_memmove_invalid_size+0x70/0xa0
> > > Read of size 18446744073709551608 at addr ffffff8069660904 by task
> > > cat/72
> > >
> > > CPU: 2 PID: 72 Comm: cat Not tainted
> > > 5.4.0-rc1-next-20191004ajb-00001-gdb8af2f372b2-dirty #1
> > > Hardware name: linux,dummy-virt (DT)
> > > Call trace:
> > > dump_backtrace+0x0/0x288
> > > show_stack+0x14/0x20
> > > dump_stack+0x10c/0x164
> > > print_address_description.isra.9+0x68/0x378
> > > __kasan_report+0x164/0x1a0
> > > kasan_report+0xc/0x18
> > > check_memory_region+0x174/0x1d0
> > > memmove+0x34/0x88
> > > kmalloc_memmove_invalid_size+0x70/0xa0
> > >
> > > [1] https://bugzilla.kernel.org/show_bug.cgi?id=199341
> > >
> > > Signed-off-by: Walter Wu <walter-zh.wu@xxxxxxxxxxxx>
> > > Reported-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
> > > Suggested-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
> > >
> > > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > > index 6814d6d6a023..6ef0abd27f06 100644
> > > --- a/mm/kasan/common.c
> > > +++ b/mm/kasan/common.c
> > > @@ -102,7 +102,8 @@ EXPORT_SYMBOL(__kasan_check_write);
> > > #undef memset
> > > void *memset(void *addr, int c, size_t len)
> > > {
> > > - check_memory_region((unsigned long)addr, len, true, _RET_IP_);
> > > + if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
> > > + return NULL;
> > >
> > > return __memset(addr, c, len);
> > > }
> > > @@ -110,8 +111,9 @@ void *memset(void *addr, int c, size_t len)
> > > #undef memmove
> > > void *memmove(void *dest, const void *src, size_t len)
> > > {
> > > - check_memory_region((unsigned long)src, len, false, _RET_IP_);
> > > - check_memory_region((unsigned long)dest, len, true, _RET_IP_);
> > > + if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
> > > + !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
> > > + return NULL;
> > >
> > > return __memmove(dest, src, len);
> > > }
> > > @@ -119,8 +121,9 @@ void *memmove(void *dest, const void *src, size_t
> > > len)
> > > #undef memcpy
> > > void *memcpy(void *dest, const void *src, size_t len)
> > > {
> > > - check_memory_region((unsigned long)src, len, false, _RET_IP_);
> > > - check_memory_region((unsigned long)dest, len, true, _RET_IP_);
> > > + if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
> > > + !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
> > > + return NULL;
> > >
> > > return __memcpy(dest, src, len);
> > > }
> > > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > > index 616f9dd82d12..02148a317d27 100644
> > > --- a/mm/kasan/generic.c
> > > +++ b/mm/kasan/generic.c
> > > @@ -173,6 +173,11 @@ static __always_inline bool
> > > check_memory_region_inline(unsigned long addr,
> > > if (unlikely(size == 0))
> > > return true;
> > >
> > > + if (unlikely((long)size < 0)) {
> > > + kasan_report(addr, size, write, ret_ip);
> > > + return false;
> > > + }
> > > +
> > > if (unlikely((void *)addr <
> > > kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) {
> > > kasan_report(addr, size, write, ret_ip);
> > > diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c
> > > index 36c645939bc9..23951a453681 100644
> > > --- a/mm/kasan/generic_report.c
> > > +++ b/mm/kasan/generic_report.c
> > > @@ -107,6 +107,16 @@ static const char *get_wild_bug_type(struct
> > > kasan_access_info *info)
> > >
> > > const char *get_bug_type(struct kasan_access_info *info)
> > > {
> > > + /*
> > > + * if access_size < 0, then it will be larger than ULONG_MAX/2,
> > > + * so that this can qualify as out-of-bounds.
> > > + * out-of-bounds is the _least_ frequent KASAN bug type. So saying
> > > + * out-of-bounds has downsides of both approaches and won't prevent
> > > + * duplicate reports by syzbot.
> > > + */
> > > + if ((long)info->access_size < 0)
> > > + return "out-of-bounds";
> > > +
> > > if (addr_has_shadow(info->access_addr))
> > > return get_shadow_bug_type(info);
> > > return get_wild_bug_type(info);
> > > diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
> > > index 0e987c9ca052..b829535a3ad7 100644
> > > --- a/mm/kasan/tags.c
> > > +++ b/mm/kasan/tags.c
> > > @@ -86,6 +86,11 @@ bool check_memory_region(unsigned long addr, size_t
> > > size, bool write,
> > > if (unlikely(size == 0))
> > > return true;
> > >
> > > + if (unlikely((long)size < 0)) {
> > > + kasan_report(addr, size, write, ret_ip);
> > > + return false;
> > > + }
> > > +
> > > tag = get_tag((const void *)addr);
> > >
> > > /*
> > > diff --git a/mm/kasan/tags_report.c b/mm/kasan/tags_report.c
> > > index 969ae08f59d7..19b9e364b397 100644
> > > --- a/mm/kasan/tags_report.c
> > > +++ b/mm/kasan/tags_report.c
> > > @@ -36,6 +36,16 @@
> > >
> > > const char *get_bug_type(struct kasan_access_info *info)
> > > {
> > > + /*
> > > + * if access_size < 0, then it will be larger than ULONG_MAX/2,
> > > + * so that this can qualify as out-of-bounds.
> > > + * out-of-bounds is the _least_ frequent KASAN bug type. So saying
> > > + * out-of-bounds has downsides of both approaches and won't prevent
> > > + * duplicate reports by syzbot.
> > > + */
> > > + if ((long)info->access_size < 0)
> > > + return "out-of-bounds";
> >
> >
> > wait, no :)
> > I meant we change it to heap-out-of-bounds and explain why we are
> > saying this is a heap-out-of-bounds.
> > The current comment effectively says we are doing a non-useful thing for
> > no reason; it does not answer any of my questions as a reader of
> > this code :)
> >
> Ok, the current comment may not be enough to explain why we use OOB
> to represent a size<0 bug. We can modify it as below :)
>
> If access_size < 0, there are two reasons to report it as
> out-of-bounds.
> 1) Casting a negative number to size_t turns it into a "large"
> size_t whose value is larger than ULONG_MAX/2, so that this can
> qualify as out-of-bounds.
> 2) Don't generate a new bug type, in order to prevent duplicate reports by
> some systems, e.g. syzbot.

Looks good to me. I think it should provide enough hooks for future
readers to understand why we do this.

> > > +
> > > #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
> > > struct kasan_alloc_meta *alloc_meta;
> > > struct kmem_cache *cache;
> > >
> > >
> > >
> > > commit fb5cf7bd16e939d1feef229af0211a8616c9ea03
> > > Author: Walter Wu <walter-zh.wu@xxxxxxxxxxxx>
> > > Date: Fri Oct 4 18:32:03 2019 +0800
> > >
> > > kasan: add test for invalid size in memmove
> > >
> > > Test a negative size value in memmove in order to verify
> > > that it correctly produces a KASAN report.
> > >
> > > Signed-off-by: Walter Wu <walter-zh.wu@xxxxxxxxxxxx>
> > >
> > > diff --git a/lib/test_kasan.c b/lib/test_kasan.c
> > > index 49cc4d570a40..06942cf585cc 100644
> > > --- a/lib/test_kasan.c
> > > +++ b/lib/test_kasan.c
> > > @@ -283,6 +283,23 @@ static noinline void __init
> > > kmalloc_oob_in_memset(void)
> > > kfree(ptr);
> > > }
> > >
> > > +static noinline void __init kmalloc_memmove_invalid_size(void)
> > > +{
> > > + char *ptr;
> > > + size_t size = 64;
> > > +
> > > + pr_info("invalid size in memmove\n");
> > > + ptr = kmalloc(size, GFP_KERNEL);
> > > + if (!ptr) {
> > > + pr_err("Allocation failed\n");
> > > + return;
> > > + }
> > > +
> > > + memset((char *)ptr, 0, 64);
> > > + memmove((char *)ptr, (char *)ptr + 4, -2);
> > > + kfree(ptr);
> > > +}
> > > +
> > > static noinline void __init kmalloc_uaf(void)
> > > {
> > > char *ptr;
> > > @@ -773,6 +790,7 @@ static int __init kmalloc_tests_init(void)
> > > kmalloc_oob_memset_4();
> > > kmalloc_oob_memset_8();
> > > kmalloc_oob_memset_16();
> > > + kmalloc_memmove_invalid_size();
> > > kmalloc_uaf();
> > > kmalloc_uaf_memset();
> > > kmalloc_uaf2();
>
>