Re: [PATCH v3 10/35] lib: code tagging framework
From: Suren Baghdasaryan
Date: Fri Feb 16 2024 - 02:22:49 EST
On Mon, Feb 12, 2024 at 6:04 PM Suren Baghdasaryan <surenb@xxxxxxxxxx> wrote:
>
> On Mon, Feb 12, 2024 at 2:27 PM Kees Cook <keescook@xxxxxxxxxxxx> wrote:
> >
> > On Mon, Feb 12, 2024 at 01:38:56PM -0800, Suren Baghdasaryan wrote:
> > > > Add basic infrastructure to support code tagging, which stores common
> > > > tag information consisting of the module name, function, file name and
> > > > line number. Provide functions to register a new code tag type and
> > > > navigate between code tags.
> > >
> > > Co-developed-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> > > Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
> > > ---
> > > include/linux/codetag.h | 71 ++++++++++++++
> > > lib/Kconfig.debug | 4 +
> > > lib/Makefile | 1 +
> > > lib/codetag.c | 199 ++++++++++++++++++++++++++++++++++++++++
> > > 4 files changed, 275 insertions(+)
> > > create mode 100644 include/linux/codetag.h
> > > create mode 100644 lib/codetag.c
> > >
> > > diff --git a/include/linux/codetag.h b/include/linux/codetag.h
> > > new file mode 100644
> > > index 000000000000..a9d7adecc2a5
> > > --- /dev/null
> > > +++ b/include/linux/codetag.h
> > > @@ -0,0 +1,71 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +/*
> > > + * code tagging framework
> > > + */
> > > +#ifndef _LINUX_CODETAG_H
> > > +#define _LINUX_CODETAG_H
> > > +
> > > +#include <linux/types.h>
> > > +
> > > +struct codetag_iterator;
> > > +struct codetag_type;
> > > +struct seq_buf;
> > > +struct module;
> > > +
> > > +/*
> > > + * An instance of this structure is created in a special ELF section at every
> > > + * code location being tagged. At runtime, the special section is treated as
> > > + * an array of these.
> > > + */
> > > +struct codetag {
> > > + unsigned int flags; /* used in later patches */
> > > + unsigned int lineno;
> > > + const char *modname;
> > > + const char *function;
> > > + const char *filename;
> > > +} __aligned(8);
> > > +
> > > +union codetag_ref {
> > > + struct codetag *ct;
> > > +};
> > > +
> > > +struct codetag_range {
> > > + struct codetag *start;
> > > + struct codetag *stop;
> > > +};
> > > +
> > > +struct codetag_module {
> > > + struct module *mod;
> > > + struct codetag_range range;
> > > +};
> > > +
> > > +struct codetag_type_desc {
> > > + const char *section;
> > > + size_t tag_size;
> > > +};
> > > +
> > > +struct codetag_iterator {
> > > + struct codetag_type *cttype;
> > > + struct codetag_module *cmod;
> > > + unsigned long mod_id;
> > > + struct codetag *ct;
> > > +};
> > > +
> > > +#define CODE_TAG_INIT { \
> > > + .modname = KBUILD_MODNAME, \
> > > + .function = __func__, \
> > > + .filename = __FILE__, \
> > > + .lineno = __LINE__, \
> > > + .flags = 0, \
> > > +}
> > > +
> > > +void codetag_lock_module_list(struct codetag_type *cttype, bool lock);
> > > +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype);
> > > +struct codetag *codetag_next_ct(struct codetag_iterator *iter);
> > > +
> > > +void codetag_to_text(struct seq_buf *out, struct codetag *ct);
> > > +
> > > +struct codetag_type *
> > > +codetag_register_type(const struct codetag_type_desc *desc);
> > > +
> > > +#endif /* _LINUX_CODETAG_H */
> > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > > index 975a07f9f1cc..0be2d00c3696 100644
> > > --- a/lib/Kconfig.debug
> > > +++ b/lib/Kconfig.debug
> > > @@ -968,6 +968,10 @@ config DEBUG_STACKOVERFLOW
> > >
> > > If in doubt, say "N".
> > >
> > > +config CODE_TAGGING
> > > + bool
> > > + select KALLSYMS
> > > +
> > > source "lib/Kconfig.kasan"
> > > source "lib/Kconfig.kfence"
> > > source "lib/Kconfig.kmsan"
> > > diff --git a/lib/Makefile b/lib/Makefile
> > > index 6b09731d8e61..6b48b22fdfac 100644
> > > --- a/lib/Makefile
> > > +++ b/lib/Makefile
> > > @@ -235,6 +235,7 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
> > > of-reconfig-notifier-error-inject.o
> > > obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> > >
> > > +obj-$(CONFIG_CODE_TAGGING) += codetag.o
> > > lib-$(CONFIG_GENERIC_BUG) += bug.o
> > >
> > > obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
> > > diff --git a/lib/codetag.c b/lib/codetag.c
> > > new file mode 100644
> > > index 000000000000..7708f8388e55
> > > --- /dev/null
> > > +++ b/lib/codetag.c
> > > @@ -0,0 +1,199 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +#include <linux/codetag.h>
> > > +#include <linux/idr.h>
> > > +#include <linux/kallsyms.h>
> > > +#include <linux/module.h>
> > > +#include <linux/seq_buf.h>
> > > +#include <linux/slab.h>
> > > +
> > > +struct codetag_type {
> > > + struct list_head link;
> > > + unsigned int count;
> > > + struct idr mod_idr;
> > > + struct rw_semaphore mod_lock; /* protects mod_idr */
> > > + struct codetag_type_desc desc;
> > > +};
> > > +
> > > +static DEFINE_MUTEX(codetag_lock);
> > > +static LIST_HEAD(codetag_types);
> > > +
> > > +void codetag_lock_module_list(struct codetag_type *cttype, bool lock)
> > > +{
> > > + if (lock)
> > > + down_read(&cttype->mod_lock);
> > > + else
> > > + up_read(&cttype->mod_lock);
> > > +}
> > > +
> > > +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype)
> > > +{
> > > + struct codetag_iterator iter = {
> > > + .cttype = cttype,
> > > + .cmod = NULL,
> > > + .mod_id = 0,
> > > + .ct = NULL,
> > > + };
> > > +
> > > + return iter;
> > > +}
> > > +
> > > +static inline struct codetag *get_first_module_ct(struct codetag_module *cmod)
> > > +{
> > > + return cmod->range.start < cmod->range.stop ? cmod->range.start : NULL;
> > > +}
> > > +
> > > +static inline
> > > +struct codetag *get_next_module_ct(struct codetag_iterator *iter)
> > > +{
> > > + struct codetag *res = (struct codetag *)
> > > + ((char *)iter->ct + iter->cttype->desc.tag_size);
> > > +
> > > + return res < iter->cmod->range.stop ? res : NULL;
> > > +}
> > > +
> > > +struct codetag *codetag_next_ct(struct codetag_iterator *iter)
> > > +{
> > > + struct codetag_type *cttype = iter->cttype;
> > > + struct codetag_module *cmod;
> > > + struct codetag *ct;
> > > +
> > > + lockdep_assert_held(&cttype->mod_lock);
> > > +
> > > + if (unlikely(idr_is_empty(&cttype->mod_idr)))
> > > + return NULL;
> > > +
> > > + ct = NULL;
> > > + while (true) {
> > > + cmod = idr_find(&cttype->mod_idr, iter->mod_id);
> > > +
> > > + /* If module was removed move to the next one */
> > > + if (!cmod)
> > > + cmod = idr_get_next_ul(&cttype->mod_idr,
> > > + &iter->mod_id);
> > > +
> > > + /* Exit if no more modules */
> > > + if (!cmod)
> > > + break;
> > > +
> > > + if (cmod != iter->cmod) {
> > > + iter->cmod = cmod;
> > > + ct = get_first_module_ct(cmod);
> > > + } else
> > > + ct = get_next_module_ct(iter);
> > > +
> > > + if (ct)
> > > + break;
> > > +
> > > + iter->mod_id++;
> > > + }
> > > +
> > > + iter->ct = ct;
> > > + return ct;
> > > +}
> > > +
> > > +void codetag_to_text(struct seq_buf *out, struct codetag *ct)
> > > +{
> > > + seq_buf_printf(out, "%s:%u module:%s func:%s",
> > > + ct->filename, ct->lineno,
> > > + ct->modname, ct->function);
> > > +}
> >
> > Thank you for using seq_buf here!
> >
> > Also, will this need an EXPORT_SYMBOL_GPL()?
I missed this question earlier. I don't think we need EXPORT_SYMBOL_GPL()
here, at least for now. Modules don't call these functions: the
"alloc_tags" sections will be generated for each module at compile time,
but the modules themselves do not call these functions.
> >
> > > +
> > > +static inline size_t range_size(const struct codetag_type *cttype,
> > > + const struct codetag_range *range)
> > > +{
> > > + return ((char *)range->stop - (char *)range->start) /
> > > + cttype->desc.tag_size;
> > > +}
> > > +
> > > +static void *get_symbol(struct module *mod, const char *prefix, const char *name)
> > > +{
> > > + char buf[64];
> >
> > Why is 64 enough? I was expecting KSYM_NAME_LEN here, but perhaps this
> > is specialized enough to section names that it will not be a problem?
>
> This buffer is used to hold the name of the section containing
> codetags, prefixed with "__start_" or "__stop_". The only current
> user is alloc_tag_init(), which sets the section name to "alloc_tags".
> So, this buffer currently holds either "__start_alloc_tags" or
> "__stop_alloc_tags". When more codetag applications are added (like
> the ones we have shown in the original RFC [1]), there will be more
> section names. 64 was chosen as a big enough value to reasonably hold
> the section name with the prefix. But you are right, we should add a
> check for the section name size to ensure it always fits. I will add
> it to my TODO list.
>
> [1] https://lore.kernel.org/all/20220830214919.53220-1-surenb@xxxxxxxxxx/
> > If so, please document it clearly with a comment.
>
> Will do.
>
> >
> > > + int res;
> > > +
> > > + res = snprintf(buf, sizeof(buf), "%s%s", prefix, name);
> > > + if (WARN_ON(res < 1 || res > sizeof(buf)))
> > > + return NULL;
> >
> > Please use a seq_buf here instead of snprintf, which we're trying to get
> > rid of.
> >
> > DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN);
> > char *buf;
> >
> > seq_buf_printf(sb, "%s%s", prefix, name);
> > if (seq_buf_has_overflowed(sb))
> > return NULL;
> >
> > buf = seq_buf_str(sb);
>
> Will do. Thanks!
>
> >
> > > +
> > > + return mod ?
> > > + (void *)find_kallsyms_symbol_value(mod, buf) :
> > > + (void *)kallsyms_lookup_name(buf);
> > > +}
> > > +
> > > +static struct codetag_range get_section_range(struct module *mod,
> > > + const char *section)
> > > +{
> > > + return (struct codetag_range) {
> > > + get_symbol(mod, "__start_", section),
> > > + get_symbol(mod, "__stop_", section),
> > > + };
> > > +}
> > > +
> > > +static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
> > > +{
> > > + struct codetag_range range;
> > > + struct codetag_module *cmod;
> > > + int err;
> > > +
> > > + range = get_section_range(mod, cttype->desc.section);
> > > + if (!range.start || !range.stop) {
> > > + pr_warn("Failed to load code tags of type %s from the module %s\n",
> > > + cttype->desc.section,
> > > + mod ? mod->name : "(built-in)");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + /* Ignore empty ranges */
> > > + if (range.start == range.stop)
> > > + return 0;
> > > +
> > > + BUG_ON(range.start > range.stop);
> > > +
> > > + cmod = kmalloc(sizeof(*cmod), GFP_KERNEL);
> > > + if (unlikely(!cmod))
> > > + return -ENOMEM;
> > > +
> > > + cmod->mod = mod;
> > > + cmod->range = range;
> > > +
> > > + down_write(&cttype->mod_lock);
> > > + err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL);
> > > + if (err >= 0)
> > > + cttype->count += range_size(cttype, &range);
> > > + up_write(&cttype->mod_lock);
> > > +
> > > + if (err < 0) {
> > > + kfree(cmod);
> > > + return err;
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +struct codetag_type *
> > > +codetag_register_type(const struct codetag_type_desc *desc)
> > > +{
> > > + struct codetag_type *cttype;
> > > + int err;
> > > +
> > > + BUG_ON(desc->tag_size <= 0);
> > > +
> > > + cttype = kzalloc(sizeof(*cttype), GFP_KERNEL);
> > > + if (unlikely(!cttype))
> > > + return ERR_PTR(-ENOMEM);
> > > +
> > > + cttype->desc = *desc;
> > > + idr_init(&cttype->mod_idr);
> > > + init_rwsem(&cttype->mod_lock);
> > > +
> > > + err = codetag_module_init(cttype, NULL);
> > > + if (unlikely(err)) {
> > > + kfree(cttype);
> > > + return ERR_PTR(err);
> > > + }
> > > +
> > > + mutex_lock(&codetag_lock);
> > > + list_add_tail(&cttype->link, &codetag_types);
> > > + mutex_unlock(&codetag_lock);
> > > +
> > > + return cttype;
> > > +}
> > > --
> > > 2.43.0.687.g38aa6559b0-goog
> > >
> >
> > --
> > Kees Cook