Re: [RFC PATCH 3/5] pgo: Wire up the new more generic code for modules

From: Kees Cook
Date: Mon Jun 14 2021 - 11:56:26 EST


On Sat, Jun 12, 2021 at 06:24:24AM +0300, Jarmo Tiitto wrote:
> prf_open() now uses inode->i_private to get
> the prf_object for the file. This can be either
> vmlinux.profraw or any module.profraw file.
>
> The prf_vmlinux object is now added into prf_list and
> allocate_node() scans the list and reserves vnodes
> from the corresponding prf_object(s).
>
> Signed-off-by: Jarmo Tiitto <jarmo.tiitto@xxxxxxxxx>
> ---
> note: There is no module notifier code yet,
> so only vmlinux.profraw profile data
> is available with this commit.
>
> Another thing is that pgo/reset will only
> reset vmlinux.profraw.
> Profile data reset for modules may be added later:
> maybe writing a module's name into pgo/reset would reset only
> the specified module's profile data?
> Then writing "all" or zero would atomically reset everything.

Yeah, I think matching the internal naming is right. "vmlinux",
module::name, and "all"?

> I'm a bit unsure about the new allocate_node() code since
> it is the first place I had to put rcu_read_lock()
> and the code is likely to change from this.

Comments below...

> ---
> kernel/pgo/fs.c | 30 ++++++++++++++++++++-----
> kernel/pgo/instrument.c | 49 +++++++++++++++++++++++++++--------------
> kernel/pgo/pgo.h | 2 ++
> 3 files changed, 60 insertions(+), 21 deletions(-)
>
> diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> index 7e269d69bcd7..84b36e61758b 100644
> --- a/kernel/pgo/fs.c
> +++ b/kernel/pgo/fs.c
> @@ -32,8 +32,10 @@ static struct dentry *directory;
> struct prf_private_data {
> void *buffer;
> size_t size;
> + struct prf_object *core;
> };
>
> +/* vmlinux's prf core */
> static struct prf_object prf_vmlinux;
>
> /*
> @@ -281,7 +283,6 @@ static int prf_serialize(struct prf_object *po, struct prf_private_data *p, size
> prf_serialize_values(po, &buffer);
>
> return 0;
> -
> }
>
> /* open() implementation for PGO. Creates a copy of the profiling data set. */
> @@ -292,13 +293,21 @@ static int prf_open(struct inode *inode, struct file *file)
> size_t buf_size;
> int err = -EINVAL;
>
> + if (WARN_ON(!inode->i_private)) {
> + /* bug: inode was not initialized by us */
> + return err;
> + }
> +
> data = kzalloc(sizeof(*data), GFP_KERNEL);
> if (!data)
> return -ENOMEM;
>
> + /* Get prf_object of this inode */
> + data->core = inode->i_private;
> +
> /* Get initial buffer size. */
> flags = prf_lock();
> - data->size = prf_buffer_size(&prf_vmlinux);
> + data->size = prf_buffer_size(data->core);
> prf_unlock(flags);
>
> do {
> @@ -318,12 +327,13 @@ static int prf_open(struct inode *inode, struct file *file)
> * data length in data->size.
> */
> flags = prf_lock();
> - err = prf_serialize(&prf_vmlinux, data, buf_size);
> + err = prf_serialize(data->core, data, buf_size);
> prf_unlock(flags);
> /* In unlikely case, try again. */
> } while (err == -EAGAIN);
>
> if (err < 0) {
> +
> if (data)
> vfree(data->buffer);
> kfree(data);
> @@ -412,6 +422,8 @@ static const struct file_operations prf_reset_fops = {
> /* Create debugfs entries. */
> static int __init pgo_init(void)
> {
> + unsigned long flags;
> +
> /* Init profiler vmlinux core entry */
> memset(&prf_vmlinux, 0, sizeof(prf_vmlinux));
> prf_vmlinux.data = __llvm_prf_data_start;
> @@ -430,19 +442,27 @@ static int __init pgo_init(void)
> prf_vmlinux.vnds_num = prf_get_count(__llvm_prf_vnds_start,
> __llvm_prf_vnds_end, sizeof(__llvm_prf_vnds_start[0]));
>
> + /* enable profiling */
> + flags = prf_list_lock();
> + list_add_tail_rcu(&prf_vmlinux.link, &prf_list);
> + prf_list_unlock(flags);
>
> directory = debugfs_create_dir("pgo", NULL);
> if (!directory)
> goto err_remove;
>
> - if (!debugfs_create_file("vmlinux.profraw", 0600, directory, NULL,
> - &prf_fops))
> + prf_vmlinux.file = debugfs_create_file("vmlinux.profraw",
> + 0600, directory, &prf_vmlinux, &prf_fops);
> + if (!prf_vmlinux.file)
> goto err_remove;
>
> if (!debugfs_create_file("reset", 0200, directory, NULL,
> &prf_reset_fops))
> goto err_remove;
>
> + /* show notice why the system slower: */
> + pr_notice("Clang PGO instrumentation is active.");
> +

Please pull this change into a separate patch and make it pr_info()
("notice" is, I think, not right here).

> return 0;
>
> err_remove:
> diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> index 24fdeb79b674..e214c9d7a113 100644
> --- a/kernel/pgo/instrument.c
> +++ b/kernel/pgo/instrument.c
> @@ -24,6 +24,7 @@
> #include <linux/export.h>
> #include <linux/spinlock.h>
> #include <linux/types.h>
> +#include <linux/rculist.h>
> #include "pgo.h"
>
> /*
> @@ -56,22 +57,38 @@ void prf_unlock(unsigned long flags)
> static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> u32 index, u64 value)
> {
> - const int max_vnds = prf_get_count(__llvm_prf_vnds_start,
> - __llvm_prf_vnds_end, sizeof(struct llvm_prf_value_node));
> -
> - /*
> - * Check that p is within vmlinux __llvm_prf_data section.
> - * If not, don't allocate since we can't handle modules yet.
> - */
> - if (!memory_contains(__llvm_prf_data_start,
> - __llvm_prf_data_end, p, sizeof(*p)))
> - return NULL;
> -
> - if (WARN_ON_ONCE(current_node >= max_vnds))
> - return NULL; /* Out of nodes */
> -
> - /* reserve vnode for vmlinux */
> - return &__llvm_prf_vnds_start[current_node++];
> + struct llvm_prf_value_node *vnode = NULL;
> + struct prf_object *po;
> + struct llvm_prf_data *data_end;
> + int max_vnds;
> +
> + rcu_read_lock();

AIUI, list readers are using rcu_read_lock(), and writers are using
prf_list_lock()?

> +
> + list_for_each_entry_rcu(po, &prf_list, link) {
> + /* get section limits */
> + max_vnds = prf_vnds_count(po);
> + data_end = po->data + prf_data_count(po);
> +
> + /*
> + * Check that p is within:
> + * [po->data, po->data + prf_data_count(po)] section.
> + * If yes, allocate vnode from this prf_object.
> + */
> + if (memory_contains(po->data, data_end, p, sizeof(*p))) {
> +
> +
> + if (WARN_ON_ONCE(po->current_node >= max_vnds))
> + return NULL; /* Out of nodes */
> +
> + /* reserve the vnode */
> + vnode = &po->vnds[po->current_node++];
> + goto out;
> + }
> + }
> +
> +out:
> + rcu_read_unlock();
> + return vnode;
> }
>
> /*
> diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> index 44d79e2861e1..59d0aa966fbe 100644
> --- a/kernel/pgo/pgo.h
> +++ b/kernel/pgo/pgo.h
> @@ -19,6 +19,8 @@
> #ifndef _PGO_H
> #define _PGO_H
>
> +#include <linux/rculist.h>
> +
> /*
> * Note: These internal LLVM definitions must match the compiler version.
> * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
> --
> 2.32.0
>

--
Kees Cook