Re: [RFC][PATCH v6 1/5] trace: Add trace any kernel object

From: Jeff Xie
Date: Tue Dec 21 2021 - 05:29:45 EST


Hi Masami,

On Tue, Dec 21, 2021 at 3:36 PM Masami Hiramatsu <mhiramat@xxxxxxxxxx> wrote:
>
> Hi Jeff,
>
> On Sat, 18 Dec 2021 00:32:57 +0800
> Jeff Xie <xiehuan09@xxxxxxxxx> wrote:
>
> > > > +struct object_instance {
> > > > + void *object;
> > > > + struct freelist_node free_list;
> > > > + struct list_head active_list;
> > > > +};
> > > > +
> > > > +struct obj_pool {
> > > > + struct freelist_head free_list;
> > > > + struct list_head active_list;
> > > > +};
> > > > +static struct obj_pool *obj_pool;
> > > > +
> > > > +static bool object_exist(void *obj)
> > > > +{
> > > > + struct object_instance *inst;
> > > > + bool ret = false;
> > > > +
> > > > + list_for_each_entry_rcu(inst, &obj_pool->active_list, active_list) {
> > > > + if (inst->object == obj) {
> > > > + ret = true;
> > > > + goto out;
> > > > + }
> > > > + }
> > > > +out:
> > > > + return ret;
> > > > +}
> > > > +
> > > > +static bool object_empty(void)
> > > > +{
> > > > + return list_empty(&obj_pool->active_list);
> > > > +}
> > > > +
> > > > +static void set_trace_object(void *obj)
> > > > +{
> > > > + struct freelist_node *fn;
> > > > + struct object_instance *ins;
> > > > + unsigned long flags;
> > > > +
> > > > + if (in_nmi())
> > > > + return;
> > > > +
> > > > + if (!obj)
> > > > + return;
> > > > +
> > > > + if (object_exist(obj))
> > > > + return;
> > > > +
> > > > + fn = freelist_try_get(&obj_pool->free_list);
> > > > + if (!fn) {
> > > > + trace_printk("object_pool is full, can't trace object:0x%px\n", obj);
> > > > + return;
> > > > + }
> > > > +
> > > > + ins = container_of(fn, struct object_instance, free_list);
> > > > + ins->object = obj;
> > > > +
> > > > + raw_spin_lock_irqsave(&object_spin_lock, flags);
> > > > + list_add_rcu(&ins->active_list, &obj_pool->active_list);
> > > > + raw_spin_unlock_irqrestore(&object_spin_lock, flags);
> > >
> > > Please add a comment that why this spinlock is needed here and why
> > > other operation doesn't need it.
> >
> > (Only this place has write operations, and it cannot be concurrent)
> > I agree, I will add it.
>
> BTW, I have another better solution for object pool. If the
> object pool size is fixed (of course to avoid performance overhead,
> it should be small enough) and it can not avoid using spinlock,
> it is better to use fixed-size array. That makes the implementation
> much simpler.

This looks really simpler, I will add it in the next version ;-)

> static struct object_instance {
> void *obj; /* trace object */
> // add offset in the next patch
> } traced_obj[MAX_TRACE_OBJECT];
>
> static atomic_t num_traced_obj;
> static DEFINE_RAW_SPINLOCK(trace_obj_lock);
>
> static void set_trace_object(void *obj)
> {
> ...
>
> raw_spin_lock_irqsave(&trace_obj_lock, flags);
> if (num_traced_obj == MAX_TRACED_OBJECT)
> goto out;
>
> traced_obj[num_traced_obj].obj = obj;
> smp_wmb(); // make sure the num_traced_obj update always appears after trace_obj update.
> num_traced_obj++;

I would like to ask whether this place need to add another smp_wmb()
to match the smp_rmb() in the object_exist().
I learned that rcu has a publish and subscribe mechanism ;-)

> out:
> raw_spin_unlock_irqrestore(&trace_obj_lock, flags);
> }
>
> static bool object_exist(void *obj)
> {
> ...
> max = num_traced_obj;
> smp_rmb(); // then the num_traced_obj will cap the max.
> for (i = 0; i < max; i++) {
> if (traced_obj[i].obj == obj)
> return true;
> }
> return false;
> }
>
> static inline void free_object_pool(void)
> {
> num_traced_obj = 0;
> memset(traced_obj, 0, sizeof(traced_obj));
> }
>
> Sorry if I confuse you but I think you shouldn't take a time on those unneeded
> complexity. :)

Thanks, at least I learned a different way to implement it ;-)

>
>
> > > > +static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> > > > + unsigned long object)
> > > > +{
> > > > +
> > > > + struct trace_buffer *buffer;
> > > > + struct ring_buffer_event *event;
> > > > + struct trace_object_entry *entry;
> > > > + int pc;
> > > > +
> > > > + pc = preempt_count();
> > > > + event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
> > > > + TRACE_OBJECT, sizeof(*entry), pc);
> > > > + if (!event)
> > > > + return;
> > > > + entry = ring_buffer_event_data(event);
> > > > + entry->ip = ip;
> > > > + entry->parent_ip = parent_ip;
> > > > + entry->object = object;
> > > > +
> > > > + event_trigger_unlock_commit(&event_trace_file, buffer, event,
> > > > + entry, pc);
> > > > +}
> > > > +
> > > > +static void
> > > > +trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> > > > + struct ftrace_ops *op, struct ftrace_regs *fregs)
> > > > +{
> > > > + struct pt_regs *pt_regs = ftrace_get_regs(fregs);
> > > > + unsigned long obj;
> > > > + long disabled;
> > > > + int cpu, n;
> > > > +
> > > > + preempt_disable_notrace();
> > > > +
> > > > + cpu = raw_smp_processor_id();
> > > > + disabled = atomic_inc_return(&per_cpu(trace_object_event_disable, cpu));
> > >
> > > So what is the purpose of this check? (are there any issue if the same
> > > cpu reenter here?)
> > >
> >
> > There may be an interrupt context that can preempt it. I am not yet
> > sure whether the cpu reenter will affect it.
> > I will debug and test it. (Referred from function_test_events_call())
>
> Maybe you can use ftrace_test_recursion_trylock(), as kprobe_ftrace_handler()
> does.

I will use it ,thanks.

>
> Thank you,
>
> --
> Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---
JeffXie