Re: [PATCH v2] exit: dump thread info on global init exit

From: Oleg Nesterov
Date: Fri Nov 10 2023 - 12:58:17 EST


On 11/10, chenqiwu wrote:
>
> On Fri, Nov 10, 2023 at 11:20:43AM +0800, chenqiwu wrote:
> > Currently, there are various global init exit issues encountered
> > on Android/Linux systems. It's hard to debug these issues in a production
> > environment without a usable coredump. This patch dumps the last
> > exiting thread's executable sections and regs to find the exit reason
> > before panic.

Again, I am not going to comment on the intent. I agree that the more info
the better, but I think someone else should ack this patch.

However. I won't argue, but somehow I can't say I really like it ;)
Can you look at panic_print_sys_info() called by panic() ?
Perhaps it makes more sense to introduce another PANIC_PRINT_XXX
option for dump_thread_maps_info() ?

To me it would be cleaner and more consistent wrt other info panic() reports.

Oleg.

> >
> > Signed-off-by: chenqiwu <qiwu.chen@xxxxxxxxxxxxx>
> > Tested-by: chenqiwu <qiwu.chen@xxxxxxxxxxxxx>
> > ---
> > kernel/exit.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 76 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/exit.c b/kernel/exit.c
> > index ee9f43bed49a..af2e24bc3ecd 100644
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -13,6 +13,7 @@
> > #include <linux/sched/task.h>
> > #include <linux/sched/task_stack.h>
> > #include <linux/sched/cputime.h>
> > +#include <linux/sched/debug.h>
> > #include <linux/interrupt.h>
> > #include <linux/module.h>
> > #include <linux/capability.h>
> > @@ -806,6 +807,76 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
> > spin_unlock_irq(&sighand->siglock);
> > }
> >
> > +/*
> > + * This function only dumps thread executable sections to reduce maps space,
> > + * since an unhandled fault in user mode is likely generated from code section.
> > + */
> > +static void dump_thread_maps_info(struct task_struct *tsk)
> > +{
> > + struct vm_area_struct *vma;
> > + struct mm_struct *mm = tsk->mm;
> > +
> > + if (!mmap_read_trylock(mm))
> > + return;
> > +
> > + VMA_ITERATOR(vmi, mm, 0);
> > + pr_info("%s-%d: Dump maps info start\n", tsk->comm, task_pid_nr(tsk));
> > + for_each_vma(vmi, vma) {
> > + struct file *file = vma->vm_file;
> > + int flags = vma->vm_flags;
> > + unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
> > +
> > + if (file) {
> > + if (flags & VM_EXEC) {
> > + char tpath[256] = {0};
> > + char *pathname = d_path(&file->f_path, tpath, sizeof(tpath));
> > +
> > + pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
> > + vma->vm_start, vma->vm_end,
> > + flags & VM_READ ? 'r' : '-',
> > + flags & VM_WRITE ? 'w' : '-',
> > + flags & VM_EXEC ? 'x' : '-',
> > + flags & VM_MAYSHARE ? 's' : 'p',
> > + pgoff, pathname);
> > + }
> > + } else {
> > + const char *name = arch_vma_name(vma);
> > +
> > + if (!name) {
> > + struct mm_struct *mm = vma->vm_mm;
> > +
> > + if (mm) {
> > + if (vma_is_initial_heap(vma))
> > + name = "[heap]";
> > + else if (vma_is_initial_stack(vma))
> > + name = "[stack]";
> > + } else {
> > + name = "[vdso]";
> > + }
> > + }
> > +
> > + if (name && (flags & VM_EXEC)) {
> > + pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
> > + vma->vm_start, vma->vm_end,
> > + flags & VM_READ ? 'r' : '-',
> > + flags & VM_WRITE ? 'w' : '-',
> > + flags & VM_EXEC ? 'x' : '-',
> > + flags & VM_MAYSHARE ? 's' : 'p', pgoff, name);
> > + }
> > + }
> > + }
> > + mmap_read_unlock(mm);
> > + pr_info("%s-%d: Dump maps info end\n", tsk->comm, task_pid_nr(tsk));
> > +}
> > +
> > +static void dump_thread_info(struct task_struct *tsk)
> > +{
> > + struct pt_regs *regs = task_pt_regs(tsk);
> > +
> > + dump_thread_maps_info(tsk);
> > + show_regs(regs);
> > +}
> > +
> > void __noreturn do_exit(long code)
> > {
> > struct task_struct *tsk = current;
> > @@ -833,12 +904,14 @@ void __noreturn do_exit(long code)
> > group_dead = atomic_dec_and_test(&tsk->signal->live);
> > if (group_dead) {
> > /*
> > - * If the last thread of global init has exited, panic
> > - * immediately to get a useable coredump.
> > + * If the last thread of global init has exited, dump
> > + * some usable information before panic.
> > */
> > - if (unlikely(is_global_init(tsk)))
> > + if (unlikely(is_global_init(tsk))) {
> > + dump_thread_info(tsk);
> > panic("Attempted to kill init! exitcode=0x%08x\n",
> > tsk->signal->group_exit_code ?: (int)code);
> > + }
> >
> > #ifdef CONFIG_POSIX_TIMERS
> > hrtimer_cancel(&tsk->signal->real_timer);
> > --
> > 2.25.1
> >
>
> Adding Oleg as a reviewer.
>