kernel BUG at kernel/sched/core.c:3490!

From: Qian Cai
Date: Tue Jan 01 2019 - 00:56:07 EST


Running some mmap() workloads to put the system into a low-memory situation
with swapping and OOM triggers this BUG():

/*
 * do_task_dead() - mark the current task dead and call the scheduler.
 *
 * Sets the task state to TASK_DEAD, sets PF_NOFREEZE in current->flags and
 * then calls __schedule(false). The function is declared __noreturn:
 * control is not expected to come back from __schedule(), so reaching the
 * BUG() that follows it means __schedule() did return.
 */
void __noreturn do_task_dead(void)
{
/* Causes final put_task_struct in finish_task_switch(): */
set_special_state(TASK_DEAD);

/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;

/* Hand off to the scheduler; hitting BUG() below means this call returned. */
__schedule(false);
BUG();

/*
 * Avoid "noreturn function does return" - but don't continue if BUG()
 * is a NOP:
 */
for (;;)
cpu_relax();
}

[ 422.863911] kernel BUG at kernel/sched/core.c:3490!
[ 422.868634] oom01 (3177) used greatest stack depth: 28712 bytes left
[ 422.869109] invalid opcode: 0000 [#1] SMP KASAN NOPTI
[ 422.880325] CPU: 86 PID: 3235 Comm: oom01 Kdump: loaded Tainted: G W
4.20.0+ #5
[ 422.888995] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10,
BIOS A40 09/07/2018
[ 422.897590] RIP: 0010:do_task_dead+0x73/0x90
[ 422.901893] Code: 48 c7 43 10 80 00 00 00 4c 89 ee 4c 89 e7 e8 34 26 8a 00 48
8d 7b 24 e8 3b b6 2e 00 81 4b 24 00 80 00 00 31 ff e8 8d 4c 89 00 <0f> 0b 48 c7
c7 40 2c 53 b1 e8 da e7 51 00 0f 1f 44 00 00 66 2e 0f
[ 422.920783] RSP: 0018:ffff888392daf5a8 EFLAGS: 00010282
[ 422.926048] RAX: 0000000000000000 RBX: ffff88810e23aec0 RCX: 0000000000000000
[ 422.933234] RDX: dffffc0000000000 RSI: dffffc0000000000 RDI: ffffed10725b5ea8
[ 422.940419] RBP: ffff888392daf5c0 R08: fffffbfff6338a76 R09: fffffbfff6338a75
[ 422.947604] R10: fffffbfff6338a75 R11: ffffffffb19c53ab R12: ffff88810e23b510
[ 422.954789] R13: 0000000000000246 R14: 0000000000000000 R15: ffff888638f636d8
[ 422.961974] FS: 00007f105ff5d700(0000) GS:ffff88905e300000(0000)
knlGS:0000000000000000
[ 422.970118] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 422.975905] CR2: 00007f105ff5d9d0 CR3: 0000000a23e1e000 CR4: 00000000001406a0
[ 422.983087] Call Trace:
[ 422.985564] do_exit+0x95e/0xe30
[ 422.988821] ? dump_align+0x50/0x50
[ 422.992338] ? mm_update_next_owner+0x570/0x570
[ 422.996906] ? __kasan_slab_free+0x1af/0x210
[ 423.001207] ? kmem_cache_free+0xc0/0x350
[ 423.005247] ? __dequeue_signal+0x2bd/0x370
[ 423.009460] ? dequeue_signal+0x90/0x2d0
[ 423.013410] ? get_signal+0x296/0xf30
[ 423.017100] ? do_signal+0x93/0x9d0
[ 423.020619] ? exit_to_usermode_loop+0x130/0x170
[ 423.025271] ? prepare_exit_to_usermode+0x1d7/0x1f0
[ 423.030187] ? retint_user+0x8/0x18
[ 423.033704] ? trace_hardirqs_off+0x9d/0x230
[ 423.038005] ? trace_hardirqs_on_caller+0x230/0x230
[ 423.042919] ? do_raw_spin_trylock+0x180/0x180
[ 423.047396] ? do_raw_spin_lock+0xf0/0x1f0
[ 423.051524] ? rwlock_bug.part.0+0x60/0x60
[ 423.055656] ? __dequeue_signal+0x2bd/0x370
[ 423.059871] ? _raw_spin_unlock_irqrestore+0x34/0x50
[ 423.064873] ? __debug_check_no_obj_freed+0x204/0x330
[ 423.069963] ? debug_object_free+0x10/0x10
[ 423.074092] ? trace_hardirqs_on_caller+0x9f/0x230
[ 423.078920] ? check_stack_object+0x22/0x60
[ 423.083138] ? debug_lockdep_rcu_enabled+0x22/0x40
[ 423.087964] ? kmem_cache_free+0x22e/0x350
[ 423.092091] ? __dequeue_signal+0x2bd/0x370
[ 423.096309] ? debug_lockdep_rcu_enabled+0x22/0x40
[ 423.101137] ? get_signal+0x530/0xf30
[ 423.104828] ? __flush_itimer_signals+0x310/0x310
[ 423.109567] ? check_flags.part.18+0x220/0x220
[ 423.114045] ? recalc_sigpending+0x6e/0x110
[ 423.118258] ? __sigqueue_alloc+0x4e0/0x4e0
[ 423.122470] ? lockdep_hardirqs_on+0x11/0x290
[ 423.126861] do_group_exit+0xc1/0x1d0
[ 423.130552] ? __x64_sys_exit+0x30/0x30
[ 423.134416] get_signal+0x4a6/0xf30
[ 423.137933] ? ptrace_notify+0xb0/0xb0
[ 423.141708] ? force_sig_fault+0xb3/0xf0
[ 423.145659] ? force_sigsegv+0x90/0x90
[ 423.149440] ? set_signal_archinfo+0x6f/0xa0
[ 423.153738] ? __do_page_fault+0x6b1/0x6d0
[ 423.157863] ? mm_fault_error+0x140/0x140
[ 423.161903] do_signal+0x93/0x9d0
[ 423.165244] ? lockdep_hardirqs_on+0x11/0x290
[ 423.169632] ? trace_hardirqs_on+0x9d/0x230
[ 423.173846] ? ftrace_destroy_function_files+0x50/0x50
[ 423.179022] ? do_raw_spin_trylock+0x180/0x180
[ 423.183498] ? setup_sigcontext+0x260/0x260
[ 423.187712] ? do_page_fault+0x119/0x53c
[ 423.191663] ? lockdep_hardirqs_on+0x11/0x290
[ 423.196050] ? trace_hardirqs_on+0x9d/0x230
[ 423.200264] ? ftrace_destroy_function_files+0x50/0x50
[ 423.205441] ? task_work_run+0x118/0x1a0
[ 423.209393] ? mark_held_locks+0x23/0xb0
[ 423.213345] ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 423.217999] ? retint_user+0x18/0x18
[ 423.221602] exit_to_usermode_loop+0x130/0x170
[ 423.226081] ? lockdep_sys_exit_thunk+0x29/0x29
[ 423.230647] prepare_exit_to_usermode+0x1d7/0x1f0
[ 423.235387] ? syscall_slow_exit_work+0x380/0x380
[ 423.240127] ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 423.244866] ? page_fault+0x5/0x20
[ 423.248294] retint_user+0x8/0x18
[ 423.251637] RIP: 0033:0x40f910
[ 423.254719] Code: Bad RIP value.
[ 423.257970] RSP: 002b:00007f105ff5cec0 EFLAGS: 00010206
[ 423.263230] RAX: 0000000000001000 RBX: 00000000c0000000 RCX: 00007f5dd40bd497
[ 423.270413] RDX: 0000000013ff8000 RSI: 00000000c0000000 RDI: 0000000000000000
[ 423.277594] RBP: 00007f0edef5c000 R08: 00000000ffffffff R09: 0000000000000000
[ 423.284777] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000001
[ 423.291958] R13: 00007ffe090ed89f R14: 0000000000000000 R15: 00007f105ff5cfc0
[ 423.299141] Modules linked in: af_packet nls_iso8859_1 nls_cp437 vfat fat ses
enclosure efivars ip_tables x_tables xfs libcrc32c crc32c_generic crypto_hash
sd_mod smartpqi tg3 scsi_transport_sas mlx5_core libphy firmware_class dm_mirror
dm_region_hash dm_log dm_mod efivarfs