A "NULL pointer dereference " problem in pick_next_task_fair in linux 3.10.0 based

From: åå
Date: Tue May 31 2016 - 22:24:46 EST


hi everybody:

I have encountered a "NULL pointer dereference" problem in
pick_next_task_fair() in linux 3.10.0 based

static struct task_struct *pick_next_task_fair(struct rq *rq)
{
struct task_struct *p;
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;

if (!cfs_rq->nr_running)
return NULL;

do {
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);

p = task_of(se);
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);

return p;
}

In pick_next_entity():

static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = __pick_first_entity(cfs_rq);
struct sched_entity *left = se;

/*
* Avoid running the skip buddy, if running something else can
* be done without getting too unfair.
*/
if (cfs_rq->skip == se) {
struct sched_entity *second = __pick_next_entity(se);
if (second && wakeup_preempt_entity(second, left) < 1)
se = second;
}

/*
* Prefer last buddy, try to return the CPU to a preempted task.
*/
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
se = cfs_rq->last;

/*
* Someone really wants this to run. If it's not unfair, run it.
*/
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
se = cfs_rq->next;

clear_buddies(cfs_rq, se);

return se;
}


in pick_next_entity

1. struct sched_entity *se = __pick_first_entity(cfs_rq);
This se may be get NULL.

2. Then if cfs_rq->skip is also null, cfs_rq->skip == se.

3. struct sched_entity *second = __pick_next_entity(se);
This code will lead to the "NULL pointer dereference" because se
is NULL in __pick_next_entity -> rb_next()

The oops is as follow:

[795797.511960] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000010


[795797.622950] task: ffff883f24924560 ti: ffff883f2495c000 task.ti:
ffff883f2495c000

[795797.630541] RIP: 0010:[] [] rb_next+0x1/0x50

[795797.638153] RSP: 0018:ffff883f2495fdc0 EFLAGS: 00010046

[795797.643575] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
0000000000000000

[795797.650824] RDX: 0000000000000001 RSI: ffff883f7f5f4868 RDI:
0000000000000010

[795797.658068] RBP: ffff883f2495fe08 R08: 0000000000000000 R09:
0000000000000000

[795797.666476] R10: 0000000000000000 R11: ffff883f249f5400 R12:
ffff883d82207200

[795797.674821] R13: 0000000000000000 R14: 0000000000000000 R15:
0000000000000000

[795797.683160] FS: 0000000000000000(0000) GS:ffff883f7f5e0000(0000)
knlGS:0000000000000000

[795797.692460] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033

[795797.699395] CR2: 0000000000000010 CR3: 0000003f1de58000 CR4:
00000000003407e0

[795797.707703] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000

[795797.715992] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400

[795797.724267] Stack:

[795797.727416] ffff883f2495fe08 ffffffff810b87f9 ffff883f2495fe08
ffff883f7f5f47c0

[795797.736040] ffff883f24924b40 ffff883f7f5f47c0 000000000000003f
ffff883f24924560

[795797.744658] 0000000000000000 ffff883f2495fe68 ffffffff816324ea
ffff883f24924560

[795797.753255] Call Trace:

[795797.756818] [] ? pick_next_task_fair+0x129/0x1d0

[795797.764207] [] __schedule+0x12a/0x910

[795797.770622] [] schedule+0x29/0x70

[795797.776683] [] smpboot_thread_fn+0xd3/0x1a0

[795797.783599] [] ? schedule+0x29/0x70

[795797.789807] [] ? lg_double_unlock+0x90/0x90

[795797.796706] [] kthread+0xcf/0xe0

[795797.802632] [] ? kthread_create_on_node+0x140/0x140

[795797.810207] [] ret_from_fork+0x58/0x90

[795797.816646] [] ? kthread_create_on_node+0x140/0x140


I find there are something different in upstream (in pick_next_entity )

static struct sched_entity *

pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)

{
struct sched_entity *left = __pick_first_entity(cfs_rq);
struct sched_entity *se;
/*
* If curr is set we have to see if its left of the leftmost entity
* still in the tree, provided there was anything in the tree at all.
*/
if (!left || (curr && entity_before(curr, left)))
left = curr;
se = left; /* ideally we run the leftmost entity */
/*
* Avoid running the skip buddy, if running something else can
* be done without getting too unfair.
*/
if (cfs_rq->skip == se) {
struct sched_entity *second;
if (se == curr) {
second = __pick_first_entity(cfs_rq);
} else {
second = __pick_next_entity(se);
if (!second || (curr && entity_before(curr, second)))
second = curr;
}
if (second && wakeup_preempt_entity(second, left) < 1)
se = second;
}
/*
* Prefer last buddy, try to return the CPU to a preempted task.
*/
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
se = cfs_rq->last;
/*
* Someone really wants this to run. If it's not unfair, run it.
*/
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
se = cfs_rq->next;
clear_buddies(cfs_rq, se);
return se;

}

if the se,curr,cfs_rq->skip are all NULL, the wakeup_preempt_entity()
or clear_buddies will be also occured the "NULL pointer dereference"
problem.

I think the following patch can work in the case:

--- a/kernel/sched/fair.c

+++ b/kernel/sched/fair.c

@@ -4639,10 +4639,9 @@
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;

- if (!cfs_rq->nr_running)
- return NULL;
-
do {
+ if (!cfs_rq->nr_running)
+ return NULL;
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);



Do I miss some rules or this is a bug?