[PATCH RT v2] kernel/res_counter.c: Change lock of struct res_counter to raw_spinlock_t

From: Thavatchai Makphaibulchoke
Date: Fri Jan 30 2015 - 14:06:47 EST


Since the memory cgroup charge path can be called from a page fault
handler with preemption disabled, as shown by the stack dump below,

[12679.513255] BUG: scheduling while atomic: ssh/10621/0x00000002
[12679.513305] Preemption disabled at:[<ffffffff811a20f7>] mem_cgroup_charge_common+0x37/0x60
[12679.513305]
[12679.513322] Call Trace:
[12679.513331] [<ffffffff81512f62>] dump_stack+0x4f/0x7c
[12679.513333] [<ffffffff8150f4f1>] __schedule_bug+0x9f/0xad
[12679.513338] [<ffffffff815155f3>] __schedule+0x653/0x720
[12679.513340] [<ffffffff815180ce>] ? _raw_spin_unlock_irqrestore+0x2e/0x70
[12679.513343] [<ffffffff81515784>] schedule+0x34/0xa0
[12679.513345] [<ffffffff81516fdb>] rt_spin_lock_slowlock+0x10b/0x250
[12679.513348] [<ffffffff815183a5>] rt_spin_lock+0x35/0x40
[12679.513352] [<ffffffff810ec1d9>] res_counter_uncharge_until+0x69/0xb0
[12679.513354] [<ffffffff810ec233>] res_counter_uncharge+0x13/0x20
[12679.513358] [<ffffffff8119c0be>] drain_stock.isra.38+0x5e/0x90
[12679.513360] [<ffffffff811a16a2>] __mem_cgroup_try_charge+0x3f2/0x8a0
[12679.513363] [<ffffffff811a20f7>] mem_cgroup_charge_common+0x37/0x60
[12679.513365] [<ffffffff811a3b06>] mem_cgroup_newpage_charge+0x26/0x30
[12679.513369] [<ffffffff8116c8d2>] handle_mm_fault+0x9b2/0xdb0
[12679.513374] [<ffffffff81400474>] ? sock_aio_read.part.11+0x104/0x130
[12679.513379] [<ffffffff8151c072>] __do_page_fault+0x182/0x4f0
[12679.513381] [<ffffffff814004c1>] ? sock_aio_read+0x21/0x30
[12679.513385] [<ffffffff811ab25a>] ? do_sync_read+0x5a/0x90
[12679.513390] [<ffffffff8108c981>] ? get_parent_ip+0x11/0x50
[12679.513392] [<ffffffff8151c41e>] do_page_fault+0x3e/0x80
[12679.513395] [<ffffffff81518e68>] page_fault+0x28/0x30

the lock member of struct res_counter should be of type raw_spinlock_t,
not spinlock_t, which can sleep on an RT kernel.

Tested on a 2-node, 32-thread platform with cyclictest.

Kernel version 3.14.25 + patch-3.14.25-rt22

Signed-off-by: T Makphaibulchoke <tmac@xxxxxx>
---

Changed in v2:
- Fixed Signed-off-by tag.

include/linux/res_counter.h | 26 +++++++++++++-------------
kernel/res_counter.c | 18 +++++++++---------
2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 201a697..61d94a4 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -47,7 +47,7 @@ struct res_counter {
* the lock to protect all of the above.
* the routines below consider this to be IRQ-safe
*/
- spinlock_t lock;
+ raw_spinlock_t lock;
/*
* Parent counter, used for hierarchial resource accounting
*/
@@ -148,12 +148,12 @@ static inline unsigned long long res_counter_margin(struct res_counter *cnt)
unsigned long long margin;
unsigned long flags;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
if (cnt->limit > cnt->usage)
margin = cnt->limit - cnt->usage;
else
margin = 0;
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
return margin;
}

@@ -170,12 +170,12 @@ res_counter_soft_limit_excess(struct res_counter *cnt)
unsigned long long excess;
unsigned long flags;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
if (cnt->usage <= cnt->soft_limit)
excess = 0;
else
excess = cnt->usage - cnt->soft_limit;
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
return excess;
}

@@ -183,18 +183,18 @@ static inline void res_counter_reset_max(struct res_counter *cnt)
{
unsigned long flags;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
cnt->max_usage = cnt->usage;
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
}

static inline void res_counter_reset_failcnt(struct res_counter *cnt)
{
unsigned long flags;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
cnt->failcnt = 0;
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
}

static inline int res_counter_set_limit(struct res_counter *cnt,
@@ -203,12 +203,12 @@ static inline int res_counter_set_limit(struct res_counter *cnt,
unsigned long flags;
int ret = -EBUSY;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
if (cnt->usage <= limit) {
cnt->limit = limit;
ret = 0;
}
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
return ret;
}

@@ -218,9 +218,9 @@ res_counter_set_soft_limit(struct res_counter *cnt,
{
unsigned long flags;

- spin_lock_irqsave(&cnt->lock, flags);
+ raw_spin_lock_irqsave(&cnt->lock, flags);
cnt->soft_limit = soft_limit;
- spin_unlock_irqrestore(&cnt->lock, flags);
+ raw_spin_unlock_irqrestore(&cnt->lock, flags);
return 0;
}

diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3fbcb0d..59a7a62 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -16,7 +16,7 @@

void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{
- spin_lock_init(&counter->lock);
+ raw_spin_lock_init(&counter->lock);
counter->limit = RES_COUNTER_MAX;
counter->soft_limit = RES_COUNTER_MAX;
counter->parent = parent;
@@ -51,9 +51,9 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,
*limit_fail_at = NULL;
local_irq_save_nort(flags);
for (c = counter; c != NULL; c = c->parent) {
- spin_lock(&c->lock);
+ raw_spin_lock(&c->lock);
r = res_counter_charge_locked(c, val, force);
- spin_unlock(&c->lock);
+ raw_spin_unlock(&c->lock);
if (r < 0 && !ret) {
ret = r;
*limit_fail_at = c;
@@ -64,9 +64,9 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,

if (ret < 0 && !force) {
for (u = counter; u != c; u = u->parent) {
- spin_lock(&u->lock);
+ raw_spin_lock(&u->lock);
res_counter_uncharge_locked(u, val);
- spin_unlock(&u->lock);
+ raw_spin_unlock(&u->lock);
}
}
local_irq_restore_nort(flags);
@@ -106,11 +106,11 @@ u64 res_counter_uncharge_until(struct res_counter *counter,
local_irq_save_nort(flags);
for (c = counter; c != top; c = c->parent) {
u64 r;
- spin_lock(&c->lock);
+ raw_spin_lock(&c->lock);
r = res_counter_uncharge_locked(c, val);
if (c == counter)
ret = r;
- spin_unlock(&c->lock);
+ raw_spin_unlock(&c->lock);
}
local_irq_restore_nort(flags);
return ret;
@@ -164,9 +164,9 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
unsigned long flags;
u64 ret;

- spin_lock_irqsave(&counter->lock, flags);
+ raw_spin_lock_irqsave(&counter->lock, flags);
ret = *res_counter_member(counter, member);
- spin_unlock_irqrestore(&counter->lock, flags);
+ raw_spin_unlock_irqrestore(&counter->lock, flags);

return ret;
}
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/