[patch] mm: memcontrol: lockless page counters fix 2

From: Johannes Weiner
Date: Tue Sep 23 2014 - 11:48:48 EST


- page_counter_sub -> page_counter_cancel [johannes]
- document page counter API [vladimir]
- WARN_ON_ONCE and revert on counter underflow [kame]
- convert page_counter_try_charge() from CAS to FAA [vladimir]
---
include/linux/memcontrol.h | 2 +-
mm/hugetlb_cgroup.c | 2 +-
mm/memcontrol.c | 100 +++++++++++++++++++++++++++++++++------------
3 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a8b939376a5d..1bda77dff591 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -84,7 +84,7 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
return atomic_long_read(&counter->count);
}

-int page_counter_sub(struct page_counter *counter, unsigned long nr_pages);
+int page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
int page_counter_try_charge(struct page_counter *counter,
unsigned long nr_pages,
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index abd1e8dc7b46..aae47a24ec0e 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -131,7 +131,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
}
counter = &h_cg->hugepage[idx];
/* Take the pages off the local counter */
- page_counter_sub(counter, nr_pages);
+ page_counter_cancel(counter, nr_pages);

set_hugetlb_cgroup(page, parent);
out:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ec2210965686..70839678d805 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -65,18 +65,32 @@

#include <trace/events/vmscan.h>

-int page_counter_sub(struct page_counter *counter, unsigned long nr_pages)
+/**
+ * page_counter_cancel - take pages out of the local counter
+ * @counter: counter
+ * @nr_pages: number of pages to cancel
+ *
+ * Returns whether there are remaining pages in the counter.
+ */
+int page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
long new;

new = atomic_long_sub_return(nr_pages, &counter->count);

- if (WARN_ON(unlikely(new < 0)))
- atomic_long_set(&counter->count, 0);
+ if (WARN_ON_ONCE(unlikely(new < 0)))
+ atomic_long_add(nr_pages, &counter->count);

return new > 0;
}

+/**
+ * page_counter_charge - hierarchically charge pages
+ * @counter: counter
+ * @nr_pages: number of pages to charge
+ *
+ * NOTE: This may exceed the configured counter limits.
+ */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
struct page_counter *c;
@@ -91,6 +105,15 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
}
}

+/**
+ * page_counter_try_charge - try to hierarchically charge pages
+ * @counter: counter
+ * @nr_pages: number of pages to charge
+ * @fail: points to the first counter that hit its limit, if any
+ *
+ * Returns 0 on success, or -ENOMEM and @fail if the counter or one of
+ * its ancestors has hit its limit.
+ */
int page_counter_try_charge(struct page_counter *counter,
unsigned long nr_pages,
struct page_counter **fail)
@@ -98,37 +121,44 @@ int page_counter_try_charge(struct page_counter *counter,
struct page_counter *c;

for (c = counter; c; c = c->parent) {
- for (;;) {
- long count;
- long new;
-
- count = atomic_long_read(&c->count);
-
- new = count + nr_pages;
- if (new > c->limit) {
- c->failcnt++;
- *fail = c;
- goto failed;
- }
-
- if (atomic_long_cmpxchg(&c->count, count, new) != count)
- continue;
-
- if (new > c->watermark)
- c->watermark = new;
+ long new;

- break;
+ new = atomic_long_add_return(nr_pages, &c->count);
+ if (new > c->limit) {
+ atomic_long_sub(nr_pages, &c->count);
+ /*
+ * This is racy, but the failcnt is only a
+ * ballpark metric anyway.
+ */
+ c->failcnt++;
+ *fail = c;
+ goto failed;
}
+ /*
+ * This is racy, but with the per-cpu caches on top
+ * this is a ballpark metric as well, and with lazy
+ * cache reclaim, the majority of workloads peg the
+ * watermark to the group limit soon after launch.
+ */
+ if (new > c->watermark)
+ c->watermark = new;
}
return 0;

failed:
for (c = counter; c != *fail; c = c->parent)
- page_counter_sub(c, nr_pages);
+ page_counter_cancel(c, nr_pages);

return -ENOMEM;
}

+/**
+ * page_counter_uncharge - hierarchically uncharge pages
+ * @counter: counter
+ * @nr_pages: number of pages to uncharge
+ *
+ * Returns whether there are remaining charges in @counter.
+ */
int page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
struct page_counter *c;
@@ -137,7 +167,7 @@ int page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
for (c = counter; c; c = c->parent) {
int remainder;

- remainder = page_counter_sub(c, nr_pages);
+ remainder = page_counter_cancel(c, nr_pages);
if (c == counter && !remainder)
ret = 0;
}
@@ -145,6 +175,16 @@ int page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
return ret;
}

+/**
+ * page_counter_limit - limit the number of pages allowed
+ * @counter: counter
+ * @limit: limit to set
+ *
+ * Returns 0 on success, -EBUSY if the current number of pages on the
+ * counter already exceeds the specified limit.
+ *
+ * The caller must serialize invocations on the same counter.
+ */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
for (;;) {
@@ -169,6 +209,14 @@ int page_counter_limit(struct page_counter *counter, unsigned long limit)
}
}

+/**
+ * page_counter_memparse - memparse() for page counter limits
+ * @buf: string to parse
+ * @nr_pages: returns the result in number of pages
+ *
+ * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be
+ * limited to %PAGE_COUNTER_MAX.
+ */
int page_counter_memparse(const char *buf, unsigned long *nr_pages)
{
char unlimited[] = "-1";
@@ -3572,9 +3620,9 @@ static int mem_cgroup_move_parent(struct page *page,
pc, child, parent);
if (!ret) {
/* Take charge off the local counters */
- page_counter_sub(&child->memory, nr_pages);
+ page_counter_cancel(&child->memory, nr_pages);
if (do_swap_account)
- page_counter_sub(&child->memsw, nr_pages);
+ page_counter_cancel(&child->memsw, nr_pages);
}

if (nr_pages > 1)
--
2.1.0


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/