[patch] mm: memcontrol: do not filter reclaimable nodes in NUMA round-robin

From: Johannes Weiner
Date: Tue Oct 21 2014 - 09:53:54 EST


The round-robin node reclaim currently tries to include only nodes
that have memory of the memcg in question, which is quite elaborate.

Just use plain round-robin over the nodes that are allowed by the
task's cpuset, which are the most likely to contain that memcg's
memory. But even if zones without memcg memory are encountered,
direct reclaim will skip over them without too much hassle.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
mm/memcontrol.c | 97 +++------------------------------------------------------
1 file changed, 5 insertions(+), 92 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d353d9e1fdca..293db8234179 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -54,6 +54,7 @@
#include <linux/page_cgroup.h>
#include <linux/cpu.h>
#include <linux/oom.h>
+#include <linux/cpuset.h>
#include <linux/lockdep.h>
#include <linux/file.h>
#include "internal.h"
@@ -129,12 +130,10 @@ static const char * const mem_cgroup_lru_names[] = {
enum mem_cgroup_events_target {
MEM_CGROUP_TARGET_THRESH,
MEM_CGROUP_TARGET_SOFTLIMIT,
- MEM_CGROUP_TARGET_NUMAINFO,
MEM_CGROUP_NTARGETS,
};
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
-#define NUMAINFO_EVENTS_TARGET 1024

struct mem_cgroup_stat_cpu {
long count[MEM_CGROUP_STAT_NSTATS];
@@ -352,11 +351,6 @@ struct mem_cgroup {
#endif

int last_scanned_node;
-#if MAX_NUMNODES > 1
- nodemask_t scan_nodes;
- atomic_t numainfo_events;
- atomic_t numainfo_updating;
-#endif

/* List of events which userspace want to receive */
struct list_head event_list;
@@ -965,9 +959,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
case MEM_CGROUP_TARGET_SOFTLIMIT:
next = val + SOFTLIMIT_EVENTS_TARGET;
break;
- case MEM_CGROUP_TARGET_NUMAINFO:
- next = val + NUMAINFO_EVENTS_TARGET;
- break;
default:
break;
}
@@ -986,22 +977,10 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
- bool do_softlimit;
- bool do_numainfo __maybe_unused;
-
- do_softlimit = mem_cgroup_event_ratelimit(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT);
-#if MAX_NUMNODES > 1
- do_numainfo = mem_cgroup_event_ratelimit(memcg,
- MEM_CGROUP_TARGET_NUMAINFO);
-#endif
mem_cgroup_threshold(memcg);
- if (unlikely(do_softlimit))
+ if (mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_SOFTLIMIT))
mem_cgroup_update_tree(memcg, page);
-#if MAX_NUMNODES > 1
- if (unlikely(do_numainfo))
- atomic_inc(&memcg->numainfo_events);
-#endif
}
}

@@ -1708,61 +1687,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
NULL, "Memory cgroup out of memory");
}

-/**
- * test_mem_cgroup_node_reclaimable
- * @memcg: the target memcg
- * @nid: the node ID to be checked.
- * @noswap : specify true here if the user wants flle only information.
- *
- * This function returns whether the specified memcg contains any
- * reclaimable pages on a node. Returns true if there are any reclaimable
- * pages in the node.
- */
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
- int nid, bool noswap)
-{
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
- return true;
- if (noswap || !total_swap_pages)
- return false;
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
- return true;
- return false;
-
-}
#if MAX_NUMNODES > 1
-
-/*
- * Always updating the nodemask is not very good - even if we have an empty
- * list or the wrong list here, we can start from some node and traverse all
- * nodes based on the zonelist. So update the list loosely once per 10 secs.
- *
- */
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
-{
- int nid;
- /*
- * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
- * pagein/pageout changes since the last update.
- */
- if (!atomic_read(&memcg->numainfo_events))
- return;
- if (atomic_inc_return(&memcg->numainfo_updating) > 1)
- return;
-
- /* make a nodemask where this memcg uses memory from */
- memcg->scan_nodes = node_states[N_MEMORY];
-
- for_each_node_mask(nid, node_states[N_MEMORY]) {
-
- if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
- node_clear(nid, memcg->scan_nodes);
- }
-
- atomic_set(&memcg->numainfo_events, 0);
- atomic_set(&memcg->numainfo_updating, 0);
-}
-
/*
* Selecting a node where we start reclaim from. Because what we need is just
* reducing usage counter, start from anywhere is O,K. Considering
@@ -1779,21 +1704,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
int node;

- mem_cgroup_may_update_nodemask(memcg);
- node = memcg->last_scanned_node;
-
- node = next_node(node, memcg->scan_nodes);
+ node = next_node(memcg->last_scanned_node, cpuset_current_mems_allowed);
if (node == MAX_NUMNODES)
- node = first_node(memcg->scan_nodes);
- /*
- * We call this when we hit limit, not when pages are added to LRU.
- * No LRU may hold pages because all pages are UNEVICTABLE or
- * memcg is too small and all pages are not on LRU. In that case,
- * we use curret node.
- */
- if (unlikely(node == MAX_NUMNODES))
- node = numa_node_id();
-
+ node = first_node(cpuset_current_mems_allowed);
memcg->last_scanned_node = node;
return node;
}
--
2.1.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/