[PATCH 2/3] Fix numa scan information update to be triggered by memory event

From: KAMEZAWA Hiroyuki
Date: Tue Jun 28 2011 - 04:09:25 EST


commit 889976 adds an numa node round-robin for memcg. But the information
is updated once per 10sec.

This patch changes the update trigger from jiffies to memcg's event count.
After this patch, numa scan information will be updated when we see
1024 events of pagein/pageout under a memcg.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Changelog:
- simplified
- removed mutex
- removed 3% check. To use heuristics, we cannot avoid magic value.
So, removed heuristics.
---
mm/memcontrol.c | 29 +++++++++++++++++++++++++----
1 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c624312..3e7d5e6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -108,10 +108,12 @@ enum mem_cgroup_events_index {
enum mem_cgroup_events_target {
MEM_CGROUP_TARGET_THRESH,
MEM_CGROUP_TARGET_SOFTLIMIT,
+ MEM_CGROUP_TARGET_NUMAINFO,
MEM_CGROUP_NTARGETS,
};
#define THRESHOLDS_EVENTS_TARGET (128)
#define SOFTLIMIT_EVENTS_TARGET (1024)
+#define NUMAINFO_EVENTS_TARGET (1024)

struct mem_cgroup_stat_cpu {
long count[MEM_CGROUP_STAT_NSTATS];
@@ -237,7 +239,8 @@ struct mem_cgroup {
int last_scanned_node;
#if MAX_NUMNODES > 1
nodemask_t scan_nodes;
- unsigned long next_scan_node_update;
+ atomic_t numainfo_events;
+ atomic_t numainfo_updating;
#endif
/*
* Should the accounting and control be hierarchical, per subtree?
@@ -680,6 +683,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
case MEM_CGROUP_TARGET_SOFTLIMIT:
next = val + SOFTLIMIT_EVENTS_TARGET;
break;
+ case MEM_CGROUP_TARGET_NUMAINFO:
+ next = val + NUMAINFO_EVENTS_TARGET;
+ break;
default:
return;
}
@@ -703,6 +709,14 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
__mem_cgroup_target_update(mem,
MEM_CGROUP_TARGET_SOFTLIMIT);
}
+#if MAX_NUMNODES > 1
+ if (unlikely(__memcg_event_check(mem,
+ MEM_CGROUP_TARGET_NUMAINFO))) {
+ atomic_inc(&mem->numainfo_events);
+ __mem_cgroup_target_update(mem,
+ MEM_CGROUP_TARGET_NUMAINFO);
+ }
+#endif
}
}

@@ -1582,11 +1596,15 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
{
int nid;
-
- if (time_after(mem->next_scan_node_update, jiffies))
+ /*
+ * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
+ * pagein/pageout changes since the last update.
+ */
+ if (!atomic_read(&mem->numainfo_events))
+ return;
+ if (atomic_inc_return(&mem->numainfo_updating) > 1)
return;

- mem->next_scan_node_update = jiffies + 10*HZ;
/* make a nodemask where this memcg uses memory from */
mem->scan_nodes = node_states[N_HIGH_MEMORY];

@@ -1595,6 +1613,9 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
node_clear(nid, mem->scan_nodes);
}
+
+ atomic_set(&mem->numainfo_events, 0);
+ atomic_set(&mem->numainfo_updating, 0);
}

/*
--
1.7.4.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/