[RFC][PATCH 5/5] memcg softlimit hooks to kswapd

From: KAMEZAWA Hiroyuki
Date: Wed Mar 11 2009 - 21:01:45 EST


This patch needs MORE investigation...

==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

This patch adds hooks for memcg's softlimit to kswapd().

The softlimit handler works as follows:
- It is called before the generic shrink_zone() is called.
- The number of pages to be scanned depends on priority.
- If not enough progress is made, the selected memcg is moved to the UNUSED queue.
- On each call to balance_pgdat(), the softlimit queue is rebalanced.

Changelog: v3 -> v4
- move "sc" as local variable

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
mm/vmscan.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)

Index: mmotm-2.6.29-Mar10/mm/vmscan.c
===================================================================
--- mmotm-2.6.29-Mar10.orig/mm/vmscan.c
+++ mmotm-2.6.29-Mar10/mm/vmscan.c
@@ -1733,6 +1733,49 @@ unsigned long try_to_free_mem_cgroup_pag
}
#endif

+/*
+ * Reclaim from memcgs scheduled by mem_cgroup_schedule() for this zone.
+ * @target is the mark passed to zone_watermark_ok(); twice @target is the
+ * scan budget.  Stops when the watermark is met, the budget is spent, or
+ * no memcg is scheduled.
+ */
+static void shrink_zone_softlimit(struct zone *zone, int order, int priority,
+				   int target, int end_zone)
+{
+	int scan = target * 2;
+	int nid = zone->zone_pgdat->node_id;
+	int zid = zone_idx(zone);
+	struct mem_cgroup *mem;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_writepage = !laptop_mode,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.may_unmap = 1,
+		.swappiness = vm_swappiness,
+		.order = order,
+		.mem_cgroup = NULL,
+		.isolate_pages = mem_cgroup_isolate_pages,
+	};
+
+	while (scan > 0) {
+		if (zone_watermark_ok(zone, order, target, end_zone, 0))
+			break;
+		mem = mem_cgroup_schedule(nid, zid);
+		if (!mem)
+			break;
+		sc.mem_cgroup = mem;
+
+		/* Reset both counters so each pass is charged only once. */
+		sc.nr_scanned = 0;
+		sc.nr_reclaimed = 0;
+		shrink_zone(priority, zone, &sc);
+
+		/* Report whether at least SWAP_CLUSTER_MAX/2 pages were freed. */
+		mem_cgroup_schedule_end(nid, zid, mem,
+				sc.nr_reclaimed >= SWAP_CLUSTER_MAX / 2);
+		scan -= sc.nr_scanned;
+	}
+}
/*
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at pages_high.
@@ -1776,6 +1819,8 @@ static unsigned long balance_pgdat(pg_da
*/
int temp_priority[MAX_NR_ZONES];

+ /* Refill softlimit queue */
+ mem_cgroup_reschedule_all(pgdat->node_id);
loop_again:
total_scanned = 0;
sc.nr_reclaimed = 0;
@@ -1856,6 +1901,13 @@ loop_again:
end_zone, 0))
all_zones_ok = 0;
temp_priority[i] = priority;
+
+ /*
+ * Try soft limit at first. This reclaims page
+ * with regard to user's hint.
+ */
+ shrink_zone_softlimit(zone, order, priority,
+ 8 * zone->pages_high, end_zone);
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
/*

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/