[RFC PATCH 1/4] remove p->numa_migrate_deferred

From: riel
Date: Tue Nov 26 2013 - 17:19:07 EST

Next message: Manuel Krause: "3.12.x looses serial mouse over hibernate + resume"
Previous message: riel: "[RFC PATCH 2/4] track from which nodes NUMA faults are triggered"
In reply to: riel: "[RFC PATCH 2/4] track from which nodes NUMA faults are triggered"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

From: Rik van Riel <riel@xxxxxxxxxx>

Excessive migration of pages can hurt the performance of workloads
that span multiple NUMA nodes. However, it turns out that the
p->numa_migrate_deferred knob is a really big hammer, which does
reduce migration rates, but does not actually help performance.

It is time to rip it out, and replace it with something smarter.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>
---
include/linux/sched.h | 1 -
kernel/sched/fair.c | 8 --------
kernel/sysctl.c | 7 -------
mm/mempolicy.c | 45 ---------------------------------------------
4 files changed, 61 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 42f2baf..9e4cb598 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1345,7 +1345,6 @@ struct task_struct {
unsigned int numa_scan_period;
unsigned int numa_scan_period_max;
int numa_preferred_nid;
- int numa_migrate_deferred;
unsigned long numa_migrate_retry;
u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7f9b376..410858e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -794,14 +794,6 @@ unsigned int sysctl_numa_balancing_scan_size = 256;
/* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
unsigned int sysctl_numa_balancing_scan_delay = 1000;

-/*
- * After skipping a page migration on a shared page, skip N more numa page
- * migrations unconditionally. This reduces the number of NUMA migrations
- * in shared memory workloads, and has the effect of pulling tasks towards
- * where their memory lives, over pulling the memory towards the task.
- */
-unsigned int sysctl_numa_balancing_migrate_deferred = 16;
-
static unsigned int task_nr_scan_windows(struct task_struct *p)
{
unsigned long rss = 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14c4f51..821e3f1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -392,13 +392,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "numa_balancing_migrate_deferred",
- .data = &sysctl_numa_balancing_migrate_deferred,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
{
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9a2f6dd..0522aa2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2247,35 +2247,6 @@ static void sp_free(struct sp_node *n)
kmem_cache_free(sn_cache, n);
}

-#ifdef CONFIG_NUMA_BALANCING
-static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
- /* Never defer a private fault */
- if (cpupid_match_pid(p, last_cpupid))
- return false;
-
- if (p->numa_migrate_deferred) {
- p->numa_migrate_deferred--;
- return true;
- }
- return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
- p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
-}
-#else
-static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
- return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
/**
* mpol_misplaced - check whether current page node is valid in policy
*
@@ -2378,24 +2349,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
*/
last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
-
- /* See sysctl_numa_balancing_migrate_deferred comment */
- if (!cpupid_match_pid(current, last_cpupid))
- defer_numa_migrate(current);
-
goto out;
}
-
- /*
- * The quadratic filter above reduces extraneous migration
- * of shared pages somewhat. This code reduces it even more,
- * reducing the overhead of page migrations of shared pages.
- * This makes workloads with shared pages rely more on
- * "move task near its memory", and less on "move memory
- * towards its task", which is exactly what we want.
- */
- if (numa_migrate_deferred(current, last_cpupid))
- goto out;
}

if (curnid != polnid)
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Manuel Krause: "3.12.x looses serial mouse over hibernate + resume"
Previous message: riel: "[RFC PATCH 2/4] track from which nodes NUMA faults are triggered"
In reply to: riel: "[RFC PATCH 2/4] track from which nodes NUMA faults are triggered"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]