[RFC PATCH 4/9] mm: convert zone lock from spinlock to rwlock

From: Aaron Lu
Date: Tue Sep 11 2018 - 01:36:46 EST


This patch converts the zone lock from a spinlock to an rwlock and always
takes the lock in write mode, so there is no functional change.

This is preparation for a later patch that takes the lock in read mode on
the page free path, so that multiple freeing CPUs can proceed concurrently.
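
As a rough sketch of that direction (illustrative only, not part of this
patch), the free path would then look roughly like:

	/*
	 * Sketch of a later patch: multiple freeing CPUs may hold
	 * zone->lock in read mode at the same time, provided the buddy
	 * merging done by __free_one_page() is made safe against
	 * concurrent readers, which later patches in this series address.
	 */
	read_lock(&zone->lock);
	__free_one_page(page, pfn, zone, order, migratetype);
	read_unlock(&zone->lock);

Allocation, compaction and the other users converted below keep taking the
lock in write mode, so they still exclude one another as well as all readers.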

compact_trylock() and compact_unlock_should_abort() are taken from Daniel
Jordan's patch. They become macros rather than functions so that the same
code can operate on either a spinlock or an rwlock, with the lock, trylock
and unlock primitives passed in by the caller.
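
For reference, the call sites below pass in the primitives matching each
lock's type:

	/* cc->zone->lock is now an rwlock */
	locked = compact_trylock(&cc->zone->lock, &flags, cc,
				 write_lock_irqsave, write_trylock_irqsave);

	/* zone_lru_lock() still returns a spinlock */
	locked = compact_trylock(zone_lru_lock(zone), &flags, cc,
				 spin_lock_irqsave, spin_trylock_irqsave);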

Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxx>
---
 include/linux/mmzone.h |  2 +-
 mm/compaction.c        | 90 +++++++++++++++++++++---------------------
 mm/hugetlb.c           |  8 ++--
 mm/page_alloc.c        | 52 ++++++++++++------------
 mm/page_isolation.c    | 12 +++---
 mm/vmstat.c            |  4 +-
 6 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e22d96734e0..84cfa56e2d19 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -465,7 +465,7 @@ struct zone {
 	unsigned long		flags;
 
 	/* Primarily protects free_area */
-	spinlock_t		lock;
+	rwlock_t		lock;
 
 	/* Write-intensive fields used by compaction and vmstats. */
 	ZONE_PADDING(_pad2_)
diff --git a/mm/compaction.c b/mm/compaction.c
index faca45ebe62d..6ecf74d8e287 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -347,20 +347,20 @@ static inline void update_pageblock_skip(struct compact_control *cc,
  * Returns true if the lock is held
  * Returns false if the lock is not held and compaction should abort
  */
-static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
-						struct compact_control *cc)
-{
-	if (cc->mode == MIGRATE_ASYNC) {
-		if (!spin_trylock_irqsave(lock, *flags)) {
-			cc->contended = true;
-			return false;
-		}
-	} else {
-		spin_lock_irqsave(lock, *flags);
-	}
-
-	return true;
-}
+#define compact_trylock(lock, flags, cc, lockf, trylockf) \
+({ \
+	bool __ret = true; \
+	if ((cc)->mode == MIGRATE_ASYNC) { \
+		if (!trylockf((lock), *(flags))) { \
+			(cc)->contended = true; \
+			__ret = false; \
+		} \
+	} else { \
+		lockf((lock), *(flags)); \
+	} \
+ \
+	__ret; \
+})
 
 /*
  * Compaction requires the taking of some coarse locks that are potentially
@@ -377,29 +377,29 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
  * Returns false when compaction can continue (sync compaction might have
  *		scheduled)
  */
-static bool compact_unlock_should_abort(spinlock_t *lock,
-		unsigned long flags, bool *locked, struct compact_control *cc)
-{
-	if (*locked) {
-		spin_unlock_irqrestore(lock, flags);
-		*locked = false;
-	}
-
-	if (fatal_signal_pending(current)) {
-		cc->contended = true;
-		return true;
-	}
-
-	if (need_resched()) {
-		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
-			return true;
-		}
-		cond_resched();
-	}
-
-	return false;
-}
+#define compact_unlock_should_abort(lock, flags, locked, cc, unlockf) \
+({ \
+	bool __ret = false; \
+ \
+	if (*(locked)) { \
+		unlockf((lock), (flags)); \
+		*(locked) = false; \
+	} \
+ \
+	if (fatal_signal_pending(current)) { \
+		(cc)->contended = true; \
+		__ret = true; \
+	} else if (need_resched()) { \
+		if ((cc)->mode == MIGRATE_ASYNC) { \
+			(cc)->contended = true; \
+			__ret = true; \
+		} else { \
+			cond_resched(); \
+		} \
+	} \
+ \
+	__ret; \
+})
 
 /*
  * Aside from avoiding lock contention, compaction also periodically checks
@@ -457,7 +457,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		 */
 		if (!(blockpfn % SWAP_CLUSTER_MAX)
 		    && compact_unlock_should_abort(&cc->zone->lock, flags,
-								&locked, cc))
+				&locked, cc, write_unlock_irqrestore))
 			break;
 
 		nr_scanned++;
@@ -502,8 +502,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 			 * spin on the lock and we acquire the lock as late as
 			 * possible.
 			 */
-			locked = compact_trylock_irqsave(&cc->zone->lock,
-								&flags, cc);
+			locked = compact_trylock(&cc->zone->lock, &flags, cc,
+						 write_lock_irqsave,
+						 write_trylock_irqsave);
 			if (!locked)
 				break;

@@ -541,7 +542,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	}
 
 	if (locked)
-		spin_unlock_irqrestore(&cc->zone->lock, flags);
+		write_unlock_irqrestore(&cc->zone->lock, flags);
 
 	/*
 	 * There is a tiny chance that we have read bogus compound_order(),
@@ -758,7 +759,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 */
 		if (!(low_pfn % SWAP_CLUSTER_MAX)
 		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
-								&locked, cc))
+				&locked, cc, spin_unlock_irqrestore))
 			break;
 
 		if (!pfn_valid_within(low_pfn))
@@ -847,8 +848,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,

 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
-			locked = compact_trylock_irqsave(zone_lru_lock(zone),
-								&flags, cc);
+			locked = compact_trylock(zone_lru_lock(zone), &flags, cc,
+						 spin_lock_irqsave,
+						 spin_trylock_irqsave);
 			if (!locked)
 				break;

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..18fde0139f4a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1113,7 +1113,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,

 	zonelist = node_zonelist(nid, gfp_mask);
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 
 		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
 		while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
@@ -1125,16 +1125,16 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 				 * spinning on this lock, it may win the race
 				 * and cause alloc_contig_range() to fail...
 				 */
-				spin_unlock_irqrestore(&zone->lock, flags);
+				write_unlock_irqrestore(&zone->lock, flags);
 				ret = __alloc_gigantic_page(pfn, nr_pages, gfp_mask);
 				if (!ret)
 					return pfn_to_page(pfn);
-				spin_lock_irqsave(&zone->lock, flags);
+				write_lock_irqsave(&zone->lock, flags);
 			}
 			pfn += nr_pages;
 		}
 
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	return NULL;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 05e983f42316..38e39ccdd6d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1133,7 +1133,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		} while (--count && --batch_free && !list_empty(list));
 	}
 
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
 	/*
@@ -1151,7 +1151,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1159,13 +1159,13 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -2251,7 +2251,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 	if (zone->nr_reserved_highatomic >= max_managed)
 		return;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	/* Recheck the nr_reserved_highatomic limit under the lock */
 	if (zone->nr_reserved_highatomic >= max_managed)
@@ -2267,7 +2267,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }

/*
@@ -2300,7 +2300,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
pageblock_nr_pages)
continue;

- spin_lock_irqsave(&zone->lock, flags);
+ write_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct free_area *area = &(zone->free_area[order]);

@@ -2343,11 +2343,11 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 			ret = move_freepages_block(zone, page, ac->migratetype,
 									NULL);
 			if (ret) {
-				spin_unlock_irqrestore(&zone->lock, flags);
+				write_unlock_irqrestore(&zone->lock, flags);
 				return ret;
 			}
 		}
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	return false;
@@ -2465,7 +2465,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 {
 	int i, alloced = 0;
 
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
@@ -2498,7 +2498,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * pages added to the pcp list.
 	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 	return alloced;
 }

@@ -2687,7 +2687,7 @@ void mark_free_pages(struct zone *zone)
 	if (zone_is_empty(zone))
 		return;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	max_zone_pfn = zone_end_pfn(zone);
 	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
@@ -2721,7 +2721,7 @@ void mark_free_pages(struct zone *zone)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }
 #endif /* CONFIG_PM */

@@ -2990,7 +2990,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	do {
 		page = NULL;
@@ -3002,7 +3002,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 		if (!page)
 			page = __rmqueue(zone, order, migratetype);
 	} while (page && check_new_pages(page, order));
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 	if (!page)
 		goto failed;
 	__mod_zone_freepage_state(zone, -(1 << order),
@@ -5009,7 +5009,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		show_node(zone);
 		printk(KERN_CONT "%s: ", zone->name);
 
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
 			struct free_area *area = &zone->free_area[order];
 			int type;
@@ -5023,7 +5023,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 					types[order] |= 1 << type;
 			}
 		}
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
 			printk(KERN_CONT "%lu*%lukB ",
 			       nr[order], K(1UL) << order);
@@ -6247,7 +6247,7 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
 	zone_set_nid(zone, nid);
 	zone->name = zone_names[idx];
 	zone->zone_pgdat = NODE_DATA(nid);
-	spin_lock_init(&zone->lock);
+	rwlock_init(&zone->lock);
 	zone_seqlock_init(zone);
 	zone_pcp_init(zone);
 }
@@ -7239,7 +7239,7 @@ static void __setup_per_zone_wmarks(void)
 	for_each_zone(zone) {
 		u64 tmp;
 
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->managed_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
@@ -7277,7 +7277,7 @@ static void __setup_per_zone_wmarks(void)
 		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
 		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
 
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	/* update totalreserve_pages */
@@ -8041,7 +8041,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		return;
 	offline_mem_sections(pfn, end_pfn);
 	zone = page_zone(pfn_to_page(pfn));
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	pfn = start_pfn;
 	while (pfn < end_pfn) {
 		if (!pfn_valid(pfn)) {
@@ -8073,7 +8073,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 			SetPageReserved((page+i));
 		pfn += (1 << order);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }
 #endif

@@ -8084,14 +8084,14 @@ bool is_free_buddy_page(struct page *page)
 	unsigned long flags;
 	unsigned int order;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
 
 		if (PageBuddy(page_head) && page_order(page_head) >= order)
 			break;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	return order < MAX_ORDER;
 }
@@ -8110,7 +8110,7 @@ bool set_hwpoison_free_buddy_page(struct page *page)
 	unsigned int order;
 	bool hwpoisoned = false;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
 
@@ -8120,7 +8120,7 @@ bool set_hwpoison_free_buddy_page(struct page *page)
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	return hwpoisoned;
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 43e085608846..5c99fc2a1616 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -26,7 +26,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype,

 	zone = page_zone(page);
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	/*
 	 * We assume the caller intended to SET migrate type to isolate.
@@ -82,7 +82,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype,
 		__mod_zone_freepage_state(zone, -nr_pages, mt);
 	}
 
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages(zone);
 	return ret;
@@ -98,7 +98,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 	struct page *buddy;
 
 	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	if (!is_migrate_isolate_page(page))
 		goto out;
 
@@ -137,7 +137,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 	set_pageblock_migratetype(page, migratetype);
 	zone->nr_isolate_pageblock--;
 out:
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 	if (isolated_page) {
 		post_alloc_hook(page, order, __GFP_MOVABLE);
 		__free_pages(page, order);
@@ -299,10 +299,10 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 		return -EBUSY;
 	/* Check all pages are free or marked as ISOLATED */
 	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
 						skip_hwpoisoned_pages);
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8ba0870ecddd..06d79271a8ae 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1337,10 +1337,10 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 			continue;
 
 		if (!nolock)
-			spin_lock_irqsave(&zone->lock, flags);
+			write_lock_irqsave(&zone->lock, flags);
 		print(m, pgdat, zone);
 		if (!nolock)
-			spin_unlock_irqrestore(&zone->lock, flags);
+			write_unlock_irqrestore(&zone->lock, flags);
 	}
 }
 #endif
--
2.17.1