[patch][rfc][rft] vm throughput 2.4.2-ac4

From: Mike Galbraith (mikeg@wen-online.de)
Date: Tue Feb 27 2001 - 05:43:21 EST


Hi,

Attempting to avoid doing I/O has been harmful to throughput here
ever since the queueing/elevator woes were fixed; since then, tossing
those avoidance attempts has improved throughput markedly.

IMHO, any patch which claims to improve throughput via code deletion
should be worth a little eyeball time.. and maybe even a test run ;-)
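
For the impatient: the page_launder() part boils down to something
like the toy sketch below. This is plain userspace C typed up only to
illustrate the control-flow change; the names and numbers are made up
and it is not kernel code. The old first pass that merely rotated
dirty pages to the back of the list and deferred the IO to a second
launder_loop pass is gone; we now queue async writeout on first
encounter whenever __GFP_IO allows, and stop as soon as the shortage
is gone.

/*
 * Toy model of the page_launder() change (userspace C, illustration
 * only, not kernel code).
 */
#include <stdio.h>

#define NR_PAGES 8
#define SHORTAGE 3                      /* pretend we need 3 free pages */

static const int dirty[NR_PAGES] = { 1, 0, 1, 1, 0, 1, 0, 1 };

static int launder(int can_do_io)
{
        int i, freed = 0, flushed = 0;

        for (i = 0; i < NR_PAGES; i++) {
                if (freed + flushed >= SHORTAGE)
                        break;          /* enough progress, stop scanning */
                if (!dirty[i])
                        freed++;        /* clean page: reclaim it right away */
                else if (can_do_io)
                        flushed++;      /* dirty page: queue async writeout now,
                                           no rotate-and-retry second pass */
        }
        return freed + flushed;
}

int main(void)
{
        printf("pages freed or made freeable: %d\n", launder(1));
        return 0;
}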

Comments welcome.

        -Mike

--- linux-2.4.2-ac4/mm/page_alloc.c.org Mon Feb 26 11:19:27 2001
+++ linux-2.4.2-ac4/mm/page_alloc.c Tue Feb 27 10:31:10 2001
@@ -274,7 +274,7 @@
 struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 {
         zone_t **zone;
-        int direct_reclaim = 0;
+        int direct_reclaim = 0, loop = 0;
         unsigned int gfp_mask = zonelist->gfp_mask;
         struct page * page;

@@ -366,7 +366,7 @@
          * able to free some memory we can't free ourselves
          */
         wakeup_kswapd();
-        if (gfp_mask & __GFP_WAIT) {
+        if (gfp_mask & __GFP_WAIT && loop) {
                 __set_current_state(TASK_RUNNING);
                 current->policy |= SCHED_YIELD;
                 schedule();
@@ -440,7 +440,7 @@
                         memory_pressure++;
                         try_to_free_pages(gfp_mask);
                         wakeup_bdflush(0);
-                        if (!order)
+                        if (!order || loop++ < (1 << order))
                                 goto try_again;
                 }
         }
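
Aside on the page_alloc.c bits above: order-0 allocations still retry
until they succeed, exactly as before, while an order-N allocation now
gets at most 2^N extra trips back to try_again before failing, and it
only yields the CPU once it has already looped at least once. Tiny
userspace sketch of the arithmetic (illustration only, not kernel
code):

/*
 * With the "!order || loop++ < (1 << order)" test, order 0 keeps
 * retrying as before, while order N goes back to try_again at most
 * (1 << N) extra times.
 */
#include <stdio.h>

int main(void)
{
        unsigned long order;

        for (order = 0; order <= 3; order++) {
                int loop = 0, passes = 0;

                /* simulate try_to_free_pages() never making progress,
                 * capped so the order-0 case terminates in this demo */
                while ((!order || loop++ < (1 << order)) && passes < 100)
                        passes++;

                if (order)
                        printf("order %lu: %d extra try_again passes\n",
                               order, passes);
                else
                        printf("order 0: retries indefinitely (unchanged)\n");
        }
        return 0;
}
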
--- linux-2.4.2-ac4/mm/vmscan.c.org Mon Feb 26 09:31:46 2001
+++ linux-2.4.2-ac4/mm/vmscan.c Tue Feb 27 09:04:50 2001
@@ -278,6 +278,8 @@
         /* Always start by trying to penalize the process that is allocating memory */
         if (mm)
                 retval = swap_out_mm(mm, swap_amount(mm));
+        if (retval)
+                return retval;

         /* Then, look at the other mm's */
         counter = (mmlist_nr << SWAP_SHIFT) >> priority;
@@ -418,8 +420,8 @@
 #define MAX_LAUNDER (1 << page_cluster)
 int page_launder(int gfp_mask, int user)
 {
-        int launder_loop, maxscan, flushed_pages, freed_pages, maxlaunder;
-        int can_get_io_locks, sync, target, shortage;
+        int maxscan, flushed_pages, freed_pages, maxlaunder;
+        int can_get_io_locks;
         struct list_head * page_lru;
         struct page * page;
         struct zone_struct * zone;
@@ -430,15 +432,10 @@
          */
         can_get_io_locks = gfp_mask & __GFP_IO;

-        target = free_shortage();
-
-        sync = 0;
-        launder_loop = 0;
         maxlaunder = 0;
         flushed_pages = 0;
         freed_pages = 0;

-dirty_page_rescan:
         spin_lock(&pagemap_lru_lock);
         maxscan = nr_inactive_dirty_pages;
         while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list &&
@@ -446,6 +443,9 @@
                 page = list_entry(page_lru, struct page, lru);
                 zone = page->zone;

+                if ((user && freed_pages + flushed_pages > MAX_LAUNDER)
+                                || !free_shortage())
+                        break;
                 /* Wrong page on list?! (list corruption, should not happen) */
                 if (!PageInactiveDirty(page)) {
                         printk("VM: page_launder, wrong page on list.\n");
@@ -464,18 +464,7 @@
                         continue;
                 }

-                /*
-                 * Disk IO is really expensive, so we make sure we
-                 * don't do more work than needed.
-                 * Note that clean pages from zones with enough free
-                 * pages still get recycled and dirty pages from these
-                 * zones can get flushed due to IO clustering.
-                 */
-                if (freed_pages + flushed_pages > target && !free_shortage())
-                        break;
-                if (launder_loop && !maxlaunder)
-                        break;
-                if (launder_loop && zone->inactive_clean_pages +
+                if (zone->inactive_clean_pages +
                                 zone->free_pages > zone->pages_high)
                         goto skip_page;

@@ -500,14 +489,6 @@
                         if (!writepage)
                                 goto page_active;

-                        /* First time through? Move it to the back of the list */
-                        if (!launder_loop) {
-                                list_del(page_lru);
-                                list_add(page_lru, &inactive_dirty_list);
-                                UnlockPage(page);
-                                continue;
-                        }
-
                         /* OK, do a physical asynchronous write to swap. */
                         ClearPageDirty(page);
                         page_cache_get(page);
@@ -517,7 +498,6 @@
                         /* XXX: all ->writepage()s should use nr_async_pages */
                         if (!PageSwapCache(page))
                                 flushed_pages++;
-                        maxlaunder--;
                         page_cache_release(page);

                         /* And re-start the thing.. */
@@ -535,7 +515,7 @@
                  * buffer pages
                  */
                 if (page->buffers) {
-                        int wait, clearedbuf;
+                        int clearedbuf;
                         /*
                          * Since we might be doing disk IO, we have to
                          * drop the spinlock and take an extra reference
@@ -545,16 +525,8 @@
                         page_cache_get(page);
                         spin_unlock(&pagemap_lru_lock);

-                        /* Will we do (asynchronous) IO? */
-                        if (launder_loop && maxlaunder == 0 && sync)
-                                wait = 2;        /* Synchrounous IO */
-                        else if (launder_loop && maxlaunder-- > 0)
-                                wait = 1;        /* Async IO */
-                        else
-                                wait = 0;        /* No IO */
-
                         /* Try to free the page buffers. */
-                        clearedbuf = try_to_free_buffers(page, wait);
+                        clearedbuf = try_to_free_buffers(page, can_get_io_locks);

                         /*
                          * Re-take the spinlock. Note that we cannot
@@ -566,7 +538,7 @@
                         /* The buffers were not freed. */
                         if (!clearedbuf) {
                                 add_page_to_inactive_dirty_list(page);
-                                if (wait)
+                                if (can_get_io_locks)
                                         flushed_pages++;

                         /* The page was only in the buffer cache. */
@@ -619,61 +591,8 @@
         spin_unlock(&pagemap_lru_lock);

         /*
-         * If we don't have enough free pages, we loop back once
-         * to queue the dirty pages for writeout. When we were called
-         * by a user process (that /needs/ a free page) and we didn't
-         * free anything yet, we wait synchronously on the writeout of
-         * MAX_SYNC_LAUNDER pages.
-         *
-         * We also wake up bdflush, since bdflush should, under most
-         * loads, flush out the dirty pages before we have to wait on
-         * IO.
-         */
-        shortage = free_shortage();
-        if (can_get_io_locks && !launder_loop && shortage) {
-                launder_loop = 1;
-
-                /*
-                 * User programs can run page_launder() in parallel so
-                 * we only flush a few pages at a time to avoid big IO
-                 * storms. Kswapd, OTOH, is expected usually keep up
-                 * with the paging load in the system and doesn't have
-                 * the IO storm problem, so it just flushes all pages
-                 * needed to fix the free shortage.
-                 */
-                maxlaunder = shortage;
-                maxlaunder -= flushed_pages;
-                maxlaunder -= atomic_read(&nr_async_pages);
-
-                if (maxlaunder <= 0)
-                        goto out;
-
-                if (user && maxlaunder > MAX_LAUNDER)
-                        maxlaunder = MAX_LAUNDER;
-
-                /*
-                 * If we are called by a user program, we need to free
-                 * some pages. If we couldn't, we'll do the last page IO
-                 * synchronously to be sure
-                 */
-                if (user && !freed_pages)
-                        sync = 1;
-
-                goto dirty_page_rescan;
-        }
-
-        /*
-         * We have to make sure the data is actually written to
-         * the disk now, otherwise we'll never get enough clean
-         * pages and the system will keep queueing dirty pages
-         * for flushing.
-         */
-        run_task_queue(&tq_disk);
-
-        /*
          * Return the amount of pages we freed or made freeable.
          */
-out:
         return freed_pages + flushed_pages;
 }

@@ -846,7 +765,7 @@
  * continue with its real work sooner. It also helps balancing when we
  * have multiple processes in try_to_free_pages simultaneously.
  */
-#define DEF_PRIORITY (6)
+#define DEF_PRIORITY (2)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
         int count, start_count, maxtry;
@@ -981,14 +900,6 @@
                 /* If needed, try to free some memory. */
                 if (inactive_shortage() || free_shortage())
                         do_try_to_free_pages(GFP_KSWAPD, 0);
-
-                /*
-                 * Do some (very minimal) background scanning. This
-                 * will scan all pages on the active list once
-                 * every minute. This clears old referenced bits
-                 * and moves unused pages to the inactive list.
-                 */
-                refill_inactive_scan(DEF_PRIORITY, 0);

                 /* Once a second, recalculate some VM stats. */
                 if (time_after(jiffies, recalc + HZ)) {
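
One more note, on the DEF_PRIORITY change: if I read it right, the
scan budgets keyed off it are all of the form "work >> priority", like
the counter = (mmlist_nr << SWAP_SHIFT) >> priority line visible in
the swap_out() hunk, so dropping the starting priority from 6 to 2
makes the first pass 2^4 = 16 times more aggressive. Back-of-the-
envelope check (userspace C; the SWAP_SHIFT and mmlist_nr values are
assumed for illustration, not taken from the tree):

/*
 * Budgets of the form "work >> priority" grow by 2^(6-2) = 16x when
 * the starting priority drops from 6 to 2.
 */
#include <stdio.h>

#define SWAP_SHIFT 5                    /* assumed, for illustration */

int main(void)
{
        unsigned long mmlist_nr = 50;   /* made-up number of mm's */
        int priority;

        for (priority = 6; priority >= 2; priority -= 4)
                printf("priority %d: swap_out counter = %lu\n",
                       priority, (mmlist_nr << SWAP_SHIFT) >> priority);
        return 0;
}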
