Re: Kernel 2.0.30 pre-X SMP (in)stability report

Dr. Werner Fink (werner@suse.de)
Fri, 19 Sep 1997 14:58:26 +0200


> Before the real 2.0.31 is released, I would like to share
> my own experience with the prepatches. So far I have found
> that:
> - Prepatch 2 is the most reliable/stable.
> - Prepatches 5 & 9 are not stable for my hardware.
>
> Symptom:
> - Total lock-up during heavy load & swapping.
> - Nothing logged in the syslogs.

Hmmm ... IMHO this is not an SMP-related problem but one in the buffer
and VM code ... please try the appended patch (it touches fs/buffer.c,
mm/kmalloc.c and mm/vmscan.c; apply with patch -p1 from inside your
kernel source tree).

>
> Kernel configuration:
> - SMP enabled.
> - QNX style scheduling patch applied.

Please remove the last one for testing purposes ... we need a comparison
with a plain pre-patch-2.0.31-9 ... the only patch I recommend is the
"Swap cache patch for pre-2.0.31-9" by Krzysztof Strasburger and maybe
a simple

echo "40 500 64 256 15 3000 500 1884 2" > /proc/sys/vm/bdflush

to make bdflush flush at 40% dirty buffers instead of 60%.
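
The first of those nine fields is nfract, the percentage of dirty
buffers at which bdflush gets woken up; the test, as it appears in
refill_freelist() in the appended patch, is essentially:

	/* wake bdflush once dirty buffers exceed nfract percent of all
	 * buffers -- 40% with the echo above instead of the usual 60% */
	if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
		wakeup_bdflush(1);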

For the Adaptec 2940 ... please use the default settings of the aic7xxx
driver.

... and report to the list, thanks :-)

BTW: The people with glimpse and squake should also test this setup
... only for testing purposes ;^)

Werner

------------------------------------------------------------------------
diff -urN linux-2.0.31-9/fs/buffer.c linux/fs/buffer.c
--- linux-2.0.31-9/fs/buffer.c Mon Sep 8 14:32:14 1997
+++ linux/fs/buffer.c Tue Sep 16 18:57:07 1997
@@ -559,7 +559,7 @@
static struct buffer_head *find_candidate(struct buffer_head *bh,
int *list_len, int size)
{
- int behind = 0;
+ int lookahead = 7;

if (!bh)
goto no_candidate;
@@ -572,11 +572,12 @@
try_to_free_buffer(bh,&bh,1);
if (!bh)
break;
+ lookahead = 7;
continue;
}
else if (buffer_locked(bh) &&
(bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
- if (behind++ > 10) {
+ if (!--lookahead) {
(*list_len) = 0;
goto no_candidate;
}
@@ -595,9 +596,10 @@
{
struct buffer_head * bh;
struct buffer_head * candidate[BUF_DIRTY];
+ extern struct task_struct *bdflush_tsk;
unsigned int best_time, winner;
int buffers[BUF_DIRTY];
- int i;
+ int i, limit = ((min_free_pages + free_pages_low) >> 1);
int needed;

refilled = 1;
@@ -606,7 +608,7 @@
for user processes to use (and dirty) */

/* We are going to try to locate this much memory */
- needed =bdf_prm.b_un.nrefill * size;
+ needed = bdf_prm.b_un.nrefill * size;

while (nr_free_pages > min_free_pages*2 && needed > 0 &&
grow_buffers(GFP_BUFFER, size)) {
@@ -661,21 +663,38 @@

/* Dirty buffers should not overtake, wakeup_bdflush(1) calls
bdflush and sleeps, therefore kswapd does his important work. */
- if ((nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100) ||
- (nr_free_pages < min_free_pages))
+ if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
wakeup_bdflush(1);

/* Too bad, that was not enough. Try a little harder to grow some. */

- if (nr_free_pages > min_free_pages + 5) {
+ if (nr_free_pages > limit) {
if (grow_buffers(GFP_BUFFER, size)) {
needed -= PAGE_SIZE;
goto repeat;
};
}

+ /* If we are not bdflush we should wake up bdflush and try it again. */
+
+ if (current != bdflush_tsk) {
+ wakeup_bdflush(1);
+ needed -= PAGE_SIZE;
+ goto repeat;
+ }
+
+ /* We are bdflush: let's try our best */
+
+ /*
+ * In order to protect our reserved pages,
+ * return now if we got any buffers.
+ */
+ allow_interrupts();
+ if (free_list[BUFSIZE_INDEX(size)])
+ return;
+
/* and repeat until we find something good */
- wakeup_bdflush(1);
+ grow_buffers(GFP_BUFFER, size);

/* decrease needed even if there is no success */
needed -= PAGE_SIZE;
@@ -966,11 +985,15 @@
* This is critical. We can't swap out pages to get
* more buffer heads, because the swap-out may need
* more buffer-heads itself. Thus GFP_ATOMIC.
+ *
+ * This is no longer true, it is GFP_BUFFER again, the
+ * swapping code now knows not to perform I/O when that
+ * GFP level is specified... -DaveM
*/
/* we now use kmalloc() here instead of gfp as we want
to be able to easily release buffer heads - they
took up quite a bit of memory (tridge) */
- bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_ATOMIC);
+ bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_BUFFER);
if (bh) {
put_unused_buffer_head(bh);
nr_buffer_heads++;
diff -urN linux-2.0.31-9/mm/kmalloc.c linux/mm/kmalloc.c
--- linux-2.0.31-9/mm/kmalloc.c Sat Jun 8 17:12:33 1996
+++ linux/mm/kmalloc.c Tue Sep 16 19:04:35 1997
@@ -344,6 +344,7 @@
* Now we're going to muck with the "global" freelist
* for this size: this should be uninterruptible
*/
+ save_flags(flags);
cli();
page->next = *pg;
*pg = page;
@@ -447,6 +448,6 @@
return;

not_on_freelist:
- printk("Ooops. page %p doesn't show on freelist.\n", page);
restore_flags(flags);
+ printk("Ooops. page %p doesn't show on freelist.\n", page);
}
diff -urN linux-2.0.31-9/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.0.31-9/mm/vmscan.c Mon Sep 8 14:32:21 1997
+++ linux/mm/vmscan.c Wed Sep 17 14:46:40 1997
@@ -406,11 +406,8 @@
can_do_io = 1;
if (wait)
stop = 0;
- if (priority == GFP_BUFFER) {
- /* bdflush() should do the rest if we fail */
- stop = 3;
+ if (priority == GFP_BUFFER)
can_do_io = 0;
- }
switch (state) {
do {
case 0:
@@ -492,10 +489,14 @@
interruptible_sleep_on(&kswapd_wait);
kswapd_awake = 1;
swapstats.wakeups++;
+ /* Protect our reserved pages: */
+ i = 0;
+ if (nr_free_pages <= min_free_pages)
+ i = (1+min_free_pages) - nr_free_pages;
/* Do the background pageout: */
- for (i=0; i < kswapd_ctl.maxpages; i++)
+ for (i += kswapd_ctl.maxpages; i > 0; i--)
try_to_free_page(GFP_KERNEL, 0,
- (nr_free_pages < min_free_pages));
+ (nr_free_pages <= min_free_pages));
}
}
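
P.S. about the mm/kmalloc.c hunks: the freelist manipulation did a cli()
without a prior save_flags(), so the restore_flags(flags) further down
restored whatever happened to be in flags; the second hunk also moves
restore_flags() in front of the printk(), so the message is no longer
printed with interrupts disabled. A minimal sketch of the intended 2.0.x
idiom:

	unsigned long flags;

	save_flags(flags);	/* remember the current interrupt state */
	cli();			/* disable interrupts around the freelist update */
	/* ... muck with the "global" freelist ... */
	restore_flags(flags);	/* restore the saved interrupt state */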