ok, i sent another email, here's the output, i managed to get a ps axwl run.
note, it does NOT always hang under the same circumstances, but sending
large bodied transactions has a high chance of triggering it.
(ps outputs trimmed)
James:~# ps axwl
F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND
000 0 120 1 8 0 1032 308 wait_o D ? 0:00 /usr/local/sbin/watchdogd
000 501 716 198 16 0 5036 2432 nanosl S pts/1 0:02 pine
000 501 4612 716 17 0 1472 784 wait_o D pts/1 0:00 joe /tmp/pico.000716
the child proc of pine is joe, I was exiting joe which would flush the
buffer to /tmp/pico.000716 then return to pine, pine would read that file.
ok, I repeated the exercise to gain more information. this time joe
managed to exit and pine started the smtp session and hung at the end. here
is the ps awxl:
nifty:/usr/src/linux-99p3# grep "wait_o" System.map
c01232ec T ___wait_on_page
c012e1cc T __wait_on_buffer
c0131b8c T __wait_on_super
c013e394 t __wait_on_inode
c0152d9c t __wait_on_dquot
c020b25f ? __kstrtab___wait_on_buffer
c020b27a ? __kstrtab____wait_on_page
c020c711 ? __kstrtab___wait_on_super
c0210b80 ? __ksymtab___wait_on_buffer
c0210b88 ? __ksymtab____wait_on_page
c02111c8 ? __ksymtab___wait_on_super
ok, let's try this exercise without quotas enabled.
doesn't affect it.
000 0 120 1 0 0 1032 308 12e345 D ? 0:00 /usr/local/sbin/watchdogd
140 516 408 383 0 0 4292 2512 12e345 D ? 0:00 /usr/local/apache/bin/httpd
040 8 416 414 12 0 2348 1576 15b7e5 D ? 0:15 sendmail: SAA00416 IDENT:mail@mail.l
000 0 425 351 10 0 1376 656 12e345 D pts/2 0:00 wget -r www.kalifornia.com
c012e1cc T __wait_on_buffer
c012e3a4 t sync_buffers
(unrelated i believe)
c015b5a8 t __get_request_wait
c015b848 T is_read_only
Dump of assembler code for function __wait_on_buffer:
[snipped to the point]
0xc012e314 <__wait_on_buffer+328>: movl (%ebx),%ebx
0xc012e316 <__wait_on_buffer+330>: lock addl $0x0,0x0(%esp,1)
0xc012e31c <__wait_on_buffer+336>: movl $0x0,0x4(%eax)
0xc012e323 <__wait_on_buffer+343>: testl %edx,%edx
0xc012e325 <__wait_on_buffer+345>:
je 0xc012e308 <__wait_on_buffer+316>
0xc012e327 <__wait_on_buffer+347>: pushl %ecx
0xc012e328 <__wait_on_buffer+348>: call *%edx
0xc012e32a <__wait_on_buffer+350>: addl $0x4,%esp
0xc012e32d <__wait_on_buffer+353>:
jmp 0xc012e308 <__wait_on_buffer+316>
0xc012e32f <__wait_on_buffer+355>: nop
0xc012e330 <__wait_on_buffer+356>: movl 0x10(%esp,1),%edx
0xc012e334 <__wait_on_buffer+360>: movl $0x2,(%edx)
0xc012e33a <__wait_on_buffer+366>: testb $0x4,0x18(%ebp)
0xc012e33e <__wait_on_buffer+370>:
je 0xc012e34c <__wait_on_buffer+384>
0xc012e340 <__wait_on_buffer+372>: call 0xc011450c <schedule>
0xc012e345 <__wait_on_buffer+377>: testb $0x4,0x18(%ebp)
0xc012e349 <__wait_on_buffer+381>:
jne 0xc012e2e8 <__wait_on_buffer+284>
0xc012e34b <__wait_on_buffer+383>: nop
0xc012e34c <__wait_on_buffer+384>: movl 0x10(%esp,1),%ecx
0xc012e350 <__wait_on_buffer+388>: movl $0x0,(%ecx)
0xc012e356 <__wait_on_buffer+394>: pushf
0xc012e357 <__wait_on_buffer+395>: popl %ebx
0xc012e358 <__wait_on_buffer+396>: cli
0xc012e359 <__wait_on_buffer+397>: movl 0x3c(%esp,1),%eax
0xc012e35d <__wait_on_buffer+401>: cmpl %edi,%eax
0xc012e35f <__wait_on_buffer+403>:
je 0xc012e388 <__wait_on_buffer+444>
0xc012e361 <__wait_on_buffer+405>: pushl %edi
0xc012e362 <__wait_on_buffer+406>: pushl %eax
0xc012e363 <__wait_on_buffer+407>: pushl $0xc01e2920
0xc012e368 <__wait_on_buffer+412>: call 0xc0117170 <printk>
linux/fs/buffer.c:134
134 /*
135 * Rewrote the wait-routines to use the "new" wait-queue functionality,
136 * and getting rid of the cli-sti pairs. The wait-queue routines still
137 * need cli-sti, but now it's just a couple of 386 instructions or so.
138 *
139 * Note that the real wait_on_buffer() is an inline function that checks
140 * if 'b_wait' is set before calling this, so that the queues aren't set
141 * up unnecessarily.
142 */
143 void __wait_on_buffer(struct buffer_head * bh)
144 {
145 struct task_struct *tsk = current;
146 DECLARE_WAITQUEUE(wait, tsk);
147
148 atomic_inc(&bh->b_count);
149 add_wait_queue(&bh->b_wait, &wait);
150 do {
151 run_task_queue(&tq_disk);
152 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
153 if (!buffer_locked(bh))
154 break;
155 schedule();
156 } while (buffer_locked(bh));
157 tsk->state = TASK_RUNNING;
158 remove_wait_queue(&bh->b_wait, &wait);
159 atomic_dec(&bh->b_count);
160 }
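It looks like it's hanging on line 156: the WCHAN value 12e345 in the ps
output is c012e345, the instruction right after the call to schedule()
(line 155), i.e. the buffer_locked() re-test in the while condition.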
include/linux/fs.h:#define __buffer_state(bh, state) (((bh)->b_state & (1UL << BH_##state)) != 0)
linux/include/linux/fs.h:#define buffer_locked(bh) __buffer_state(bh,Lock)
I'm building a kernel now with a printk to show the b_state
-d
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Thu Mar 23 2000 - 21:00:31 EST