super.c - kerneld race condition in 2.0.27
Thu, 2 Jan 1997 00:44:48 -0500 (EST)

From linux/fs/super.c and linux/CREDITS
Added kerneld support: Jacques Gelinas and Bjorn Ekwall,

From linux/drivers/ide-cd.c and linux/CREDITS
N: Scott Snyder

Below is what I think is a fairly well documented oops. However, after
a bit of research I think that what causes the oops is more interesting
than the remains after it. This is from Linux-2.0.27.

The problem seems to be a race condition between block device drivers,
the VFS and kerneld. If I try to mount an IDE-CDROM drive with no
disk in the drive, the IDE-CD driver takes 1-2 minutes to determine
that it can't read the non-existent disk. It would be really nice if
the IDE-CD driver didn't take so long to time out and fail, but
that isn't the real problem.

When the disk is mounted with '-t iso9660', kerneld immediately
loads 'isofs.o'. During the time that the IDE-CD
block driver is retrying and refailing to read the non-existent
disk, kerneld times out on isofs.o (it is not marked as in use at
this point) and unloads it. When the IDE-CD driver returns with
failure, linux/fs/super.c attempts to access the iso9660 fs code
that has been unloaded by kerneld and causes the oops below.

The oops can be prevented by one of four methods:

1) preload isofs.o with insmod
2) use '-t auto' which prevents accessing the isofs.o code since
the file system type can't be determined.
3) use the delay= option of kerneld to delay the removal of isofs.o
4) make sure you have a disk in the CD-ROM drive. ;-)

The result of the oops seems to be a stable system but the IDE-CD
drive is reported to be busy and therefore useless until the system
is rebooted.

Making ide-cd.o time out more quickly would be nice and would avoid
triggering the race condition, but it wouldn't fix it. My request
(only a request, as I don't expect to produce a patch myself ;-) would
be to have ide-cd time out more quickly as a favor to the user _and_
to have the super.c and kerneld semantics changed to properly prevent
the problem. Would it be possible for super.c to "use" the fs module so
that the usage count would not be zero, thus preventing kerneld from
unloading it until the mount has completed or failed?


Tyson D Sawyer  <>
Senior Systems Engineer
Real World Interface, Inc.


From `dmesg`
------------
Unable to handle kernel paging request at virtual address c28442ff
current->tss.cr3 = 00bf4000, %cr3 = 00bf4000
*pde = 0009e067
*pte = 00000000
Oops: 0000
CPU: 0
EIP: 0010:[<00126681>]
EFLAGS: 00010206
eax: 0018f665 ebx: 0019fa74 ecx: 001c6cb8 edx: 028442ff
esi: 0018f62f edi: 028442ff ebp: 028442ff esp: 007d2ee0
ds: 0018 es: 0018 fs: 002b gs: 002b ss: 0018
Process mount (pid: 9346, process nr: 52, stackpage=007d2000)
Stack: 00000000 00001640 00000007 08051640 001269e6 028442ff 001a1640 00001640
       00000007 08050798 00126e83 00001640 028442ff 00000007 009e1000 00000000
       001a1d00 00001640 080507b0 c0ed0007 00d333e0 001273aa 00001640 080507b0
Call Trace: [<001269e6>] [<028442ff>] [<00126e83>] [<028442ff>] [<001273aa>] [<028442ff>] [<028442ff>] [<0010a615>]
Code: ae 75 08 84 c0 75 f8 31 c0 eb 05 19 c0 83 c8 01 85 c0 74 07

Output from ksymoops
--------------------
Using `/boot/' to map addresses to symbols.

>>EIP: 126681 <get_fs_type+29/a8>
Trace: 1269e6 <read_super+3e/d4>
Trace: 28442ff
Trace: 126e83 <do_mount+cf/134>
Trace: 28442ff
Trace: 1273aa <sys_mount+2d6/31c>
Trace: 28442ff
Trace: 28442ff
Trace: 10a615 <system_call+55/80>

Code: 126681 <get_fs_type+29/a8> scasb %es:(%edi),%al
Code: 126682 <get_fs_type+2a/a8> jne 12668c <get_fs_type+34/a8>
Code: 126684 <get_fs_type+2c/a8> testb %al,%al
Code: 126686 <get_fs_type+2e/a8> jne ffffffff <_EIP+ffffffff>
Code: 126688 <get_fs_type+30/a8> xorl %eax,%eax
Code: 12668a <get_fs_type+32/a8> jmp 126691 <get_fs_type+39/a8>
Code: 12668c <get_fs_type+34/a8> sbbl %eax,%eax
Code: 12668e <get_fs_type+36/a8> orl $0x1,%eax
Code: 126691 <get_fs_type+39/a8> testl %eax,%eax
Code: 126693 <get_fs_type+3b/a8> je 12669c <get_fs_type+44/a8>

Output from: gdb /usr/src/linux/vmlinux (gdb) disassemble get_fs_type -------------------------------------- Dump of assembler code for function get_fs_type: 0x126658 <get_fs_type>: pushl %ebp 0x126659 <get_fs_type+1>: pushl %edi 0x12665a <get_fs_type+2>: pushl %esi 0x12665b <get_fs_type+3>: pushl %ebx 0x12665c <get_fs_type+4>: movl 0x14(%esp,1),%ebp 0x126660 <get_fs_type+8>: movl 0x19d4d0,%ebx 0x126666 <get_fs_type+14>: testl %ebp,%ebp 0x126668 <get_fs_type+16>: jne 0x126674 <get_fs_type+28> 0x12666a <get_fs_type+18>: movl %ebx,%eax 0x12666c <get_fs_type+20>: popl %ebx 0x12666d <get_fs_type+21>: popl %esi 0x12666e <get_fs_type+22>: popl %edi 0x12666f <get_fs_type+23>: popl %ebp 0x126670 <get_fs_type+24>: ret 0x126671 <get_fs_type+25>: leal 0x0(%esi),%esi

0x126674 <get_fs_type+28>: testl %ebx,%ebx 0x126676 <get_fs_type+30>: je 0x1266a0 <get_fs_type+72>

0x126678 <get_fs_type+32>: movl 0x4(%ebx),%eax 0x12667b <get_fs_type+35>: movl %eax,%esi 0x12667d <get_fs_type+37>: movl %ebp,%edi 0x12667f <get_fs_type+39>: cld

0x126680 <get_fs_type+40>: lodsb %ds:(%esi),%al 0x126681 <get_fs_type+41>: scasb %es:(%edi),%al 0x126682 <get_fs_type+42>: jne 0x12668c <get_fs_type+52> 0x126684 <get_fs_type+44>: testb %al,%al 0x126686 <get_fs_type+46>: jne 0x126680 <get_fs_type+40> 0x126688 <get_fs_type+48>: xorl %eax,%eax 0x12668a <get_fs_type+50>: jmp 0x126691 <get_fs_type+57>

0x12668c <get_fs_type+52>: sbbl %eax,%eax 0x12668e <get_fs_type+54>: orl $0x1,%eax

0x126691 <get_fs_type+57>: testl %eax,%eax 0x126693 <get_fs_type+59>: je 0x12669c <get_fs_type+68> 0x126695 <get_fs_type+61>: movl 0xc(%ebx),%ebx 0x126698 <get_fs_type+64>: testl %ebx,%ebx 0x12669a <get_fs_type+66>: jne 0x126678 <get_fs_type+32>

0x12669c <get_fs_type+68>: testl %ebx,%ebx 0x12669e <get_fs_type+70>: jne 0x1266f8 <get_fs_type+160>

0x1266a0 <get_fs_type+72>: pushl $0x0 0x1266a2 <get_fs_type+74>: pushl %ebp 0x1266a3 <get_fs_type+75>: movl $0xffffffff,%ecx 0x1266a8 <get_fs_type+80>: movl %ebp,%edi 0x1266aa <get_fs_type+82>: xorl %eax,%eax 0x1266ac <get_fs_type+84>: cld 0x1266ad <get_fs_type+85>: repnz scasb %es:(%edi),%al 0x1266af <get_fs_type+87>: notl %ecx 0x1266b1 <get_fs_type+89>: decl %ecx 0x1266b2 <get_fs_type+90>: pushl %ecx 0x1266b3 <get_fs_type+91>: pushl $0x80000000 0x1266b8 <get_fs_type+96>: pushl $0x2 0x1266ba <get_fs_type+98>: call 0x132c34 <kerneld_send> 0x1266bf <get_fs_type+103>: movl %eax,%edx 0x1266c1 <get_fs_type+105>: addl $0x14,%esp 0x1266c4 <get_fs_type+108>: testl %edx,%edx 0x1266c6 <get_fs_type+110>: jne 0x1266f8 <get_fs_type+160> 0x1266c8 <get_fs_type+112>: movl 0x19d4d0,%ebx 0x1266ce <get_fs_type+118>: testl %ebx,%ebx 0x1266d0 <get_fs_type+120>: je 0x1266f8 <get_fs_type+160> 0x1266d2 <get_fs_type+122>: leal (%esi),%esi

0x1266d4 <get_fs_type+124>: movl 0x4(%ebx),%eax 0x1266d7 <get_fs_type+127>: movl %eax,%esi 0x1266d9 <get_fs_type+129>: movl %ebp,%edi 0x1266db <get_fs_type+131>: cld

0x1266dc <get_fs_type+132>: lodsb %ds:(%esi),%al 0x1266dd <get_fs_type+133>: scasb %es:(%edi),%al 0x1266de <get_fs_type+134>: jne 0x1266e8 <get_fs_type+144> 0x1266e0 <get_fs_type+136>: testb %al,%al 0x1266e2 <get_fs_type+138>: jne 0x1266dc <get_fs_type+132> 0x1266e4 <get_fs_type+140>: xorl %eax,%eax 0x1266e6 <get_fs_type+142>: jmp 0x1266ed <get_fs_type+149>

0x1266e8 <get_fs_type+144>: sbbl %eax,%eax 0x1266ea <get_fs_type+146>: orl $0x1,%eax

0x1266ed <get_fs_type+149>: testl %eax,%eax 0x1266ef <get_fs_type+151>: je 0x1266f8 <get_fs_type+160> 0x1266f1 <get_fs_type+153>: movl 0xc(%ebx),%ebx 0x1266f4 <get_fs_type+156>: testl %ebx,%ebx 0x1266f6 <get_fs_type+158>: jne 0x1266d4 <get_fs_type+124>

0x1266f8 <get_fs_type+160>: movl %ebx,%eax 0x1266fa <get_fs_type+162>: popl %ebx 0x1266fb <get_fs_type+163>: popl %esi 0x1266fc <get_fs_type+164>: popl %edi 0x1266fd <get_fs_type+165>: popl %ebp 0x1266fe <get_fs_type+166>: ret 0x1266ff <get_fs_type+167>: nop End of assembler dump.