Re: bad CDs and eternal retries - what to do? (long)

Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
28 May 1996 14:35:28 +0400


Alan Cox (alan@lxorguk.UKuu.ORG.UK) wrote:
: > Under Linux kernel 1.3.94 and 1.3.100 it appeared that the isofs module
: > was trying to continually read from the CD and the process that requested
: > to read the file would not respond to a kill -9 thus requiring a reboot
: > to stop the continual reading of the CD.

: I got an interesting very similar looking problem with the Linux Kongress
: CD. My CD is visibly busted (you can see the defects) and the driver or
: isofs is locking the machine dead after about 3 retries. I guess someone
: is getting a bad metadata block and infinite looping.

: If the isofs or sbpcd person would like the CD in question popped in the post
: just ask

It is not a CD driver problem; it is a bug in the page cache.
I wrote about it earlier (Subj: "serious bug in filemap.c").
fill_page was "fixed" in 1.99.7, but both it and generic_file_read are
still buggy.
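
To see why the reader hangs (or spins): on a bad block readpage() completes
with the page's error flag set and uptodate still clear, and both fill_page
and generic_file_read then simply issue the read again, with no limit. Here
is a user-space sketch of that pattern and of the bounded-retry idea; it is
only an illustration, not kernel code, and the names (bad_readpage,
page_uptodate, ...) are invented:

/*
 * User-space sketch of the bug (NOT kernel code; all names are invented
 * for illustration).  On a damaged block, readpage() completes with the
 * error bit set and the page never becomes up to date, so a loop that
 * retries until "uptodate" spins forever.
 */
#include <stdio.h>
#include <errno.h>

static int page_uptodate = 0;   /* models PG_uptodate */
static int page_error = 0;      /* models PG_error    */

/* Models readpage() on a physically damaged sector: it always fails. */
int bad_readpage(void)
{
    page_error = 1;             /* error flag set, page still not uptodate */
    return 0;
}

/* Old pattern: retry until the page is uptodate - i.e. forever. */
int read_unbounded(void)
{
    while (!page_uptodate)
        bad_readpage();
    return 0;
}

/* Patched pattern: a small retry counter turns the loop into -EIO. */
int read_bounded(void)
{
    int try = 0;

    while (!page_uptodate) {
        if (page_error && try > 2)
            return -EIO;
        bad_readpage();
        try++;
    }
    return 0;
}

int main(void)
{
    (void) read_unbounded;      /* shown only for contrast; calling it would hang */
    printf("read_bounded() returns %d (-EIO)\n", read_bounded());
    return 0;
}

The patch below does the same thing as read_bounded(): it keeps a per-page
"try" counter and, after a couple of failed readpage() calls, gives up with
-EIO (or a failed fill_page) instead of looping.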

My current filemap.c (very UGLY, but it works both for bad CDs and for NFS):

....
int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
    int error, read;
    unsigned long pos, ppos, page_cache;
    int reada_ok;

    if (count <= 0)
        return 0;

    error = 0;
    read = 0;
    page_cache = 0;

    pos = filp->f_pos;
    ppos = pos & PAGE_MASK;
    /*
     * If the current position is outside the previous read-ahead window,
     * we reset the current read-ahead context and set read-ahead max to zero
     * (it will be set to just the needed value later);
     * otherwise we assume that the file accesses are sequential enough to
     * continue reading ahead.
     */
    if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) {
        reada_ok = 0;
        filp->f_raend = 0;
        filp->f_ralen = 0;
        filp->f_ramax = 0;
        filp->f_rawin = 0;
    } else {
        reada_ok = 1;
    }
    /*
     * Adjust the current value of read-ahead max.
     * If the read operation stays within the first half page, force no readahead.
     * Otherwise try to increase read-ahead max just enough to do the read request.
     * Then use at least MIN_READAHEAD if read-ahead is ok,
     * and at most MAX_READAHEAD in all cases.
     */
    if (pos + count <= (PAGE_SIZE >> 1)) {
        filp->f_ramax = 0;
    } else {
        unsigned long needed;

        needed = ((pos + count) & PAGE_MASK) - (pos & PAGE_MASK);

        if (filp->f_ramax < needed)
            filp->f_ramax = needed;

        if (reada_ok && filp->f_ramax < MIN_READAHEAD)
            filp->f_ramax = MIN_READAHEAD;
        if (filp->f_ramax > MAX_READAHEAD)
            filp->f_ramax = MAX_READAHEAD;
    }

    for (;;) {
        struct page *page;
        unsigned long offset, addr, nr;
#ifndef NO_ANK_FIX
        int try = 0;    /* per-page retry counter for failed reads */
#endif

        if (pos >= inode->i_size)
            break;
        offset = pos & ~PAGE_MASK;
        nr = PAGE_SIZE - offset;
        /*
         * Try to find the data in the page cache..
         */
        page = find_page(inode, pos & PAGE_MASK);
        if (page)
            goto found_page;

        /*
         * Ok, it wasn't cached, so we need to create a new
         * page..
         */
        if (page_cache)
            goto new_page;

        error = -ENOMEM;
        page_cache = __get_free_page(GFP_KERNEL);
        if (!page_cache)
            break;
        error = 0;

        /*
         * That could have slept, so we need to check again..
         */
        if (pos >= inode->i_size)
            break;
        page = find_page(inode, pos & PAGE_MASK);
        if (!page)
            goto new_page;

found_page:
        addr = page_address(page);
        if (nr > count)
            nr = count;
        /*
         * Try to read ahead only if the current page is filled or being filled.
         * Otherwise, if we were reading ahead, decrease max read-ahead size to
         * the minimum value.
         * In this context, that seems to happen only on a read error or if
         * the page has been rewritten.
         */
        if (PageUptodate(page) || PageLocked(page))
            page_cache = generic_file_readahead(reada_ok, filp, inode, pos, page, page_cache);
        else if (reada_ok && filp->f_ramax > MIN_READAHEAD)
            filp->f_ramax = MIN_READAHEAD;

        if (PageLocked(page))
            __wait_on_page(page);

        if (!PageUptodate(page))
#ifdef NO_ANK_FIX
            goto read_page;
#else
        {
            /* Give up with -EIO after a few failed attempts instead of retrying forever. */
            if (!PageError(page) || try > 2) {
                error = -EIO;
                free_page(addr);
                break;
            }
            goto read_page;
        }
#endif
        if (nr > inode->i_size - pos)
            nr = inode->i_size - pos;
        memcpy_tofs(buf, (void *) (addr + offset), nr);
        free_page(addr);
        buf += nr;
        pos += nr;
        read += nr;
        count -= nr;
        if (count)
            continue;
        break;

new_page:
        /*
         * Ok, add the new page to the hash-queues...
         */
        addr = page_cache;
        page = mem_map + MAP_NR(page_cache);
        page_cache = 0;
        add_to_page_cache(page, inode, pos & PAGE_MASK);

        /*
         * Error handling is tricky. If we get a read error,
         * the cached page stays in the cache (but uptodate=0),
         * and the next process that accesses it will try to
         * re-read it. This is needed for NFS etc, where the
         * identity of the reader can decide if we can read the
         * page or not..
         */
read_page:
        /*
         * We have to read the page.
         * If we were reading ahead, we had previously tried to read this page.
         * That means that the page has probably been removed from the cache before
         * the application process needed it, or has been rewritten.
         * Decrease max read-ahead size to the minimum value in that situation.
         */
        if (reada_ok && filp->f_ramax > MIN_READAHEAD)
            filp->f_ramax = MIN_READAHEAD;

#ifndef NO_ANK_FIX
        try++;
#endif
        error = inode->i_op->readpage(inode, page);
        if (!error) {
#ifdef NO_ANK_FIX
            if (!PageError(page))
                goto found_page;
            error = -EIO;
#else
            goto found_page;
#endif
        }
        free_page(addr);
        break;
    }

    filp->f_pos = pos;
    filp->f_reada = 1;
    if (page_cache)
        free_page(page_cache);
    if (!IS_RDONLY(inode)) {
        inode->i_atime = CURRENT_TIME;
        inode->i_dirt = 1;
    }
    if (!read)
        read = error;
    return read;
}

/*
* Find a cached page and wait for it to become up-to-date, return
* the page address. Increments the page count.
*/
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
    struct page * page;
    unsigned long new_page;
#ifndef NO_ANK_FIX
    int try = 0;    /* retry counter for failed reads */
#endif

    page = find_page(inode, offset);
    if (page)
        goto found_page_dont_free;
    new_page = __get_free_page(GFP_KERNEL);
    page = find_page(inode, offset);
    if (page)
        goto found_page;
    if (!new_page)
        return 0;
    page = mem_map + MAP_NR(new_page);
    new_page = 0;
    add_to_page_cache(page, inode, offset);
    inode->i_op->readpage(inode, page);
#ifdef NO_ANK_FIX
    if (PageLocked(page))
        new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
#else
    try++;
    if (PageUptodate(page) || PageLocked(page))
        new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
#endif
found_page:
    if (new_page)
        free_page(new_page);
found_page_dont_free:
    wait_on_page(page);
#ifndef NO_ANK_FIX
    if (!PageUptodate(page)) {
        /* Re-read a limited number of times, then give up. */
        try++;
        inode->i_op->readpage(inode, page);
        wait_on_page(page);
        if (!PageUptodate(page)) {
            if (PageError(page) && try < 2)
                goto found_page_dont_free;
            return 0;
        }
    }
#endif
    return page_address(page);
}
......
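
In both functions the original behaviour is still available under
#ifdef NO_ANK_FIX; with the fix compiled in, a persistently failing page
read is retried only a few times before the reader gets -EIO (or fill_page
returns 0), so a bad CD no longer wedges the process in an endless re-read
of the same block.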

Alexey Kuznetsov.