CRC loop device

From: Pavel Machek (pavel@suse.cz)
Date: Sat Apr 29 2000 - 16:36:00 EST

Next message: Alexander Viro: "[PATCH][mount-7-1-A] fixes to pre7-1 and more mount cleanups"
Previous message: David S. Miller: "Re: [PATCH] 2.3.99pre7-1 - From dev->resource[x].start to pci_resource_start(dev, x)"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

HI!

Now it actually works. If you suspect problems with your disks, try
it!

Pavel

--- clean/drivers/block/ll_rw_blk.c Thu Apr 27 10:05:27 2000
+++ linux/drivers/block/ll_rw_blk.c Sat Apr 29 22:12:07 2000
@@ -603,7 +603,7 @@
          * Cut max_req in half to avoid running out and deadlocking.
          */
          if ((major == LOOP_MAJOR) || (major == NBD_MAJOR))
- max_req >>= 1;
+ max_req >>= 2;

         /*
          * Try to coalesce the new request with old requests
--- clean/drivers/block/loop.c Tue Mar 21 20:30:08 2000
+++ linux/drivers/block/loop.c Sat Apr 29 22:12:33 2000
@@ -61,6 +61,7 @@
#include <linux/devfs_fs_kernel.h>

#include <asm/uaccess.h>
+#include <asm/checksum.h>

#include <linux/loop.h>

@@ -118,6 +119,101 @@
         return 0;
}

+/*
+ * Common prefix for every CRC diagnostic: backing device name, block
+ * number and checksum-block size.  Expands to one printk() call with no
+ * trailing semicolon; expects `lo', `real_block' and `blksize' in scope.
+ */
+#define ID printk(KERN_ERR "crc: info about (%s, %d, %d) ", kdevname(lo->lo_device), real_block, blksize)
+
+/*
+ * transfer_crc - loop transfer function that checksums every block.
+ *
+ * Copies `size' bytes between raw_buf and loop_buf (raw -> loop for READ,
+ * loop -> raw otherwise) while computing a checksum of the data, then
+ * checks or records that checksum on lo->second_device.
+ *
+ * Checksum layout on the second device: little-endian 32-bit values,
+ * blksize / 4 of them per 1024-byte block, starting at block 1 (block 0
+ * is not touched here).  The entry for `real_block' lives in block
+ * 1 + real_block / nsect at index real_block % nsect.
+ *
+ * On READ a mismatch is an error when lo_encrypt_key_size == 0 (normal
+ * mode); with a non-zero key size (repair mode) the mismatch is logged
+ * and the stored checksum is overwritten.  On write the stored checksum
+ * is always updated.
+ *
+ * Returns 0 on success, -1 if no CRC device is set, the checksum block
+ * cannot be obtained or read, or a checksum mismatch occurs in normal mode.
+ */
+static int transfer_crc(struct loop_device *lo, int cmd, char *raw_buf,
+			char *loop_buf, int size, int real_block)
+{
+	struct buffer_head *bh;
+	int blksize = 1024;	/* Size of block on auxiliary media */
+	int nsect = blksize / (int) sizeof(u32);	/* Checksums per auxiliary block */
+	unsigned int cksum;	/* Checksum of the data just copied */
+	u32 stored;		/* Checksum currently recorded on the CRC device */
+	u32 *slot;
+
+	if (!lo->second_device) {
+		ID; printk( "reading from not-yet-setup crc device can result in armagedon. Dont try again.\n" );
+		return -1;
+	}
+	bh = getblk(lo->second_device, 1 + real_block / nsect, blksize);
+	if (!bh) {
+		ID; printk( "getblk returned NULL.\n" );
+		return -1;
+	}
+	if (!buffer_uptodate(bh)) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			ID; printk( "could not read block with CRC\n" );
+			goto error;
+		}
+	}
+
+	slot = (u32 *) bh->b_data + real_block % nsect;
+
+	/* Copy in the direction of the transfer, checksumming as we go. */
+	if (cmd == READ)
+		cksum = csum_partial_copy_nocheck(raw_buf, loop_buf, size, 0);
+	else
+		cksum = csum_partial_copy_nocheck(loop_buf, raw_buf, size, 0);
+
+	if (cmd == READ) {
+		stored = le32_to_cpu(*slot);
+		if (stored == cksum)
+			goto done;
+		if (lo->lo_encrypt_key_size == 0) { /* Normal mode */
+			/* Report the stored value, not a placeholder constant. */
+			ID; printk( "wrong checksum reading, is %x, should be %x\n", cksum, stored );
+			goto error;
+		}
+		/* Repair mode: log the mismatch, then rewrite the entry below. */
+		ID; printk( "wrong checksum repairing, setting to %x\n", cksum );
+	}
+
+	/* Write path, or read in repair mode: record the fresh checksum. */
+	*slot = cpu_to_le32(cksum);
+	mark_buffer_uptodate(bh, 1);
+	mark_buffer_dirty(bh, 1);
+done:
+	brelse(bh);
+	return 0;
+error:
+	brelse(bh);
+	return -1;
+}
+
+/*
+ * ioctl_crc - ioctl hook of the CRC transfer method.
+ *
+ * Only LOOP_CRC_SET_FD is understood: `arg' is a file descriptor that
+ * must refer to a block device.  The device is opened with blkdev_open()
+ * and its device number is stored in lo->second_device, where
+ * transfer_crc() looks for the checksum blocks.
+ *
+ * On success the file reference taken by fget() is kept; nothing in this
+ * function releases it.
+ * NOTE(review): no matching fput() is visible in this patch - confirm
+ * where the checksum device is meant to be released.
+ *
+ * Returns 0 on success, -EINVAL for an unknown command or a descriptor
+ * that is not a block device, -EBADF for an invalid descriptor, or the
+ * error returned by blkdev_open().
+ */
+static int ioctl_crc(struct loop_device *lo, int cmd, unsigned long arg)
+{
+	struct file *file;
+	struct inode *inode;
+	int error;
+
+	printk( "Entering ioctl_crc\n" );
+	if (cmd != LOOP_CRC_SET_FD)
+		return -EINVAL;
+
+	file = fget(arg);
+	if (!file)
+		return -EBADF;
+
+	error = -EINVAL;
+	inode = file->f_dentry->d_inode;
+	if (!inode) {
+		printk(KERN_ERR "ioctl_crc: NULL inode?!?\n");
+		goto out_putf;
+	}
+	if (!S_ISBLK(inode->i_mode))
+		goto out_putf;
+
+	/* Register the device only once it has actually been opened. */
+	error = blkdev_open(inode, file);
+	if (error)
+		goto out_putf;
+
+	lo->second_device = inode->i_rdev;
+	printk( "loop_crc: Registered device %x\n", lo->second_device );
+	return 0;
+
+out_putf:
+	fput(file);
+	return error;
+}
+
static int none_status(struct loop_device *lo, struct loop_info *info)
{
         return 0;
@@ -142,10 +238,19 @@
         init: xor_status
};

+struct loop_func_table crc_funcs = { /* Method table for the checksumming (CRC) loop mode */
+ number: LO_CRYPT_CRC, /* method id; matches this table's slot in xfer_funcs[] */
+ transfer: transfer_crc, /* copies the data and checks/records its checksum */
+ init: none_status, /* shares the "none" method's hook, which just returns 0 */
+ ioctl: ioctl_crc /* handles LOOP_CRC_SET_FD (selects the checksum device) */
+};
+
/* xfer_funcs[0] is special - its release function is never called */
struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
         &none_funcs,
- &xor_funcs
+ &xor_funcs,
+ NULL, NULL, NULL, NULL, NULL, /* indices 2-6 stay empty so the next entry lands at index LO_CRYPT_CRC (7) */
+ &crc_funcs,
};

#define MAX_DISK_SIZE 1024*1024*1024
@@ -539,6 +644,7 @@
         lo->transfer = NULL;
         lo->ioctl = NULL;
         lo->lo_device = 0;
+ lo->second_device = 0;
         lo->lo_encrypt_type = 0;
         lo->lo_offset = 0;
         lo->lo_encrypt_key_size = 0;
@@ -750,6 +856,10 @@
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

+/*
+ * Do-nothing plug hook, installed as the loop queue's plug_device_fn in
+ * loop_init().  Static, with the hook's (queue, device) prototype, so the
+ * assignment type-checks and no global symbol is added.
+ * NOTE(review): presumably this stops the loop queue from being plugged
+ * so requests are dispatched immediately - confirm against ll_rw_blk.c.
+ */
+static void no_plug_device(request_queue_t *q, kdev_t device)
+{
+}
+
int __init loop_init(void)
{
         int i;
@@ -795,6 +905,7 @@
         }

         blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
+ ((request_queue_t *) BLK_DEFAULT_QUEUE(MAJOR_NR))->plug_device_fn = no_plug_device;
         blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
         for (i=0; i < max_loop; i++) {
                 memset(&loop_dev[i], 0, sizeof(struct loop_device));
--- clean/include/linux/loop.h Mon Nov 23 06:29:54 1998
+++ linux/include/linux/loop.h Sat Apr 15 13:22:57 2000
@@ -22,6 +22,7 @@
         struct dentry *lo_dentry;
         int lo_refcnt;
         kdev_t lo_device;
+ kdev_t second_device;
         int lo_offset;
         int lo_encrypt_type;
         int lo_encrypt_key_size;
@@ -94,6 +95,7 @@
#define LO_CRYPT_BLOW 4
#define LO_CRYPT_CAST128 5
#define LO_CRYPT_IDEA 6
+#define LO_CRYPT_CRC 7
#define LO_CRYPT_DUMMY 9
#define LO_CRYPT_SKIPJACK 10
#define MAX_LO_CRYPT 20
@@ -126,5 +128,6 @@
#define LOOP_CLR_FD 0x4C01
#define LOOP_SET_STATUS 0x4C02
#define LOOP_GET_STATUS 0x4C03
+#define LOOP_CRC_SET_FD 0x4C04

#endif
--- /dev/null Sun Jun 27 13:16:23 1999
+++ linux/Documentation/loop.txt Sat Apr 29 22:24:02 2000
@@ -0,0 +1,65 @@
+ CRC loop method
+ ---------------
+ pavel@suse.cz
+
+* What is it good for?
+~~~~~~~~~~~~~~~~~~~~~~
+
+ Assume you have flaky hardware. Your scsi cable is too long, your
+cat likes to chafe against it producing static electricity, and your
+system just crashes from time to time.
+
+ Linux filesystems are designed to be fsck-ed first. If they are
+not, they can do bad things (like crashing the system
+completely). An incorrect filesystem is able to induce kernel corruption
+with any results possible. Having a flaky io subsystem is equivalent to (or
+worse than) an unchecked filesystem, and it has resulted in kernel crashes
+before.
+
+ When you asked on linux-kernel, you were told to shoot your cat and
+make your cable shorter. That is not an option.
+
+ Raid5 is not going to help you in case error is between
+raid-controller and main system. What is worse, raid5 will not help
+you even if error is between raid controller and drives: raid5
+protects you against completely different kind of error: error where
+disk dies and you know it. Undetected io error is something else,
+RAID5 is USELESS there.
+
+ CRC loop method was designed to protect you from exactly this kind
+of situation. It expects io subsystem to produce errors which go away
+when retried (that may not be the case with writes: if you have
+bit-error when writing, subsequent read will discover it, but any
+number of retries is not going to help).
+
+* Usage
+~~~~~~~
+
+ CRC loop method needs two devices to work: a main device and a crc
+device. It exports a loop device which has exactly the same contents as the
+main device, with the exception that errors are generated when the crc check
+fails. The crc device is used for storing checksums; it has to be as big
+as sizeof(main) / 1024 + 16K.
+
+ CRC loop method has two modes of operation, normal and repair. In
+normal mode, when checksum error is detected on read, failure is
+generated (and error is logged). In repair mode, when checksum error
+is detected on read, failure is logged, but data on crc device is
+updated.
+
+ Repair mode is useful when first installing CRC loop method, and
+when your machine crashes. [During a crash, data could be updated on
+main but not on crc, leading to a state with false errors]. Updating
+crcs is actually pretty easy: set up CRC loop in repair mode, then cat
+/dev/loop0 > /dev/null.
+
+* Slowdown
+~~~~~~~~~~
+
+ On extreme benchmarks, slowdown is quite big:
+
+root@bug:~# time cat /dev/loop0 > /dev/null
+0.17user 32.31system 249.41 (4m9.413s) elapsed 13.02%CPU
+root@bug:~# time cat /dev/hda7 > /dev/null
+0.16user 10.44system 112.47 (1m52.476s) elapsed 9.42%CPU
+root@bug:~#

-- I'm pavel@ucw.cz. "In my country we have almost anarchy and I don't care." Panos Katsaloulis describing me w.r.t. patents me at discuss@linmodems.org

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/

Next message: Alexander Viro: "[PATCH][mount-7-1-A] fixes to pre7-1 and more mount cleanups"
Previous message: David S. Miller: "Re: [PATCH] 2.3.99pre7-1 - From dev->resource[x].start to pci_resource_start(dev, x)"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This archive was generated by hypermail 2b29 : Sun Apr 30 2000 - 21:00:17 EST