[patch 2/4] [md] Add RESYNC_RANGE ioctl

From: scjody
Date: Thu Oct 01 2009 - 18:40:58 EST


Add the RESYNC_RANGE ioctl and implement it for RAID 4/5/6. This causes an
immediate resync of the requested sectors if the device is under resync.

TODO: In raid456 (and probably in any other personality that implements
this), there should be some concept of the last blocks that were resynced,
since the personality can resync more data than requested, which will result
in multiple resyncs of the same data with this implementation.

Index: linux-2.6.18-128.1.6/drivers/md/md.c
===================================================================
--- linux-2.6.18-128.1.6.orig/drivers/md/md.c
+++ linux-2.6.18-128.1.6/drivers/md/md.c
@@ -4512,6 +4512,71 @@ static int md_ioctl(struct inode *inode,
goto done_unlock;
}

+		case RESYNC_RANGE:
+		{
+			/* Resync sectors range.start..range.end (inclusive) now. */
+			mdu_range_t range;
+			struct hd_struct *part = inode->i_bdev->bd_part;
+
+			if (!arg) {
+				err = -EINVAL;
+				goto abort_unlock;
+			}
+
+			if (copy_from_user(&range, argp, sizeof(range))) {
+				err = -EFAULT;
+				goto abort_unlock;
+			}
+
+			if (range.start > range.end) {
+				err = -EINVAL;
+				goto abort_unlock;
+			}
+
+			if (part) {
+				/*
+				 * The range is relative to the partition.
+				 * Bound it before adding the partition offset
+				 * so that a huge range.end cannot wrap around
+				 * and slip past the checks.
+				 */
+				if (range.end >= part->nr_sects) {
+					err = -EINVAL;
+					goto abort_unlock;
+				}
+				range.start += part->start_sect;
+				range.end += part->start_sect;
+			}
+
+			/* Reject ranges that run past the end of the array. */
+			if (range.end >= mddev->array_size<<1) {
+				err = -EINVAL;
+				goto abort_unlock;
+			}
+
+			if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+				/* We are already in sync; return success */
+				err = 0;
+				goto abort_unlock;
+			}
+
+			if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+				/* Something is running but not a resync. */
+				err = -EBUSY;
+				goto abort_unlock;
+			}
+
+			if (mddev->pers->resync_range == NULL) {
+				err = -EINVAL;
+				goto abort_unlock;
+			}
+
+			/* Hand the absolute sector range to the personality. */
+			err = mddev->pers->resync_range(mddev, range.start,
+							range.end);
+			goto done_unlock;
+		}
+
default:
err = -EINVAL;
goto abort_unlock;
@@ -4865,6 +4930,7 @@ static int md_seq_show(struct seq_file *
mdk_rdev_t *rdev;
struct mdstat_info *mi = seq->private;
struct bitmap *bitmap;
+ unsigned long resync;

if (v == (void*)1) {
struct mdk_personality *pers;
@@ -4883,6 +4949,8 @@ static int md_seq_show(struct seq_file *
return 0;
}

+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
+
if (mddev_lock(mddev) < 0)
return -EINTR;

Index: linux-2.6.18-128.1.6/include/linux/raid/md_u.h
===================================================================
--- linux-2.6.18-128.1.6.orig/include/linux/raid/md_u.h
+++ linux-2.6.18-128.1.6/include/linux/raid/md_u.h
@@ -46,6 +46,7 @@
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
#define SKIP_RESYNC _IO (MD_MAJOR, 0x40)
+#define RESYNC_RANGE _IO (MD_MAJOR, 0x41) /* arg: mdu_range_t *. NOTE(review): _IO encodes no size/direction; consider _IOW */

typedef struct mdu_version_s {
int major;
@@ -121,5 +122,11 @@ typedef struct mdu_param_s
int max_fault; /* unused for now */
} mdu_param_t;

+typedef struct mdu_range_s /* argument of the RESYNC_RANGE ioctl */
+{
+ __u64 start; /* first sector, relative to the opened device/partition */
+ __u64 end; /* last sector, inclusive; must be >= start */
+} mdu_range_t;
+
#endif

Index: linux-2.6.18-128.1.6/drivers/md/raid5.c
===================================================================
--- linux-2.6.18-128.1.6.orig/drivers/md/raid5.c
+++ linux-2.6.18-128.1.6/drivers/md/raid5.c
@@ -1698,8 +1698,10 @@ static void handle_stripe5(struct stripe
}
}
if (failed > 1 && syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ if (!test_bit(STRIPE_RESYNC_RANGE, &sh->state))
+ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
+ clear_bit(STRIPE_RESYNC_RANGE, &sh->state);
syncing = 0;
}

@@ -1932,8 +1934,10 @@ static void handle_stripe5(struct stripe
}
}
if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+ if (!test_bit(STRIPE_RESYNC_RANGE, &sh->state))
+ md_done_sync(conf->mddev, STRIPE_SECTORS,1);
clear_bit(STRIPE_SYNCING, &sh->state);
+ clear_bit(STRIPE_RESYNC_RANGE, &sh->state);
}

/* If the failed drive is just a ReadError, then we might need to progress
@@ -2275,8 +2279,10 @@ static void handle_stripe6(struct stripe
}
}
if (failed > 2 && syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ if (!test_bit(STRIPE_RESYNC_RANGE, &sh->state))
+ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
+ clear_bit(STRIPE_RESYNC_RANGE, &sh->state);
syncing = 0;
}

@@ -2571,8 +2577,10 @@ static void handle_stripe6(struct stripe
}

if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+ if (!test_bit(STRIPE_RESYNC_RANGE, &sh->state))
+ md_done_sync(conf->mddev, STRIPE_SECTORS,1);
clear_bit(STRIPE_SYNCING, &sh->state);
+ clear_bit(STRIPE_RESYNC_RANGE, &sh->state);
}

/* If the failed drives are just a ReadError, then we might need
@@ -3300,6 +3308,82 @@ static inline sector_t sync_request(mdde
return STRIPE_SECTORS;
}

+/*
+ * Perform an immediate resync of the requested range.
+ *
+ * @start and @end are array sectors, both inclusive.  Every stripe that
+ * holds any of them gets STRIPE_SYNCING and STRIPE_RESYNC_RANGE set and is
+ * handled right away.  A range that crosses a chunk boundary is widened
+ * to whole chunks, so more data than requested may be resynced.
+ * Always returns 0.
+ *
+ * NOTE(review): if the resync thread has already accounted for a stripe
+ * that is still syncing, setting STRIPE_RESYNC_RANGE on it here looks
+ * like it would suppress that stripe's md_done_sync() -- confirm against
+ * sync_request().
+ */
+static int resync_range(mddev_t *mddev, sector_t start, sector_t end)
+{
+	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
+	unsigned int disks = conf->raid_disks;
+	unsigned int data_disks = disks - conf->max_degraded;
+	unsigned int sectors_per_chunk = conf->chunk_size >> 9;
+	unsigned int dd_idx, pd_idx, first_off, last_off;
+	sector_t first_chunk = start, last_chunk = end;
+	sector_t j, sync_end;
+
+	printk("resync_range, sectors %llu - %llu\n", (unsigned long long)start,
+		(unsigned long long)end);
+
+	j = raid5_compute_sector(start, disks, data_disks,
+				 &dd_idx, &pd_idx, conf);
+	sync_end = raid5_compute_sector(end, disks, data_disks,
+					&dd_idx, &pd_idx, conf);
+
+	/*
+	 * Consecutive array chunks sit at the same device offset on different
+	 * disks, so device sectors do not grow monotonically with array
+	 * sectors.  If the range spans more than one chunk, widen it to whole
+	 * chunks so that no stripe in between is missed.
+	 */
+	first_off = sector_div(first_chunk, sectors_per_chunk);
+	last_off = sector_div(last_chunk, sectors_per_chunk);
+	if (first_chunk != last_chunk) {
+		j -= first_off;
+		sync_end += sectors_per_chunk - 1 - last_off;
+	}
+
+	/* Stripes are addressed by STRIPE_SECTORS-aligned device sectors. */
+	j &= ~((sector_t)STRIPE_SECTORS - 1);
+
+	while (j <= sync_end) {
+		struct stripe_head *sh;
+
+		pd_idx = stripe_to_pdidx(j, conf, disks);
+		sh = get_active_stripe(conf, j, disks, pd_idx, 1);
+		if (sh == NULL) {
+			sh = get_active_stripe(conf, j, disks, pd_idx, 0);
+			/* make sure we don't swamp the stripe cache if someone
+			 * else is trying to get access
+			 */
+			schedule_timeout_uninterruptible(1);
+		}
+
+		spin_lock(&sh->lock);
+		set_bit(STRIPE_SYNCING, &sh->state);
+		set_bit(STRIPE_RESYNC_RANGE, &sh->state);
+		clear_bit(STRIPE_INSYNC, &sh->state);
+		spin_unlock(&sh->lock);
+
+		handle_stripe(sh, NULL, NULL);
+		release_stripe(sh);
+
+		j += STRIPE_SECTORS;
+	}
+
+	return 0;
+}
+
/*
* This is our raid5 kernel thread.
*
@@ -4106,6 +4160,7 @@ static struct mdk_personality raid6_pers
.resize = raid5_resize,
.quiesce = raid5_quiesce,
.skip_resync = skip_resync,
+ .resync_range = resync_range,
};
static struct mdk_personality raid5_personality =
{
@@ -4128,6 +4183,7 @@ static struct mdk_personality raid5_pers
#endif
.quiesce = raid5_quiesce,
.skip_resync = skip_resync,
+ .resync_range = resync_range,
};

static struct mdk_personality raid4_personality =
@@ -4147,6 +4203,7 @@ static struct mdk_personality raid4_pers
.resize = raid5_resize,
.quiesce = raid5_quiesce,
.skip_resync = skip_resync,
+ .resync_range = resync_range,
};

static int __init raid5_init(void)
Index: linux-2.6.18-128.1.6/include/linux/raid/md_k.h
===================================================================
--- linux-2.6.18-128.1.6.orig/include/linux/raid/md_k.h
+++ linux-2.6.18-128.1.6/include/linux/raid/md_k.h
@@ -284,6 +284,7 @@ struct mdk_personality
*/
void (*quiesce) (mddev_t *mddev, int state);
int (*skip_resync) (mddev_t *mddev, sector_t start, sector_t end);
+ int (*resync_range) (mddev_t *mddev, sector_t start, sector_t end);
};


Index: linux-2.6.18-128.1.6/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.18-128.1.6.orig/include/linux/raid/raid5.h
+++ linux-2.6.18-128.1.6/include/linux/raid/raid5.h
@@ -180,6 +180,8 @@ struct stripe_head {
#define STRIPE_EXPANDING 9
#define STRIPE_EXPAND_SOURCE 10
#define STRIPE_EXPAND_READY 11
+#define STRIPE_RESYNC_RANGE 12 /* set by resync_range(); handle_stripe skips md_done_sync() */
+
/*
* Plugging:
*

--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/