[PATCH 07/10] lightnvm: pblk: remove target using async. I/Os

From: Javier GonzÃlez
Date: Fri Jun 30 2017 - 11:58:34 EST


When removing a pblk instance, pad the current line using asynchronous
I/O. This reduces the removal time from ~1 minute in the worst case to a
couple of seconds.

Signed-off-by: Javier GonzÃlez <javier@xxxxxxxxxxxx>
Signed-off-by: Matias BjÃrling <matias@xxxxxxxxxxxx>
---
drivers/lightnvm/pblk-core.c | 5 +-
drivers/lightnvm/pblk-init.c | 9 +++
drivers/lightnvm/pblk-rb.c | 8 ++
drivers/lightnvm/pblk-recovery.c | 165 ++++++++++++++++++++++-----------------
drivers/lightnvm/pblk-write.c | 2 +-
drivers/lightnvm/pblk.h | 8 ++
6 files changed, 123 insertions(+), 74 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 74b8d9db05e1..e6f42cddc8ec 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -273,9 +273,10 @@ static void pblk_flush_writer(struct pblk *pblk)
{
pblk_rb_flush(&pblk->rwb);
do {
- if (!pblk_rb_read_count(&pblk->rwb))
+ if (!pblk_rb_sync_count(&pblk->rwb))
break;

+ pblk_write_kick(pblk);
schedule();
} while (1);
}
@@ -1350,6 +1351,7 @@ void pblk_pipeline_stop(struct pblk *pblk)
return;
}

+ flush_workqueue(pblk->bb_wq);
pblk_line_close_meta_sync(pblk);

spin_lock(&l_mg->free_lock);
@@ -1547,6 +1549,7 @@ void pblk_line_close_meta_sync(struct pblk *pblk)
}

pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
}

static void pblk_line_should_sync_meta(struct pblk *pblk)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index b3fc310aa51c..025d8fe52154 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -841,6 +841,15 @@ static int pblk_writer_init(struct pblk *pblk)

static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 2dda874af890..7300be98e831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -180,6 +180,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}

+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 6d58659fa3da..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -327,47 +327,94 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
return 0;
}

+static void pblk_recov_complete(struct kref *ref)
+{
+ struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+ complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+ struct pblk_pad_rq *pad_rq = rqd->private;
+ struct pblk *pblk = pad_rq->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+ pblk_free_rqd(pblk, rqd, WRITE);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);

next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto free_rq;
+ }
+
rq_len = rq_ppas * geo->sec_size;

+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto free_data;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+ memset(rqd, 0, pblk_w_rq_size);
+
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }

bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

- memset(rqd, 0, pblk_g_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -376,8 +423,8 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;

for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
@@ -405,25 +452,41 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
}
}

+ kref_get(&pad_rq->ref);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ goto free_data;
}

- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);

left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;

- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+free_rq:
+ kfree(pad_rq);
+free_data:
+ vfree(data);
+ return ret;
+
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+ kfree(pad_rq);
+ return ret;
}

/* When this function is called, it means that not all upper pages have been
@@ -555,7 +618,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;

- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);

@@ -961,64 +1024,22 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
*/
int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
int left_msecs;
int ret = 0;
- dma_addr_t dma_ppa_list, dma_meta_list;

spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);

- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return PTR_ERR(rqd);
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list) {
- ret = -ENOMEM;
- goto free_rqd;
- }
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto free_meta_list;
- }
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- ret = pblk_recov_pad_oob(pblk, line, p, left_msecs);
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
if (ret) {
pr_err("pblk: Tear down padding failed (%d)\n", ret);
- goto free_data;
+ return ret;
}

pblk_line_close_meta(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
-
return ret;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 8151bf4bb945..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -190,7 +190,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)

if (rqd->error) {
pblk_log_write_err(pblk, rqd);
- pr_err("pblk: metadata I/O failed\n");
+ pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
}
#ifdef CONFIG_NVM_DEBUG
else
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index cdad2c9edbdf..bf5b73fb345f 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -111,6 +111,13 @@ struct pblk_g_ctx {
void *private;
};

+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
+};
+
/* Recovery context */
struct pblk_rec_ctx {
struct pblk *pblk;
@@ -674,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);

unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);

int pblk_rb_tear_down_check(struct pblk_rb *rb);
--
2.7.4