[PATCH 16/21] nd: blk labels and namespace instantiation

From: Dan Williams
Date: Fri Apr 17 2015 - 21:39:30 EST


A blk label set describes a namespace comprised of one or more
discontiguous dpa ranges on a single dimm. They may alias with one or
more pmem interleave sets that include the given dimm.

This is the runtime/volatile configuration infrastructure for sysfs
manipulation of 'alt_name', 'uuid', 'size', and 'sector_size'. A later
patch will make these settings persistent by writing back the label(s).

Unlike pmem namespaces, multiple blk namespaces can be created per
region. Once a blk namespace has been created a new seed device
(unconfigured child of a parent blk region) is instantiated. As long as
a region has 'available_size' != 0 new child namespaces may be created.

Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Neil Brown <neilb@xxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
drivers/block/nd/core.c | 40 +++
drivers/block/nd/dimm_devs.c | 35 +++
drivers/block/nd/namespace_devs.c | 502 ++++++++++++++++++++++++++++++++++---
drivers/block/nd/nd-private.h | 10 +
drivers/block/nd/nd.h | 5
drivers/block/nd/region_devs.c | 15 +
include/linux/nd.h | 25 ++
7 files changed, 588 insertions(+), 44 deletions(-)

diff --git a/drivers/block/nd/core.c b/drivers/block/nd/core.c
index 560ed5555496..880aef08f919 100644
--- a/drivers/block/nd/core.c
+++ b/drivers/block/nd/core.c
@@ -213,6 +213,46 @@ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
return 0;
}

+ssize_t nd_sector_size_show(unsigned long current_lbasize,
+ const unsigned long *supported, char *buf)
+{
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; supported[i]; i++)
+ if (current_lbasize == supported[i])
+ len += sprintf(buf + len, "[%ld] ", supported[i]);
+ else
+ len += sprintf(buf + len, "%ld ", supported[i]);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+ssize_t nd_sector_size_store(struct device *dev, const char *buf,
+ unsigned long *current_lbasize, const unsigned long *supported)
+{
+ unsigned long lbasize;
+ int rc, i;
+
+ if (dev->driver)
+ return -EBUSY;
+
+ rc = kstrtoul(buf, 0, &lbasize);
+ if (rc)
+ return rc;
+
+ for (i = 0; supported[i]; i++)
+ if (lbasize == supported[i])
+ break;
+
+ if (supported[i]) {
+ *current_lbasize = lbasize;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
static ssize_t commands_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
diff --git a/drivers/block/nd/dimm_devs.c b/drivers/block/nd/dimm_devs.c
index caa51d3ea6af..ae77bf4a5188 100644
--- a/drivers/block/nd/dimm_devs.c
+++ b/drivers/block/nd/dimm_devs.c
@@ -417,6 +417,41 @@ static struct nd_dimm *nd_dimm_create(struct nd_bus *nd_bus,
}

/**
+ * nd_blk_available_dpa - account the unused dpa of BLK region
+ * @nd_mapping: container of dpa-resource-root + labels
+ *
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ */
+resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+{
+ struct nd_dimm_drvdata *ndd = to_ndd(nd_mapping);
+ resource_size_t map_end, busy = 0, available;
+ struct resource *res;
+
+ if (!ndd)
+ return 0;
+
+ map_end = nd_mapping->start + nd_mapping->size - 1;
+ for_each_dpa_resource(ndd, res)
+ if (res->start >= nd_mapping->start && res->start < map_end) {
+ resource_size_t end = min(map_end, res->end);
+
+ busy += end - res->start + 1;
+ } else if (res->end >= nd_mapping->start && res->end <= map_end) {
+ busy += res->end - nd_mapping->start;
+ } else if (nd_mapping->start > res->start
+ && nd_mapping->start < res->end) {
+ /* total eclipse of the BLK region mapping */
+ busy += nd_mapping->size;
+ }
+
+ available = map_end - nd_mapping->start + 1;
+ if (busy < available)
+ return available - busy;
+ return 0;
+}
+
+/**
* nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
* @nd_mapping: container of dpa-resource-root + labels
* @nd_region: constrain available space check to this reference region
diff --git a/drivers/block/nd/namespace_devs.c b/drivers/block/nd/namespace_devs.c
index 386776845830..de36f3891284 100644
--- a/drivers/block/nd/namespace_devs.c
+++ b/drivers/block/nd/namespace_devs.c
@@ -37,7 +37,15 @@ static void namespace_pmem_release(struct device *dev)

static void namespace_blk_release(struct device *dev)
{
- /* TODO: blk namespace support */
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+
+ if (nsblk->id >= 0)
+ ida_simple_remove(&nd_region->ns_ida, nsblk->id);
+ kfree(nsblk->alt_name);
+ kfree(nsblk->uuid);
+ kfree(nsblk->res);
+ kfree(nsblk);
}

static struct device_type namespace_io_device_type = {
@@ -90,8 +98,9 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf,

ns_altname = &nspm->alt_name;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ ns_altname = &nsblk->alt_name;
} else
return -ENXIO;

@@ -124,6 +133,24 @@ out:
return rc;
}

+static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
+{
+ struct nd_region *nd_region = to_nd_region(nsblk->dev.parent);
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nd_dimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_label_id label_id;
+ resource_size_t size = 0;
+ struct resource *res;
+
+ if (!nsblk->uuid)
+ return 0;
+ nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+ for_each_dpa_resource(ndd, res)
+ if (strcmp(res->name, label_id.id) == 0)
+ size += resource_size(res);
+ return size;
+}
+
static ssize_t alt_name_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -150,8 +177,9 @@ static ssize_t alt_name_show(struct device *dev,

ns_altname = nspm->alt_name;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ ns_altname = nsblk->alt_name;
} else
return -ENXIO;

@@ -197,6 +225,8 @@ static int scan_free(struct nd_region *nd_region,
new_start = res->start;

rc = adjust_resource(res, new_start, resource_size(res) - n);
+ if (rc == 0)
+ res->flags |= DPA_RESOURCE_ADJUSTED;
nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
break;
}
@@ -257,14 +287,15 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
return rc ? n : 0;
}

-static bool space_valid(bool is_pmem, struct nd_label_id *label_id,
- struct resource *res)
+static bool space_valid(bool is_pmem, bool is_reserve,
+ struct nd_label_id *label_id, struct resource *res)
{
/*
* For BLK-space any space is valid, for PMEM-space, it must be
- * contiguous with an existing allocation.
+ * contiguous with an existing allocation unless we are
+ * reserving pmem.
*/
- if (!is_pmem)
+ if (is_reserve || !is_pmem)
return true;
if (!res || strcmp(res->name, label_id->id) == 0)
return true;
@@ -280,6 +311,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
resource_size_t n)
{
resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
+ bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
struct nd_dimm_drvdata *ndd = to_ndd(nd_mapping);
const resource_size_t to_allocate = n;
@@ -305,7 +337,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
if (!first++ && res->start > nd_mapping->start) {
free_start = nd_mapping->start;
available = res->start - free_start;
- if (space_valid(is_pmem, label_id, NULL))
+ if (space_valid(is_pmem, is_reserve, label_id, NULL))
loc = ALLOC_BEFORE;
}

@@ -313,7 +345,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
if (!loc && next) {
free_start = res->start + resource_size(res);
free_end = min(mapping_end, next->start - 1);
- if (space_valid(is_pmem, label_id, res)
+ if (space_valid(is_pmem, is_reserve, label_id, res)
&& free_start < free_end) {
available = free_end + 1 - free_start;
loc = ALLOC_MID;
@@ -324,7 +356,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
if (!loc && !next) {
free_start = res->start + resource_size(res);
free_end = mapping_end;
- if (space_valid(is_pmem, label_id, res)
+ if (space_valid(is_pmem, is_reserve, label_id, res)
&& free_start < free_end) {
available = free_end + 1 - free_start;
loc = ALLOC_AFTER;
@@ -338,7 +370,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_BEFORE:
if (strcmp(res->name, label_id->id) == 0) {
/* adjust current resource up */
- if (is_pmem)
+ if (is_pmem && !is_reserve)
return n;
rc = adjust_resource(res, res->start - allocate,
resource_size(res) + allocate);
@@ -349,7 +381,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_MID:
if (strcmp(next->name, label_id->id) == 0) {
/* adjust next resource up */
- if (is_pmem)
+ if (is_pmem && !is_reserve)
return n;
rc = adjust_resource(next, next->start
- allocate, resource_size(next)
@@ -375,7 +407,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
/* BLK allocate bottom up */
if (!is_pmem)
free_start += available - allocate;
- else if (free_start != nd_mapping->start)
+ else if (!is_reserve && free_start != nd_mapping->start)
return n;

new_res = nd_dimm_allocate_dpa(ndd, label_id,
@@ -386,6 +418,8 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
/* adjust current resource down */
rc = adjust_resource(res, res->start, resource_size(res)
+ allocate);
+ if (rc == 0)
+ res->flags |= DPA_RESOURCE_ADJUSTED;
}

if (!new_res)
@@ -411,11 +445,106 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
return 0;
}

- if (is_pmem && n == to_allocate)
+ /*
+ * If we allocated nothing in the BLK case it may be because we are in
+ * an initial "pmem-reserve pass". Only do an initial BLK allocation
+ * when none of the DPA space is reserved.
+ */
+ if ((is_pmem || !ndd->dpa.child) && n == to_allocate)
return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
return n;
}

+static int merge_dpa(struct nd_region *nd_region,
+ struct nd_mapping *nd_mapping, struct nd_label_id *label_id)
+{
+ struct nd_dimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct resource *res;
+
+ if (strncmp("pmem", label_id->id, 4) == 0)
+ return 0;
+ retry:
+ for_each_dpa_resource(ndd, res) {
+ int rc;
+ struct resource *next = res->sibling;
+ resource_size_t end = res->start + resource_size(res);
+
+ if (!next || strcmp(res->name, label_id->id) != 0
+ || strcmp(next->name, label_id->id) != 0
+ || end != next->start)
+ continue;
+ end += resource_size(next);
+ nd_dimm_free_dpa(ndd, next);
+ rc = adjust_resource(res, res->start, end - res->start);
+ nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc);
+ if (rc)
+ return rc;
+ res->flags |= DPA_RESOURCE_ADJUSTED;
+ goto retry;
+ }
+
+ return 0;
+}
+
+static int __reserve_free_pmem(struct device *dev, void *data)
+{
+ struct nd_dimm *nd_dimm = data;
+ struct nd_region *nd_region;
+ struct nd_label_id label_id;
+ int i;
+
+ if (!is_nd_pmem(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ if (nd_region->ndr_mappings == 0)
+ return 0;
+
+ memset(&label_id, 0, sizeof(label_id));
+ strcat(label_id.id, "pmem-reserve");
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ resource_size_t n, rem = 0;
+
+ if (nd_mapping->nd_dimm != nd_dimm)
+ continue;
+
+ n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem);
+ if (n == 0)
+ return 0;
+ rem = scan_allocate(nd_region, nd_mapping, &label_id, n);
+ dev_WARN_ONCE(&nd_region->dev, rem,
+ "pmem reserve underrun: %#llx of %#llx bytes\n",
+ (unsigned long long) n - rem,
+ (unsigned long long) n);
+ return rem ? -ENXIO : 0;
+ }
+
+ return 0;
+}
+
+static void release_free_pmem(struct nd_bus *nd_bus, struct nd_mapping *nd_mapping)
+{
+ struct nd_dimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct resource *res, *_res;
+
+ for_each_dpa_resource_safe(ndd, res, _res)
+ if (strcmp(res->name, "pmem-reserve") == 0)
+ nd_dimm_free_dpa(ndd, res);
+}
+
+static int reserve_free_pmem(struct nd_bus *nd_bus,
+ struct nd_mapping *nd_mapping)
+{
+ struct nd_dimm *nd_dimm = nd_mapping->nd_dimm;
+ int rc;
+
+ rc = device_for_each_child(&nd_bus->dev, nd_dimm, __reserve_free_pmem);
+ if (rc)
+ release_free_pmem(nd_bus, nd_mapping);
+ return rc;
+}
+
/**
* grow_dpa_allocation - for each dimm allocate n bytes for @label_id
* @nd_region: the set of dimms to allocate @n more bytes from
@@ -432,13 +561,44 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
static int grow_dpa_allocation(struct nd_region *nd_region,
struct nd_label_id *label_id, resource_size_t n)
{
+ struct nd_bus *nd_bus = walk_to_nd_bus(&nd_region->dev);
+ bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
int i;

for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- int rc;
+ resource_size_t rem = n;
+ int rc, j;

- rc = scan_allocate(nd_region, nd_mapping, label_id, n);
+ /*
+ * In the BLK case try once with all unallocated PMEM
+ * reserved, and once without
+ */
+ for (j = is_pmem; j < 2; j++) {
+ bool blk_only = j == 0;
+
+ if (blk_only) {
+ rc = reserve_free_pmem(nd_bus, nd_mapping);
+ if (rc)
+ return rc;
+ }
+ rem = scan_allocate(nd_region, nd_mapping, label_id, rem);
+ if (blk_only)
+ release_free_pmem(nd_bus, nd_mapping);
+
+ /* try again and allow encroachments into PMEM */
+ if (rem == 0)
+ break;
+ }
+
+ dev_WARN_ONCE(&nd_region->dev, rem,
+ "allocation underrun: %#llx of %#llx bytes\n",
+ (unsigned long long) n - rem,
+ (unsigned long long) n);
+ if (rem)
+ return -ENXIO;
+
+ rc = merge_dpa(nd_region, nd_mapping, label_id);
if (rc)
return rc;
}
@@ -474,8 +634,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)

uuid = nspm->uuid;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ uuid = nsblk->uuid;
+ flags = NSLABEL_FLAG_LOCAL;
}

/*
@@ -529,6 +691,14 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)

nd_namespace_pmem_set_size(nd_region, nspm,
val * nd_region->ndr_mappings);
+ } else if (is_namespace_blk(dev)) {
+ /*
+ * Try to delete the namespace if we deleted all of its
+ * allocation and this is not the seed device for the
+ * region.
+ */
+ if (val == 0 && nd_region->ns_seed != dev)
+ nd_device_unregister(dev, ND_ASYNC);
}

return rc;
@@ -555,8 +725,9 @@ static ssize_t size_store(struct device *dev,

uuid = &nspm->uuid;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- rc = -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ uuid = &nsblk->uuid;
}

if (rc == 0 && val == 0 && uuid) {
@@ -577,21 +748,23 @@ static ssize_t size_store(struct device *dev,
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ unsigned long long size = 0;
+
+ nd_bus_lock(dev);
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);

- return sprintf(buf, "%llu\n", (unsigned long long)
- resource_size(&nspm->nsio.res));
+ size = resource_size(&nspm->nsio.res);
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ size = nd_namespace_blk_size(to_nd_namespace_blk(dev));
} else if (is_namespace_io(dev)) {
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);

- return sprintf(buf, "%llu\n", (unsigned long long)
- resource_size(&nsio->res));
- } else
- return -ENXIO;
+ size = resource_size(&nsio->res);
+ }
+ nd_bus_unlock(dev);
+
+ return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);

@@ -605,8 +778,9 @@ static ssize_t uuid_show(struct device *dev,

uuid = nspm->uuid;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ uuid = nsblk->uuid;
} else
return -ENXIO;

@@ -670,8 +844,9 @@ static ssize_t uuid_store(struct device *dev,

ns_uuid = &nspm->uuid;
} else if (is_namespace_blk(dev)) {
- /* TODO: blk namespace support */
- return -ENXIO;
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ ns_uuid = &nsblk->uuid;
} else
return -ENXIO;

@@ -713,12 +888,48 @@ static ssize_t resource_show(struct device *dev,
}
static DEVICE_ATTR_RO(resource);

+static const unsigned long ns_lbasize_supported[] = { 512, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ if (!is_namespace_blk(dev))
+ return -ENXIO;
+
+ return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+ ssize_t rc;
+
+ if (!is_namespace_blk(dev))
+ return -ENXIO;
+
+ device_lock(dev);
+ nd_bus_lock(dev);
+ rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
+ ns_lbasize_supported);
+ dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+ rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+ nd_bus_unlock(dev);
+ device_unlock(dev);
+
+ return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
static struct attribute *nd_namespace_attributes[] = {
&dev_attr_type.attr,
&dev_attr_size.attr,
&dev_attr_uuid.attr,
&dev_attr_resource.attr,
&dev_attr_alt_name.attr,
+ &dev_attr_sector_size.attr,
NULL,
};

@@ -735,6 +946,10 @@ static umode_t nd_namespace_attr_visible(struct kobject *kobj, struct attribute
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
if (a == &dev_attr_size.attr)
return S_IWUSR;
+
+ if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
+ return 0;
+
return a->mode;
}

@@ -1029,6 +1244,173 @@ static struct device **create_namespace_pmem(struct nd_region *nd_region)
return NULL;
}

+struct resource *nsblk_add_resource(struct nd_region *nd_region,
+ struct nd_dimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
+ resource_size_t start)
+{
+ struct nd_label_id label_id;
+ struct resource *res;
+
+ nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+ nsblk->res = krealloc(nsblk->res,
+ sizeof(void *) * (nsblk->num_resources + 1),
+ GFP_KERNEL);
+ if (!nsblk->res)
+ return NULL;
+ for_each_dpa_resource(ndd, res)
+ if (strcmp(res->name, label_id.id) == 0 && res->start == start) {
+ nsblk->res[nsblk->num_resources++] = res;
+ return res;
+ }
+ return NULL;
+}
+
+static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
+{
+ struct nd_namespace_blk *nsblk;
+ struct device *dev;
+
+ if (!is_nd_blk(&nd_region->dev))
+ return NULL;
+
+ nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+ if (!nsblk)
+ return NULL;
+
+ dev = &nsblk->dev;
+ dev->type = &namespace_blk_device_type;
+ nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
+ if (nsblk->id < 0) {
+ kfree(nsblk);
+ return NULL;
+ }
+ dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id);
+ dev->parent = &nd_region->dev;
+ dev->groups = nd_namespace_attribute_groups;
+
+ return &nsblk->dev;
+}
+
+void nd_region_create_blk_seed(struct nd_region *nd_region)
+{
+ WARN_ON(!is_nd_bus_locked(&nd_region->dev));
+ nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+ /*
+ * Seed creation failures are not fatal, provisioning is simply
+ * disabled until memory becomes available
+ */
+ if (!nd_region->ns_seed)
+ dev_err(&nd_region->dev, "failed to create blk namespace\n");
+ else
+ nd_device_register(nd_region->ns_seed);
+}
+
+static struct device **create_namespace_blk(struct nd_region *nd_region)
+{
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nd_namespace_label __iomem *nd_label;
+ struct device *dev, **devs = NULL;
+ u8 label_uuid[NSLABEL_UUID_LEN];
+ struct nd_namespace_blk *nsblk;
+ struct nd_dimm_drvdata *ndd;
+ int i, l, count = 0;
+ struct resource *res;
+
+ if (nd_region->ndr_mappings == 0)
+ return NULL;
+
+ ndd = to_ndd(nd_mapping);
+ for_each_label(l, nd_label, nd_mapping->labels) {
+ u32 flags = readl(&nd_label->flags);
+ char *name[NSLABEL_NAME_LEN];
+ struct device **__devs;
+
+ if (flags & NSLABEL_FLAG_LOCAL)
+ /* pass */;
+ else
+ continue;
+
+ memcpy_fromio(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+ for (i = 0; i < count; i++) {
+ nsblk = to_nd_namespace_blk(devs[i]);
+ if (memcmp(nsblk->uuid, label_uuid,
+ NSLABEL_UUID_LEN) == 0) {
+ res = nsblk_add_resource(nd_region, ndd, nsblk,
+ readq(&nd_label->dpa));
+ if (!res)
+ goto err;
+ nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
+ dev_name(&nsblk->dev));
+ break;
+ }
+ }
+ if (i < count)
+ continue;
+ __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
+ if (!__devs)
+ goto err;
+ memcpy(__devs, devs, sizeof(dev) * count);
+ kfree(devs);
+ devs = __devs;
+
+ nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+ if (!nsblk)
+ goto err;
+ dev = &nsblk->dev;
+ dev->type = &namespace_blk_device_type;
+ dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
+ devs[count++] = dev;
+ nsblk->id = -1;
+ nsblk->lbasize = readq(&nd_label->lbasize);
+ nsblk->uuid = kmemdup(label_uuid, NSLABEL_UUID_LEN, GFP_KERNEL);
+ if (!nsblk->uuid)
+ goto err;
+ memcpy_fromio(name, nd_label->name, NSLABEL_NAME_LEN);
+ if (name[0])
+ nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
+ GFP_KERNEL);
+ res = nsblk_add_resource(nd_region, ndd, nsblk,
+ readq(&nd_label->dpa));
+ if (!res)
+ goto err;
+ nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
+ dev_name(&nsblk->dev));
+ }
+
+ dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
+ __func__, count, count == 1 ? "" : "s");
+
+ if (count == 0) {
+ /* Publish a zero-sized namespace for userspace to configure. */
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+ kfree(nd_mapping->labels);
+ nd_mapping->labels = NULL;
+ }
+
+ devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
+ if (!devs)
+ goto err;
+ nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+ if (!nsblk)
+ goto err;
+ dev = &nsblk->dev;
+ dev->type = &namespace_blk_device_type;
+ devs[count++] = dev;
+ }
+
+ return devs;
+
+err:
+ for (i = 0; i < count; i++) {
+ nsblk = to_nd_namespace_blk(devs[i]);
+ namespace_blk_release(&nsblk->dev);
+ }
+ kfree(devs);
+ return NULL;
+}
+
static int init_active_labels(struct nd_region *nd_region)
{
int i;
@@ -1088,6 +1470,9 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
case ND_DEVICE_NAMESPACE_PMEM:
devs = create_namespace_pmem(nd_region);
break;
+ case ND_DEVICE_NAMESPACE_BLOCK:
+ devs = create_namespace_blk(nd_region);
+ break;
default:
break;
}
@@ -1101,23 +1486,56 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
nd_region->ns_seed = devs[0];
for (i = 0; devs[i]; i++) {
struct device *dev = devs[i];
+ int id;
+
+ if (type == ND_DEVICE_NAMESPACE_BLOCK) {
+ struct nd_namespace_blk *nsblk;

- dev_set_name(dev, "namespace%d.%d", nd_region->id, i);
+ nsblk = to_nd_namespace_blk(dev);
+ id = ida_simple_get(&nd_region->ns_ida, 0, 0,
+ GFP_KERNEL);
+ nsblk->id = id;
+ } else
+ id = i;
+
+ if (id < 0)
+ break;
+ dev_set_name(dev, "namespace%d.%d", nd_region->id, id);
dev->parent = &nd_region->dev;
dev->groups = nd_namespace_attribute_groups;
nd_device_register(dev);
}
- kfree(devs);

- return i;
+ if (devs[i]) {
+ int j;
+
+ for (j = i; devs[j]; j++) {
+ struct device *dev = devs[j];
+
+ device_initialize(dev);
+ put_device(dev);
+ }
+ *err = j - i;
+ /*
+ * All of the namespaces we tried to register failed, so
+ * fail region activation.
+ */
+ if (*err == 0)
+ rc = -ENODEV;
+ }
+ kfree(devs);

err:
- for (i = 0; i < nd_region->ndr_mappings; i++) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ if (rc == -ENODEV) {
+ nd_region->ns_seed = NULL;
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];

- kfree(nd_mapping->labels);
- nd_mapping->labels = NULL;
+ kfree(nd_mapping->labels);
+ nd_mapping->labels = NULL;
+ }
+ return rc;
}

- return rc;
+ return i;
}
diff --git a/drivers/block/nd/nd-private.h b/drivers/block/nd/nd-private.h
index 03b14ab8fc7d..1a14ebb40a1a 100644
--- a/drivers/block/nd/nd-private.h
+++ b/drivers/block/nd/nd-private.h
@@ -17,6 +17,7 @@
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/io.h>
+#include <linux/nd.h>
#include "nfit.h"

extern struct list_head nd_bus_list;
@@ -26,6 +27,8 @@ extern int nd_dimm_major;
enum {
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
+ /* mark newly adjusted resources as requiring a label update */
+ DPA_RESOURCE_ADJUSTED = 1 << 0,
};

/*
@@ -116,6 +119,8 @@ void nd_dimm_exit(void);
int nd_region_exit(void);
void nd_region_probe_start(struct nd_bus *nd_bus, struct device *dev);
void nd_region_probe_end(struct nd_bus *nd_bus, struct device *dev, int rc);
+struct nd_region;
+void nd_region_create_blk_seed(struct nd_region *nd_region);
void nd_region_notify_remove(struct nd_bus *nd_bus, struct device *dev, int rc);
int nd_bus_create_ndctl(struct nd_bus *nd_bus);
void nd_bus_destroy_ndctl(struct nd_bus *nd_bus);
@@ -131,10 +136,15 @@ struct nd_dimm_drvdata;
struct nd_mapping;
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, resource_size_t *overlap);
+resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
struct resource *nd_dimm_allocate_dpa(struct nd_dimm_drvdata *ndd,
struct nd_label_id *label_id, resource_size_t start,
resource_size_t n);
resource_size_t nd_dimm_allocated_dpa(struct nd_dimm_drvdata *ndd,
struct nd_label_id *label_id);
+struct nd_mapping;
+struct resource *nsblk_add_resource(struct nd_region *nd_region,
+ struct nd_dimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
+ resource_size_t start);
#endif /* __ND_PRIVATE_H__ */
diff --git a/drivers/block/nd/nd.h b/drivers/block/nd/nd.h
index 386e17056d3c..5adb55e76b33 100644
--- a/drivers/block/nd/nd.h
+++ b/drivers/block/nd/nd.h
@@ -97,6 +97,7 @@ static inline struct nd_namespace_label __iomem *nd_get_label(
struct nd_region {
struct device dev;
struct nd_spa *nd_spa;
+ struct ida ns_ida;
struct device *ns_seed;
u16 ndr_mappings;
u64 ndr_size;
@@ -125,6 +126,10 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
u64 nd_fletcher64(void __iomem *addr, size_t len);
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len);
+ssize_t nd_sector_size_show(unsigned long current_lbasize,
+ const unsigned long *supported, char *buf);
+ssize_t nd_sector_size_store(struct device *dev, const char *buf,
+ unsigned long *current_lbasize, const unsigned long *supported);
extern struct attribute_group nd_device_attribute_group;
struct nd_dimm;
struct nd_dimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
diff --git a/drivers/block/nd/region_devs.c b/drivers/block/nd/region_devs.c
index 8bcfd9b91a71..1b2c81e0eb0f 100644
--- a/drivers/block/nd/region_devs.c
+++ b/drivers/block/nd/region_devs.c
@@ -112,7 +112,12 @@ static int is_uuid_busy(struct device *dev, void *data)
break;
}
case ND_DEVICE_NAMESPACE_BLOCK: {
- /* TODO: blk namespace support */
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ if (!nsblk->uuid)
+ break;
+ if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0)
+ return -EBUSY;
break;
}
default:
@@ -235,7 +240,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
goto retry;
}
} else if (is_nd_blk(&nd_region->dev)) {
- /* TODO: BLK Namespace support */
+ available += nd_blk_available_dpa(nd_mapping);
}
}

@@ -494,6 +499,11 @@ static void nd_region_notify_driver_action(struct nd_bus *nd_bus,
else
atomic_dec(&nd_dimm->busy);
}
+ } else if (dev->parent && is_nd_blk(dev->parent) && probe && rc == 0) {
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+
+ if (nd_region->ns_seed == dev)
+ nd_region_create_blk_seed(nd_region);
}
}

@@ -735,6 +745,7 @@ static struct nd_region *nd_region_create(struct nd_bus *nd_bus,
dev->groups = nd_region_attribute_groups;
nd_region->ndr_size = readq(&nd_spa->nfit_spa->spa_length);
nd_region->ndr_start = readq(&nd_spa->nfit_spa->spa_base);
+ ida_init(&nd_region->ns_ida);
switch (spa_type) {
case NFIT_SPA_PM:
nd_spa_range_init(nd_bus, nd_region, &nd_pmem_device_type);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 255c38a83083..23276ea91690 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -50,6 +50,26 @@ struct nd_namespace_pmem {
u8 *uuid;
};

+/**
+ * struct nd_namespace_blk - namespace for dimm-bounded persistent memory
+ * @dev: namespace device creation by the nd region driver
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ * @id: ida allocated id
+ * @lbasize: blk namespaces have a native sector size when btt not present
+ * @num_resources: number of dpa extents to claim
+ * @res: discontiguous dpa extents for given dimm
+ */
+struct nd_namespace_blk {
+ struct device dev;
+ char *alt_name;
+ u8 *uuid;
+ int id;
+ unsigned long lbasize;
+ int num_resources;
+ struct resource **res;
+};
+
static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
{
return container_of(dev, struct nd_namespace_io, dev);
@@ -62,6 +82,11 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
return container_of(nsio, struct nd_namespace_pmem, nsio);
}

+static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
+{
+ return container_of(dev, struct nd_namespace_blk, dev);
+}
+
#define MODULE_ALIAS_ND_DEVICE(type) \
MODULE_ALIAS("nd:t" __stringify(type) "*")
#define ND_DEVICE_MODALIAS_FMT "nd:t%d"

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/