Re: [PATCH 05/12] PCI: Require CAP_COMPROMISE_KERNEL for PCI BARaccess

From: Kyle McMartin
Date: Wed Mar 27 2013 - 11:08:20 EST


On Wed, Mar 27, 2013 at 11:03:26AM -0400, Josh Boyer wrote:
> On Mon, Mar 18, 2013 at 5:32 PM, Matthew Garrett
> <matthew.garrett@xxxxxxxxxx> wrote:
> > Any hardware that can potentially generate DMA has to be locked down from
> > userspace in order to avoid it being possible for an attacker to cause
> > arbitrary kernel behaviour. Default to paranoid - in future we can
> > potentially relax this for sufficiently IOMMU-isolated devices.
> >
> > Signed-off-by: Matthew Garrett <matthew.garrett@xxxxxxxxxx>
>
> As noted here:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=908888
>
> this breaks pci passthru with QEMU. The suggestion in the bug is to move
> the check from read/write to open, but sysfs makes that somewhat
> difficult. The open code is part of the core sysfs functionality shared
> with the majority of sysfs files, so adding a check there would restrict
> things that clearly don't need to be restricted.
>
> Kyle had the idea to add a cap field to the attribute structure, and do
> a capable check if that is set. That would allow for a more generic
> usage of capabilities in sysfs code, at the cost of slightly increasing
> the structure size and open path. That seems somewhat promising if we
> stick with capabilities.
>
> I would love to just squarely blame capabilities for causing this, but we
> can't just replace it with an efi_enabled(EFI_SECURE_BOOT) check because
> of the sysfs open case. I'm not sure there are great answers here.
>

Yeah, that was something like this (I don't even remember which Fedora
kernel version this was against.)

--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -546,9 +546,6 @@ pci_write_config(struct file* filp, struct kobject *kobj,
loff_t init_off = off;
u8 *data = (u8*) buf;

- if (!capable(CAP_COMPROMISE_KERNEL))
- return -EPERM;
-
if (off > dev->cfg_size)
return 0;
if (off + count > dev->cfg_size) {
@@ -772,6 +769,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_io->attr.name = "legacy_io";
b->legacy_io->size = 0xffff;
b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+ b->legacy_io->attr.cap = CAP_COMPROMISE_KERNEL;
b->legacy_io->read = pci_read_legacy_io;
b->legacy_io->write = pci_write_legacy_io;
b->legacy_io->mmap = pci_mmap_legacy_io;
@@ -786,6 +784,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_mem->attr.name = "legacy_mem";
b->legacy_mem->size = 1024*1024;
b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+ b->legacy_io->attr.cap = CAP_COMPROMISE_KERNEL;
b->legacy_mem->mmap = pci_mmap_legacy_mem;
pci_adjust_legacy_attr(b, pci_mmap_mem);
error = device_create_bin_file(&b->dev, b->legacy_mem);
@@ -855,9 +854,6 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
resource_size_t start, end;
int i;

- if (!capable(CAP_COMPROMISE_KERNEL))
- return -EPERM;
-
for (i = 0; i < PCI_ROM_RESOURCE; i++)
if (res == &pdev->resource[i])
break;
@@ -965,9 +961,6 @@ pci_write_resource_io(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
- if (!capable(CAP_COMPROMISE_KERNEL))
- return -EPERM;
-
return pci_resource_io(filp, kobj, attr, buf, off, count, true);
}

@@ -1027,6 +1020,7 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
}
res_attr->attr.name = res_attr_name;
res_attr->attr.mode = S_IRUSR | S_IWUSR;
+ res_attr->attr.cap = CAP_COMPROMISE_KERNEL;
res_attr->size = pci_resource_len(pdev, num);
res_attr->private = &pdev->resource[num];
retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
@@ -1142,6 +1136,7 @@ static struct bin_attribute pci_config_attr = {
.attr = {
.name = "config",
.mode = S_IRUGO | S_IWUSR,
+ .cap = CAP_COMPROMISE_KERNEL,
},
.size = PCI_CFG_SPACE_SIZE,
.read = pci_read_config,
@@ -1152,6 +1147,7 @@ static struct bin_attribute pcie_config_attr = {
.attr = {
.name = "config",
.mode = S_IRUGO | S_IWUSR,
+ .cap = CAP_COMPROMISE_KERNEL,
},
.size = PCI_CFG_SPACE_EXP_SIZE,
.read = pci_read_config,
@@ -1201,6 +1197,7 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
attr->size = dev->vpd->len;
attr->attr.name = "vpd";
attr->attr.mode = S_IRUSR | S_IWUSR;
+ attr->attr.cap = CAP_COMPROMISE_KERNEL;
attr->read = read_vpd_attr;
attr->write = write_vpd_attr;
retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b5..e40a725 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -402,6 +402,10 @@ static int open(struct inode * inode, struct file * file)
if (!sysfs_get_active(attr_sd))
return -ENODEV;

+ error = -EACCES;
+ if (attr->attr.cap && !capable(attr->attr.cap))
+ goto err_out;
+
error = -EACCES;
if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
goto err_out;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 381f06d..0cf0034 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -26,6 +26,7 @@ enum kobj_ns_type;
struct attribute {
const char *name;
umode_t mode;
+ int cap;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
bool ignore_lockdep:1;
struct lock_class_key *key;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/