Re: [PATCH v1 04/15] nitro_enclaves: Init PCI device driver

From: Paraschiv, Andra-Irina
Date: Wed Apr 29 2020 - 12:32:06 EST




On 25/04/2020 17:25, Liran Alon wrote:

On 21/04/2020 21:41, Andra Paraschiv wrote:
+
+/**
+ * ne_setup_msix - Setup MSI-X vectors for the PCI device.
+ *
+ * @pdev: PCI device to setup the MSI-X for.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_setup_msix(struct pci_dev *pdev, struct ne_pci_dev *ne_pci_dev)
+{
+ÂÂÂ int nr_vecs = 0;
+ÂÂÂ int rc = -EINVAL;
+
+ÂÂÂ BUG_ON(!ne_pci_dev);
This kind of defensive programming does not align with Linux coding convention.
I think these BUG_ON() conditions should be removed.

I replaced with WARN_ON here and in the other places in the codebase.

+
+ÂÂÂ nr_vecs = pci_msix_vec_count(pdev);
+ÂÂÂ if (nr_vecs < 0) {
+ÂÂÂÂÂÂÂ rc = nr_vecs;
+
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in getting vec count [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ return rc;
+ÂÂÂ }
+
+ÂÂÂ rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in alloc MSI-X vecs [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ goto err_alloc_irq_vecs;
You should just replace this with "return rc;" as no cleanup is required here.

Done.

+ÂÂÂ }
+
+ÂÂÂ return 0;
+
+err_alloc_irq_vecs:
+ÂÂÂ return rc;
+}
+
+/**
+ * ne_pci_dev_enable - Select PCI device version and enable it.
+ *
+ * @pdev: PCI device to select version for and then enable.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_pci_dev_enable(struct pci_dev *pdev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ struct ne_pci_dev *ne_pci_dev)
+{
+ÂÂÂ u8 dev_enable_reply = 0;
+ÂÂÂ u16 dev_version_reply = 0;
+
+ÂÂÂ BUG_ON(!pdev);
+ÂÂÂ BUG_ON(!ne_pci_dev);
+ÂÂÂ BUG_ON(!ne_pci_dev->iomem_base);
Same.
+
+ÂÂÂ iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
+
+ÂÂÂ dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
+ÂÂÂ if (dev_version_reply != NE_VERSION_MAX) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci dev version cmd\n");
+
+ÂÂÂÂÂÂÂ return -EIO;
+ÂÂÂ }
+
+ÂÂÂ iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
+
+ÂÂÂ dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
+ÂÂÂ if (dev_enable_reply != NE_ENABLE_ON) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci dev enable cmd\n");
+
+ÂÂÂÂÂÂÂ return -EIO;
+ÂÂÂ }
+
+ÂÂÂ return 0;
+}
+
+/**
+ * ne_pci_dev_disable - Disable PCI device.
+ *
+ * @pdev: PCI device to disable.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_pci_dev_disable(struct pci_dev *pdev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ struct ne_pci_dev *ne_pci_dev)
+{
+ÂÂÂ u8 dev_disable_reply = 0;
+
+ÂÂÂ BUG_ON(!pdev);
+ÂÂÂ BUG_ON(!ne_pci_dev);
+ÂÂÂ BUG_ON(!ne_pci_dev->iomem_base);
Same.
+
+ÂÂÂ iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
+
+ÂÂÂ /*
+ÂÂÂÂ * TODO: Check for NE_ENABLE_OFF in a loop, to handle cases when the
+ÂÂÂÂ * device state is not immediately set to disabled and going through a
+ÂÂÂÂ * transitory state of disabling.
+ÂÂÂÂ */
+ÂÂÂ dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
+ÂÂÂ if (dev_disable_reply != NE_ENABLE_OFF) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci dev disable cmd\n");
+
+ÂÂÂÂÂÂÂ return -EIO;
+ÂÂÂ }
+
+ÂÂÂ return 0;
+}
+
+static int ne_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ÂÂÂ struct ne_pci_dev *ne_pci_dev = NULL;
+ÂÂÂ int rc = -EINVAL;
Unnecessary variable initialization.
ne_pci_dev and rc are initialized below always before they are used.

I would rather keep the initialization in place overall, to not have a mix of init and uninit vars, when needed.

+
+ÂÂÂ ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
+ÂÂÂ if (!ne_pci_dev)
+ÂÂÂÂÂÂÂ return -ENOMEM;
+
+ÂÂÂ rc = pci_enable_device(pdev);
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci dev enable [rc=%d]\n", rc);
+
Why is this dev_err_ratelimited() instead of dev_err()?
Same for the rest of error printing in this probe() method and other places in this patch.

Just to avoid any misbehaving scenario, where would be way too many logs in a short timeframe. Here may not happen, but while handling PCI dev requests could be.

+ÂÂÂÂÂÂÂ goto err_pci_enable_dev;
I find it confusing that the error labels are named based on the failure-case they are used,
instead of the action they do (i.e. Unwind previous successful operation that requires unwinding).
This doesn't seem to match Linux kernel coding convention.
It also created an unnecessary 2 labels pointing to the same place in cleanup code.

Yep, that's better this way wrt the naming of the labels. I updated the gotos in the patch series.

+ÂÂÂ }
+
+ÂÂÂ rc = pci_request_regions_exclusive(pdev, "ne_pci_dev");
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci request regions [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ goto err_req_regions;
+ÂÂÂ }
+
+ÂÂÂ ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
+ÂÂÂ if (!ne_pci_dev->iomem_base) {
+ÂÂÂÂÂÂÂ rc = -ENOMEM;
+
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci bar mapping [rc=%d]\n", rc);
+
+ÂÂÂÂÂÂÂ goto err_iomap;
+ÂÂÂ }
+
+ÂÂÂ rc = ne_setup_msix(pdev, ne_pci_dev);
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in pci dev msix setup [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ goto err_setup_msix;
+ÂÂÂ }
+
+ÂÂÂ rc = ne_pci_dev_disable(pdev, ne_pci_dev);
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in ne_pci_dev disable [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ goto err_ne_pci_dev_disable;
+ÂÂÂ }
It seems weird that we need to disable the device before enabling it on the probe() method.
Why can't we just enable the device without disabling it?

The pci dev disable call cleanups the internal state of the device and terminates any running / "dangling" enclaves; here it is included just in case any remaining state from a previous PCI device use. The below enable call would fail in that case, though.

+
+ÂÂÂ rc = ne_pci_dev_enable(pdev, ne_pci_dev);
+ÂÂÂ if (rc < 0) {
+ÂÂÂÂÂÂÂ dev_err_ratelimited(&pdev->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Failure in ne_pci_dev enable [rc=%d]\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ rc);
+
+ÂÂÂÂÂÂÂ goto err_ne_pci_dev_enable;
+ÂÂÂ }
+
+ÂÂÂ atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
+ÂÂÂ init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
+ÂÂÂ INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
+ÂÂÂ mutex_init(&ne_pci_dev->enclaves_list_mutex);
+ÂÂÂ mutex_init(&ne_pci_dev->pci_dev_mutex);
+
+ÂÂÂ pci_set_drvdata(pdev, ne_pci_dev);
If you would have pci_set_drvdata() as one of the first operations in ne_probe(), then you could have avoided
passing both struct pci_dev and struct ne_pci_dev parameters to ne_setup_msix(), ne_pci_dev_enable() and ne_pci_dev_disable().
Which would have been a bit more elegant.

Fair point. I moved pci_set_drvdata() upper in the logic and updated the signature of the functions to only include the pci_dev parameter.

+
+ÂÂÂ return 0;
+
+err_ne_pci_dev_enable:
+err_ne_pci_dev_disable:
+ÂÂÂ pci_free_irq_vectors(pdev);
+err_setup_msix:
+ÂÂÂ pci_iounmap(pdev, ne_pci_dev->iomem_base);
+err_iomap:
+ÂÂÂ pci_release_regions(pdev);
+err_req_regions:
+ÂÂÂ pci_disable_device(pdev);
+err_pci_enable_dev:
+ÂÂÂ kzfree(ne_pci_dev);
An empty new-line is appropriate here.
To separate the return statement from the cleanup logic.

Done.

+ÂÂÂ return rc;
+}
+
+static void ne_remove(struct pci_dev *pdev)
+{
+ÂÂÂ struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
+
+ÂÂÂ if (!ne_pci_dev || !ne_pci_dev->iomem_base)
+ÂÂÂÂÂÂÂ return;
Why is this condition necessary?
The ne_remove() function should be called only in case ne_probe() succeeded.
In that case, both ne_pci_dev and ne_pci_dev->iomem_base should be non-NULL.

Correct, that shouldn't happen.

Just for early exit in case of bad behavior.

+
+ÂÂÂ ne_pci_dev_disable(pdev, ne_pci_dev);
+
+ÂÂÂ pci_set_drvdata(pdev, NULL);
+
+ÂÂÂ pci_free_irq_vectors(pdev);
+
+ÂÂÂ pci_iounmap(pdev, ne_pci_dev->iomem_base);
+
+ÂÂÂ kzfree(ne_pci_dev);
+
+ÂÂÂ pci_release_regions(pdev);
+
+ÂÂÂ pci_disable_device(pdev);
You should aspire to keep ne_remove() order of operations to be the reverse order of operations done in ne_probe().
Which would also nicely match the order of operations done in ne_probe() cleanup.
i.e. The following order:

pci_set_drvdata();
ne_pci_dev_disable();
pci_free_irq_vectors();
pci_iounmap();
pci_release_regions();
pci_disable_device()
kzfree();

I updated the order of operations.


Thanks for review, Liran.

Andra





Amazon Development Center (Romania) S.R.L. registered office: 27A Sf. Lazar Street, UBC5, floor 2, Iasi, Iasi County, 700045, Romania. Registered in Romania. Registration number J22/2621/2005.