Re: [PATCH V2] xen/virtio: Handle PCI devices which Host controller is described in DT

From: Xenia Ragiadakou
Date: Fri Oct 21 2022 - 02:08:48 EST


On 10/20/22 23:07, Oleksandr Tyshchenko wrote:
Hi Oleksandr

On 20.10.22 21:11, Xenia Ragiadakou wrote:

Hello Xenia


On 10/20/22 17:12, Oleksandr Tyshchenko wrote:

On 20.10.22 11:24, Xenia Ragiadakou wrote:
On 10/19/22 22:41, Oleksandr Tyshchenko wrote:

Hi Oleksandr


Hello Xenia




On 19.10.22 11:47, Xenia Ragiadakou wrote:

Hello Xenia

On 10/19/22 03:58, Stefano Stabellini wrote:
On Sat, 15 Oct 2022, Oleksandr Tyshchenko wrote:
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>

Use the same "xen-grant-dma" device concept for the PCI devices
behind device-tree based PCI Host controller, but with one
modification.
Unlike for platform devices, we cannot use generic IOMMU bindings
(iommus property), as we need to support more flexible
configuration.
The problem is that PCI devices under the single PCI Host
controller
may have the backends running in different Xen domains and thus
have
different endpoints ID (backend domains ID).

So use generic PCI-IOMMU bindings instead (iommu-map/iommu-map-mask
properties) which allows us to describe relationship between PCI
devices and backend domains ID properly.

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>

Now that I understood the approach and the reasons for it, I can
review
the patch :-)

Please add an example of the bindings in the commit message.


---
Slightly RFC. This is needed to support Xen grant mappings for
virtio-pci devices
on Arm at some point in the future. The Xen toolstack side is not
completely ready yet.
Here, for PCI devices we use more flexible way to pass backend
domid
to the guest
than for platform devices.

Changes V1 -> V2:
      - update commit description
      - rebase
      - rework to use generic PCI-IOMMU bindings instead of generic
IOMMU bindings

Previous discussion is at:
https://urldefense.com/v3/__https://lore.kernel.org/xen-devel/20221006174804.2003029-1-olekstysh@xxxxxxxxx/__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauACie_ZAy$


[lore[.]kernel[.]org]

Based on:
https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git/log/?h=for-linus-6.1__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauAEnMDHAq$


[git[.]kernel[.]org]
---
    drivers/xen/grant-dma-ops.c | 87
++++++++++++++++++++++++++++++++-----
    1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/grant-dma-ops.c
b/drivers/xen/grant-dma-ops.c
index daa525df7bdc..b79d9d6ce154 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -10,6 +10,7 @@
    #include <linux/module.h>
    #include <linux/dma-map-ops.h>
    #include <linux/of.h>
+#include <linux/pci.h>
    #include <linux/pfn.h>
    #include <linux/xarray.h>
    #include <linux/virtio_anchor.h>
@@ -292,12 +293,55 @@ static const struct dma_map_ops
xen_grant_dma_ops = {
        .dma_supported = xen_grant_dma_supported,
    };
    +static struct device_node *xen_dt_get_pci_host_node(struct
device
*dev)
+{
+    struct pci_dev *pdev = to_pci_dev(dev);
+    struct pci_bus *bus = pdev->bus;
+
+    /* Walk up to the root bus to look for PCI Host controller */
+    while (!pci_is_root_bus(bus))
+        bus = bus->parent;
+
+    return of_node_get(bus->bridge->parent->of_node);
+}

It seems silly that we need to walk the hierachy that way, but I
couldn't find another way to do it


+static struct device_node *xen_dt_get_node(struct device *dev)
+{
+    if (dev_is_pci(dev))
+        return xen_dt_get_pci_host_node(dev);
+
+    return of_node_get(dev->of_node);
+}
+
+static int xen_dt_map_id(struct device *dev, struct device_node
**iommu_np,
+             u32 *sid)
+{
+    struct pci_dev *pdev = to_pci_dev(dev);
+    u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+    struct device_node *host_np;
+    int ret;
+
+    host_np = xen_dt_get_pci_host_node(dev);
+    if (!host_np)
+        return -ENODEV;
+
+    ret = of_map_id(host_np, rid, "iommu-map", "iommu-map-mask",
iommu_np, sid);
+    of_node_put(host_np);
+    return ret;
+}
+
    static bool xen_is_dt_grant_dma_device(struct device *dev)
    {
-    struct device_node *iommu_np;
+    struct device_node *iommu_np = NULL;
        bool has_iommu;
    -    iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+    if (dev_is_pci(dev)) {
+        if (xen_dt_map_id(dev, &iommu_np, NULL))
+            return false;
+    } else
+        iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+
        has_iommu = iommu_np &&
                of_device_is_compatible(iommu_np, "xen,grant-dma");
        of_node_put(iommu_np);
@@ -307,9 +351,17 @@ static bool xen_is_dt_grant_dma_device(struct
device *dev)
      bool xen_is_grant_dma_device(struct device *dev)
    {
+    struct device_node *np;
+
        /* XXX Handle only DT devices for now */
-    if (dev->of_node)
-        return xen_is_dt_grant_dma_device(dev);
+    np = xen_dt_get_node(dev);
+    if (np) {
+        bool ret;
+
+        ret = xen_is_dt_grant_dma_device(dev);
+        of_node_put(np);
+        return ret;
+    }

We don't need to walk the PCI hierachy twice. Maybe we can add the
of_node check directly to xen_is_dt_grant_dma_device?


I think in general we could pass directly the host bridge device if
dev_is_pci(dev) (which can be retrieved with
pci_get_host_bridge_device(to_pci_dev(dev), and after done with it
pci_put_host_bridge_device(phb)).
So that, xen_is_dt_grant_dma_device() and
xen_dt_grant_init_backend_domid() won't need to discover it
themselves.
This will simplify the code.


Good point. I have some remark. Can we use pci_find_host_bridge()
instead? This way we don't have to add #include "../pci/pci.h", and
have
to drop reference afterwards.

With that xen_dt_get_pci_host_node() will became the following:


static struct device_node *xen_dt_get_pci_host_node(struct device
*dev)
{
       struct pci_host_bridge *bridge =
pci_find_host_bridge(to_pci_dev(dev)->bus);

       return of_node_get(bridge->dev.parent->of_node);
}


You are right. I prefer your version instead of the above.


ok, thanks





With Stefano's suggestion, we won't walk the PCI hierarchy twice when
executing xen_is_grant_dma_device() for PCI device:

xen_is_grant_dma_device() -> xen_is_dt_grant_dma_device() ->
xen_dt_map_id() -> xen_dt_get_pci_host_node()


What do you think?


I was thinking passing the device_node along with the device in the
function arguments. More specifically, of doing this (not tested, just
an idea):

bool xen_is_grant_dma_device(struct device *dev)
{
     struct device_node *np;
     bool has_iommu = false;

     /* XXX Handle only DT devices for now */
     np = xen_dt_get_node(dev);
     if (np)
         has_iommu = xen_is_dt_grant_dma_device(dev, np);
     of_node_put(np);
     return has_iommu;
}

static bool xen_is_dt_grant_dma_device(struct device *dev,
                                        struct device_node *np)
{
     struct device_node *iommu_np = NULL;
     bool has_iommu;

     if (dev_is_pci(dev)) {
         struct pci_dev *pdev = to_pci_dev(dev);
     u32 id = PCI_DEVID(pdev->bus->number, pdev->devfn);
         of_map_id(np, id, "iommu-map", "iommu-map-mask", &iommu_np,
NULL);
     } else {
         iommu_np = of_parse_phandle(np, "iommus", 0);
     }

     has_iommu = iommu_np && of_device_is_compatible(iommu_np,
"xen,grant-dma");
     of_node_put(iommu_np);

     return has_iommu;
}


I got it.

xen_is_grant_dma_device() for V3 won't call xen_dt_get_node(), but call
xen_is_dt_grant_dma_device() directly.

static bool xen_is_dt_grant_dma_device(struct device *dev)
{
      struct device_node *iommu_np = NULL;
      bool has_iommu;

      if (dev_is_pci(dev)) {
          if (xen_dt_map_id(dev, &iommu_np, NULL))
              return false;
      } else if (dev->of_node)
          iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
      else
          return false;

      has_iommu = iommu_np &&
              of_device_is_compatible(iommu_np, "xen,grant-dma");
      of_node_put(iommu_np);

      return has_iommu;
}

bool xen_is_grant_dma_device(struct device *dev)
{
      /* XXX Handle only DT devices for now */
      return xen_is_dt_grant_dma_device(dev);
}



Ok. One difference, that I see from the previous, is that here you
don't use the dynamic interface when you access the dev->of_node
(of_node_get/of_node_put). Before, this was guarded through the
external xen_dt_get_node().

I suspect that the same needs to be done for the function
xen_grant_setup_dma_ops(). There, also, the code walks up to the root
bus twice.


Hmm, xen_dt_grant_init_backend_domid() should only be called if we deal
with device-tree based device.

I think you are completely right, thanks!

In order to address both your comments, I think I need to rework the
code (taking into the account your example with xen_is_dt_grant_dma_device()

provided a few letters ago and extrapolate this example to
xen_dt_grant_init_backend_domid()). Below the patch (not tested) which
seems to address both your comments (also I dropped

xen_dt_map_id() and squashed xen_dt_get_pci_host_node() with
xen_dt_get_node()).


diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index daa525df7bdc..dae24dbd2ef7 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/dma-map-ops.h>
 #include <linux/of.h>
+#include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/xarray.h>
 #include <linux/virtio_anchor.h>
@@ -292,12 +293,33 @@ static const struct dma_map_ops xen_grant_dma_ops = {
        .dma_supported = xen_grant_dma_supported,
 };

-static bool xen_is_dt_grant_dma_device(struct device *dev)
+static struct device_node *xen_dt_get_node(struct device *dev)
 {
-       struct device_node *iommu_np;
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               struct pci_host_bridge *bridge =
pci_find_host_bridge(pdev->bus);
+
+               return of_node_get(bridge->dev.parent->of_node);
+       }
+
+       return of_node_get(dev->of_node);
+}
+

It does not seem right to me to expose the struct pci_host_bridge (which we would need to check if it is null by the way). I would prefer your version for the above i.e
static struct device_node *xen_dt_get_node(struct device *dev)
{
if (dev_is_pci(dev)) {
struct pci_bus *bus = to_pci_dev(dev)->bus;

/* Walk up to the root bus to look for PCI Host controller */
while (!pci_is_root_bus(bus))
bus = bus->parent;
return of_node_get(bus->bridge->parent->of_node);
}

return of_node_get(dev->of_node);
}

+static bool xen_is_dt_grant_dma_device(struct device *dev,
+                                       struct device_node *np)
+{
+       struct device_node *iommu_np = NULL;
        bool has_iommu;

-       iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+               if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_np, NULL))
+                       return false;
+       } else
+               iommu_np = of_parse_phandle(np, "iommus", 0);
+
        has_iommu = iommu_np &&
                    of_device_is_compatible(iommu_np, "xen,grant-dma");
        of_node_put(iommu_np);
@@ -307,9 +329,17 @@ static bool xen_is_dt_grant_dma_device(struct
device *dev)

 bool xen_is_grant_dma_device(struct device *dev)
 {
+       struct device_node *np;
+
        /* XXX Handle only DT devices for now */
-       if (dev->of_node)
-               return xen_is_dt_grant_dma_device(dev);
+       np = xen_dt_get_node(dev);
+       if (np) {
+               bool ret;
+
+               ret = xen_is_dt_grant_dma_device(dev, np);
+               of_node_put(np);
+               return ret;
+       }

        return false;
 }

@@ -323,14 +353,26 @@ bool xen_virtio_mem_acc(struct virtio_device *dev)
 }

 static int xen_dt_grant_init_backend_domid(struct device *dev,
+                                          struct device_node *np,
                                           struct xen_grant_dma_data *data)
 {
-       struct of_phandle_args iommu_spec;
+       struct of_phandle_args iommu_spec = { .args_count = 1 };

-       if (of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
-                       0, &iommu_spec)) {
-               dev_err(dev, "Cannot parse iommus property\n");
-               return -ESRCH;
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+               if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_spec.np,
+                               iommu_spec.args)) {
+                       dev_err(dev, "Cannot translate ID\n");
+                       return -ESRCH;
+               }
+       } else {
+               if (of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
+                               0, &iommu_spec)) {
+                       dev_err(dev, "Cannot parse iommus property\n");
+                       return -ESRCH;
+               }
        }


IMO, instead of passing struct xen_grant_dma_data *data to xen_dt_grant_init_backend_domid(), you could pass domid_t *backend_domid (e.g xen_dt_grant_init_backend_domid(dev, np, &data->backend_domid)).
I think this way the internal struct xen_grant_dma_datain is manipulated in a single place and xen_dt_grant_init_backend_domid() does not depend on it.

        if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
@@ -354,6 +396,7 @@ static int xen_dt_grant_init_backend_domid(struct
device *dev,
 void xen_grant_setup_dma_ops(struct device *dev)
 {
        struct xen_grant_dma_data *data;
+       struct device_node *np;

        data = find_xen_grant_dma_data(dev);
        if (data) {
@@ -365,8 +408,13 @@ void xen_grant_setup_dma_ops(struct device *dev)
        if (!data)
                goto err;

-       if (dev->of_node) {
-               if (xen_dt_grant_init_backend_domid(dev, data))
+       np = xen_dt_get_node(dev);
+       if (np) {
+               int ret;
+
+               ret = xen_dt_grant_init_backend_domid(dev, np, data);
+               of_node_put(np);
+               if (ret)
                        goto err;
        } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
                dev_info(dev, "Using dom0 as backend\n");


Does it look ok now?

That is what I had in mind. I do not know if Stefano agrees with this approach.




I 'm wondering ... is it possible for the host bridge device node to
have the iommus property set? meaning that all of its pci devs will
have the same backend?

Good question. I think, it is possible... This is technically what V1 is
doing.


Are you asking because to support "iommus" for PCI devices as well to
describe that use-case with all PCI devices having the same endpoint ID
(backend ID)?
If yes, I think, this could be still described by "iommu-map" property,
something like that (if we don't want to describe mapping for each PCI
device one-by-one).

iommu-map = <0x0 &iommu X 0x1>;

iommu-map-mask = <0x0>;

where the X is backend ID.


It feels to me that it should be written down somewhere that for
platform devices we expect "iommus" and for PCI devices we expect
"iommu-map/iommu-map-mask" to be present.

Thanks for the clarification, now I got it. Yes I agree.


ok, good







        return false;
    }
@@ -325,12 +377,19 @@ bool xen_virtio_mem_acc(struct virtio_device
*dev)
    static int xen_dt_grant_init_backend_domid(struct device *dev,
                           struct xen_grant_dma_data *data)
    {
-    struct of_phandle_args iommu_spec;
+    struct of_phandle_args iommu_spec = { .args_count = 1 };
    -    if (of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
-            0, &iommu_spec)) {
-        dev_err(dev, "Cannot parse iommus property\n");
-        return -ESRCH;
+    if (dev_is_pci(dev)) {
+        if (xen_dt_map_id(dev, &iommu_spec.np, iommu_spec.args)) {
+            dev_err(dev, "Cannot translate ID\n");
+            return -ESRCH;
+        }
+    } else {
+        if (of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
+                0, &iommu_spec)) {
+            dev_err(dev, "Cannot parse iommus property\n");
+            return -ESRCH;
+        }
        }
          if (!of_device_is_compatible(iommu_spec.np,
"xen,grant-dma") ||
@@ -354,6 +413,7 @@ static int
xen_dt_grant_init_backend_domid(struct device *dev,
    void xen_grant_setup_dma_ops(struct device *dev)
    {
        struct xen_grant_dma_data *data;
+    struct device_node *np;
          data = find_xen_grant_dma_data(dev);
        if (data) {
@@ -365,8 +425,13 @@ void xen_grant_setup_dma_ops(struct device
*dev)
        if (!data)
            goto err;
    -    if (dev->of_node) {
-        if (xen_dt_grant_init_backend_domid(dev, data))
+    np = xen_dt_get_node(dev);
+    if (np) {
+        int ret;
+
+        ret = xen_dt_grant_init_backend_domid(dev, data);
+        of_node_put(np);
+        if (ret)
                goto err;
        } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
            dev_info(dev, "Using dom0 as backend\n");
--
2.25.1






--
Xenia