Re: [PATCH V2] xen/virtio: Handle PCI devices which Host controller is described in DT

From: Oleksandr Tyshchenko
Date: Thu Oct 20 2022 - 16:08:19 EST



On 20.10.22 21:11, Xenia Ragiadakou wrote:

Hello Xenia


> On 10/20/22 17:12, Oleksandr Tyshchenko wrote:
>>
>> On 20.10.22 11:24, Xenia Ragiadakou wrote:
>>> On 10/19/22 22:41, Oleksandr Tyshchenko wrote:
>>>
>>> Hi Oleksandr
>>
>>
>> Hello Xenia
>>
>>
>>>
>>>>
>>>> On 19.10.22 11:47, Xenia Ragiadakou wrote:
>>>>
>>>> Hello Xenia
>>>>
>>>>> On 10/19/22 03:58, Stefano Stabellini wrote:
>>>>>> On Sat, 15 Oct 2022, Oleksandr Tyshchenko wrote:
>>>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>>>
>>>>>>> Use the same "xen-grant-dma" device concept for the PCI devices
>>>>>>> behind device-tree based PCI Host controller, but with one
>>>>>>> modification.
>>>>>>> Unlike for platform devices, we cannot use generic IOMMU bindings
>>>>>>> (iommus property), as we need to support more flexible
>>>>>>> configuration.
>>>>>>> The problem is that PCI devices under the single PCI Host
>>>>>>> controller
>>>>>>> may have the backends running in different Xen domains and thus
>>>>>>> have
>>>>>>> different endpoints ID (backend domains ID).
>>>>>>>
>>>>>>> So use generic PCI-IOMMU bindings instead (iommu-map/iommu-map-mask
>>>>>>> properties) which allows us to describe relationship between PCI
>>>>>>> devices and backend domains ID properly.
>>>>>>>
>>>>>>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>>
>>>>>> Now that I understood the approach and the reasons for it, I can
>>>>>> review
>>>>>> the patch :-)
>>>>>>
>>>>>> Please add an example of the bindings in the commit message.
>>>>>>
>>>>>>
>>>>>>> ---
>>>>>>> Slightly RFC. This is needed to support Xen grant mappings for
>>>>>>> virtio-pci devices
>>>>>>> on Arm at some point in the future. The Xen toolstack side is not
>>>>>>> completely ready yet.
>>>>>>> Here, for PCI devices we use more flexible way to pass backend
>>>>>>> domid
>>>>>>> to the guest
>>>>>>> than for platform devices.
>>>>>>>
>>>>>>> Changes V1 -> V2:
>>>>>>>       - update commit description
>>>>>>>       - rebase
>>>>>>>       - rework to use generic PCI-IOMMU bindings instead of generic
>>>>>>> IOMMU bindings
>>>>>>>
>>>>>>> Previous discussion is at:
>>>>>>> https://urldefense.com/v3/__https://lore.kernel.org/xen-devel/20221006174804.2003029-1-olekstysh@xxxxxxxxx/__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauACie_ZAy$
>>>>>>>
>>>>>>>
>>>>>>> [lore[.]kernel[.]org]
>>>>>>>
>>>>>>> Based on:
>>>>>>> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git/log/?h=for-linus-6.1__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauAEnMDHAq$
>>>>>>>
>>>>>>>
>>>>>>> [git[.]kernel[.]org]
>>>>>>> ---
>>>>>>>     drivers/xen/grant-dma-ops.c | 87
>>>>>>> ++++++++++++++++++++++++++++++++-----
>>>>>>>     1 file changed, 76 insertions(+), 11 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/xen/grant-dma-ops.c
>>>>>>> b/drivers/xen/grant-dma-ops.c
>>>>>>> index daa525df7bdc..b79d9d6ce154 100644
>>>>>>> --- a/drivers/xen/grant-dma-ops.c
>>>>>>> +++ b/drivers/xen/grant-dma-ops.c
>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>     #include <linux/module.h>
>>>>>>>     #include <linux/dma-map-ops.h>
>>>>>>>     #include <linux/of.h>
>>>>>>> +#include <linux/pci.h>
>>>>>>>     #include <linux/pfn.h>
>>>>>>>     #include <linux/xarray.h>
>>>>>>>     #include <linux/virtio_anchor.h>
>>>>>>> @@ -292,12 +293,55 @@ static const struct dma_map_ops
>>>>>>> xen_grant_dma_ops = {
>>>>>>>         .dma_supported = xen_grant_dma_supported,
>>>>>>>     };
>>>>>>>     +static struct device_node *xen_dt_get_pci_host_node(struct
>>>>>>> device
>>>>>>> *dev)
>>>>>>> +{
>>>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> +    struct pci_bus *bus = pdev->bus;
>>>>>>> +
>>>>>>> +    /* Walk up to the root bus to look for PCI Host controller */
>>>>>>> +    while (!pci_is_root_bus(bus))
>>>>>>> +        bus = bus->parent;
>>>>>>> +
>>>>>>> +    return of_node_get(bus->bridge->parent->of_node);
>>>>>>> +}
>>>>>>
>>>>>> It seems silly that we need to walk the hierachy that way, but I
>>>>>> couldn't find another way to do it
>>>>>>
>>>>>>
>>>>>>> +static struct device_node *xen_dt_get_node(struct device *dev)
>>>>>>> +{
>>>>>>> +    if (dev_is_pci(dev))
>>>>>>> +        return xen_dt_get_pci_host_node(dev);
>>>>>>> +
>>>>>>> +    return of_node_get(dev->of_node);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int xen_dt_map_id(struct device *dev, struct device_node
>>>>>>> **iommu_np,
>>>>>>> +             u32 *sid)
>>>>>>> +{
>>>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> +    u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>>>>>> +    struct device_node *host_np;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    host_np = xen_dt_get_pci_host_node(dev);
>>>>>>> +    if (!host_np)
>>>>>>> +        return -ENODEV;
>>>>>>> +
>>>>>>> +    ret = of_map_id(host_np, rid, "iommu-map", "iommu-map-mask",
>>>>>>> iommu_np, sid);
>>>>>>> +    of_node_put(host_np);
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>     static bool xen_is_dt_grant_dma_device(struct device *dev)
>>>>>>>     {
>>>>>>> -    struct device_node *iommu_np;
>>>>>>> +    struct device_node *iommu_np = NULL;
>>>>>>>         bool has_iommu;
>>>>>>>     -    iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>>>> +    if (dev_is_pci(dev)) {
>>>>>>> +        if (xen_dt_map_id(dev, &iommu_np, NULL))
>>>>>>> +            return false;
>>>>>>> +    } else
>>>>>>> +        iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>>>> +
>>>>>>>         has_iommu = iommu_np &&
>>>>>>>                 of_device_is_compatible(iommu_np, "xen,grant-dma");
>>>>>>>         of_node_put(iommu_np);
>>>>>>> @@ -307,9 +351,17 @@ static bool xen_is_dt_grant_dma_device(struct
>>>>>>> device *dev)
>>>>>>>       bool xen_is_grant_dma_device(struct device *dev)
>>>>>>>     {
>>>>>>> +    struct device_node *np;
>>>>>>> +
>>>>>>>         /* XXX Handle only DT devices for now */
>>>>>>> -    if (dev->of_node)
>>>>>>> -        return xen_is_dt_grant_dma_device(dev);
>>>>>>> +    np = xen_dt_get_node(dev);
>>>>>>> +    if (np) {
>>>>>>> +        bool ret;
>>>>>>> +
>>>>>>> +        ret = xen_is_dt_grant_dma_device(dev);
>>>>>>> +        of_node_put(np);
>>>>>>> +        return ret;
>>>>>>> +    }
>>>>>>
>>>>>> We don't need to walk the PCI hierachy twice. Maybe we can add the
>>>>>> of_node check directly to xen_is_dt_grant_dma_device?
>>>>>>
>>>>>
>>>>> I think in general we could pass directly the host bridge device if
>>>>> dev_is_pci(dev) (which can be retrieved with
>>>>> pci_get_host_bridge_device(to_pci_dev(dev), and after done with it
>>>>> pci_put_host_bridge_device(phb)).
>>>>> So that, xen_is_dt_grant_dma_device() and
>>>>> xen_dt_grant_init_backend_domid() won't need to discover it
>>>>> themselves.
>>>>> This will simplify the code.
>>>>
>>>>
>>>> Good point. I have some remark. Can we use pci_find_host_bridge()
>>>> instead? This way we don't have to add #include "../pci/pci.h", and
>>>> have
>>>> to drop reference afterwards.
>>>>
>>>> With that xen_dt_get_pci_host_node() will became the following:
>>>>
>>>>
>>>> static struct device_node *xen_dt_get_pci_host_node(struct device
>>>> *dev)
>>>> {
>>>>        struct pci_host_bridge *bridge =
>>>> pci_find_host_bridge(to_pci_dev(dev)->bus);
>>>>
>>>>        return of_node_get(bridge->dev.parent->of_node);
>>>> }
>>>>
>>>
>>> You are right. I prefer your version instead of the above.
>>
>>
>> ok, thanks
>>
>>
>>>
>>>
>>>>
>>>> With Stefano's suggestion, we won't walk the PCI hierarchy twice when
>>>> executing xen_is_grant_dma_device() for PCI device:
>>>>
>>>> xen_is_grant_dma_device() -> xen_is_dt_grant_dma_device() ->
>>>> xen_dt_map_id() -> xen_dt_get_pci_host_node()
>>>>
>>>>
>>>> What do you think?
>>>>
>>>
>>> I was thinking passing the device_node along with the device in the
>>> function arguments. More specifically, of doing this (not tested, just
>>> an idea):
>>>
>>> bool xen_is_grant_dma_device(struct device *dev)
>>> {
>>>      struct device_node *np;
>>>      bool has_iommu = false;
>>>
>>>      /* XXX Handle only DT devices for now */
>>>      np = xen_dt_get_node(dev);
>>>      if (np)
>>>          has_iommu = xen_is_dt_grant_dma_device(dev, np);
>>>      of_node_put(np);
>>>      return has_iommu;
>>> }
>>>
>>> static bool xen_is_dt_grant_dma_device(struct device *dev,
>>>                                         struct device_node *np)
>>> {
>>>      struct device_node *iommu_np = NULL;
>>>      bool has_iommu;
>>>
>>>      if (dev_is_pci(dev)) {
>>>          struct pci_dev *pdev = to_pci_dev(dev);
>>>      u32 id = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>>          of_map_id(np, id, "iommu-map", "iommu-map-mask", &iommu_np,
>>> NULL);
>>>      } else {
>>>          iommu_np = of_parse_phandle(np, "iommus", 0);
>>>      }
>>>
>>>      has_iommu = iommu_np && of_device_is_compatible(iommu_np,
>>> "xen,grant-dma");
>>>      of_node_put(iommu_np);
>>>
>>>      return has_iommu;
>>> }
>>
>>
>> I got it.
>>
>> xen_is_grant_dma_device() for V3 won't call xen_dt_get_node(), but call
>> xen_is_dt_grant_dma_device() directly.
>>
>> static bool xen_is_dt_grant_dma_device(struct device *dev)
>> {
>>       struct device_node *iommu_np = NULL;
>>       bool has_iommu;
>>
>>       if (dev_is_pci(dev)) {
>>           if (xen_dt_map_id(dev, &iommu_np, NULL))
>>               return false;
>>       } else if (dev->of_node)
>>           iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>       else
>>           return false;
>>
>>       has_iommu = iommu_np &&
>>               of_device_is_compatible(iommu_np, "xen,grant-dma");
>>       of_node_put(iommu_np);
>>
>>       return has_iommu;
>> }
>>
>> bool xen_is_grant_dma_device(struct device *dev)
>> {
>>       /* XXX Handle only DT devices for now */
>>       return xen_is_dt_grant_dma_device(dev);
>> }
>>
>>
>
> Ok. One difference, that I see from the previous, is that here you
> don't use the dynamic interface when you access the dev->of_node
> (of_node_get/of_node_put). Before, this was guarded through the
> external xen_dt_get_node().
>
> I suspect that the same needs to be done for the function
> xen_grant_setup_dma_ops(). There, also, the code walks up to the root
> bus twice.


Hmm, xen_dt_grant_init_backend_domid() should only be called if we deal
with device-tree based device.

I think you are completely right, thanks!

In order to address both your comments, I think I need to rework the
code (taking into the account your example with xen_is_dt_grant_dma_device()

provided a few letters ago and extrapolate this example to
xen_dt_grant_init_backend_domid()). Below the patch (not tested) which
seems to address both your comments (also I dropped

xen_dt_map_id() and squashed xen_dt_get_pci_host_node() with
xen_dt_get_node()).


diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index daa525df7bdc..dae24dbd2ef7 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/dma-map-ops.h>
 #include <linux/of.h>
+#include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/xarray.h>
 #include <linux/virtio_anchor.h>
@@ -292,12 +293,33 @@ static const struct dma_map_ops xen_grant_dma_ops = {
        .dma_supported = xen_grant_dma_supported,
 };

-static bool xen_is_dt_grant_dma_device(struct device *dev)
+static struct device_node *xen_dt_get_node(struct device *dev)
 {
-       struct device_node *iommu_np;
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               struct pci_host_bridge *bridge =
pci_find_host_bridge(pdev->bus);
+
+               return of_node_get(bridge->dev.parent->of_node);
+       }
+
+       return of_node_get(dev->of_node);
+}
+
+static bool xen_is_dt_grant_dma_device(struct device *dev,
+                                       struct device_node *np)
+{
+       struct device_node *iommu_np = NULL;
        bool has_iommu;

-       iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+               if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_np, NULL))
+                       return false;
+       } else
+               iommu_np = of_parse_phandle(np, "iommus", 0);
+
        has_iommu = iommu_np &&
                    of_device_is_compatible(iommu_np, "xen,grant-dma");
        of_node_put(iommu_np);
@@ -307,9 +329,17 @@ static bool xen_is_dt_grant_dma_device(struct
device *dev)

 bool xen_is_grant_dma_device(struct device *dev)
 {
+       struct device_node *np;
+
        /* XXX Handle only DT devices for now */
-       if (dev->of_node)
-               return xen_is_dt_grant_dma_device(dev);
+       np = xen_dt_get_node(dev);
+       if (np) {
+               bool ret;
+
+               ret = xen_is_dt_grant_dma_device(dev, np);
+               of_node_put(np);
+               return ret;
+       }

        return false;
 }
@@ -323,14 +353,26 @@ bool xen_virtio_mem_acc(struct virtio_device *dev)
 }

 static int xen_dt_grant_init_backend_domid(struct device *dev,
+                                          struct device_node *np,
                                           struct xen_grant_dma_data *data)
 {
-       struct of_phandle_args iommu_spec;
+       struct of_phandle_args iommu_spec = { .args_count = 1 };

-       if (of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
-                       0, &iommu_spec)) {
-               dev_err(dev, "Cannot parse iommus property\n");
-               return -ESRCH;
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+               if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_spec.np,
+                               iommu_spec.args)) {
+                       dev_err(dev, "Cannot translate ID\n");
+                       return -ESRCH;
+               }
+       } else {
+               if (of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
+                               0, &iommu_spec)) {
+                       dev_err(dev, "Cannot parse iommus property\n");
+                       return -ESRCH;
+               }
        }

        if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
@@ -354,6 +396,7 @@ static int xen_dt_grant_init_backend_domid(struct
device *dev,
 void xen_grant_setup_dma_ops(struct device *dev)
 {
        struct xen_grant_dma_data *data;
+       struct device_node *np;

        data = find_xen_grant_dma_data(dev);
        if (data) {
@@ -365,8 +408,13 @@ void xen_grant_setup_dma_ops(struct device *dev)
        if (!data)
                goto err;

-       if (dev->of_node) {
-               if (xen_dt_grant_init_backend_domid(dev, data))
+       np = xen_dt_get_node(dev);
+       if (np) {
+               int ret;
+
+               ret = xen_dt_grant_init_backend_domid(dev, np, data);
+               of_node_put(np);
+               if (ret)
                        goto err;
        } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
                dev_info(dev, "Using dom0 as backend\n");


Does it look ok now?


>
>
>>>
>>> I 'm wondering ... is it possible for the host bridge device node to
>>> have the iommus property set? meaning that all of its pci devs will
>>> have the same backend?
>>
>> Good question. I think, it is possible... This is technically what V1 is
>> doing.
>>
>>
>> Are you asking because to support "iommus" for PCI devices as well to
>> describe that use-case with all PCI devices having the same endpoint ID
>> (backend ID)?
>> If yes, I think, this could be still described by "iommu-map" property,
>> something like that (if we don't want to describe mapping for each PCI
>> device one-by-one).
>>
>> iommu-map = <0x0 &iommu X 0x1>;
>>
>> iommu-map-mask = <0x0>;
>>
>> where the X is backend ID.
>>
>>
>> It feels to me that it should be written down somewhere that for
>> platform devices we expect "iommus" and for PCI devices we expect
>> "iommu-map/iommu-map-mask" to be present.
>
> Thanks for the clarification, now I got it. Yes I agree.


ok, good


>
>>>
>>>
>>>>>
>>>>>>
>>>>>>>         return false;
>>>>>>>     }
>>>>>>> @@ -325,12 +377,19 @@ bool xen_virtio_mem_acc(struct virtio_device
>>>>>>> *dev)
>>>>>>>     static int xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>>>                            struct xen_grant_dma_data *data)
>>>>>>>     {
>>>>>>> -    struct of_phandle_args iommu_spec;
>>>>>>> +    struct of_phandle_args iommu_spec = { .args_count = 1 };
>>>>>>>     -    if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>>>> "#iommu-cells",
>>>>>>> -            0, &iommu_spec)) {
>>>>>>> -        dev_err(dev, "Cannot parse iommus property\n");
>>>>>>> -        return -ESRCH;
>>>>>>> +    if (dev_is_pci(dev)) {
>>>>>>> +        if (xen_dt_map_id(dev, &iommu_spec.np, iommu_spec.args)) {
>>>>>>> +            dev_err(dev, "Cannot translate ID\n");
>>>>>>> +            return -ESRCH;
>>>>>>> +        }
>>>>>>> +    } else {
>>>>>>> +        if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>>>> "#iommu-cells",
>>>>>>> +                0, &iommu_spec)) {
>>>>>>> +            dev_err(dev, "Cannot parse iommus property\n");
>>>>>>> +            return -ESRCH;
>>>>>>> +        }
>>>>>>>         }
>>>>>>>           if (!of_device_is_compatible(iommu_spec.np,
>>>>>>> "xen,grant-dma") ||
>>>>>>> @@ -354,6 +413,7 @@ static int
>>>>>>> xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>>>     void xen_grant_setup_dma_ops(struct device *dev)
>>>>>>>     {
>>>>>>>         struct xen_grant_dma_data *data;
>>>>>>> +    struct device_node *np;
>>>>>>>           data = find_xen_grant_dma_data(dev);
>>>>>>>         if (data) {
>>>>>>> @@ -365,8 +425,13 @@ void xen_grant_setup_dma_ops(struct device
>>>>>>> *dev)
>>>>>>>         if (!data)
>>>>>>>             goto err;
>>>>>>>     -    if (dev->of_node) {
>>>>>>> -        if (xen_dt_grant_init_backend_domid(dev, data))
>>>>>>> +    np = xen_dt_get_node(dev);
>>>>>>> +    if (np) {
>>>>>>> +        int ret;
>>>>>>> +
>>>>>>> +        ret = xen_dt_grant_init_backend_domid(dev, data);
>>>>>>> +        of_node_put(np);
>>>>>>> +        if (ret)
>>>>>>>                 goto err;
>>>>>>>         } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
>>>>>>>             dev_info(dev, "Using dom0 as backend\n");
>>>>>>> --
>>>>>>> 2.25.1
>>>>>>>
>>>>>>
>>>>>
>>>
>
--
Regards,

Oleksandr Tyshchenko