Re: [PATCH] PCI/IOV: Decrease VF memory BAR size to save host memory occupied by PTEs

From: Bjorn Helgaas
Date: Wed Nov 09 2022 - 18:46:58 EST


On Tue, Oct 11, 2022 at 07:23:25PM +0800, Rui Ma wrote:
> In certain SR-IOV scenarios, when the device's physical memory (such as Video
> RAM) is fixed, some device drivers decrease the BAR memory space actually used
> by each VF as the number of VFs increases. However, the VF BAR memory mapping
> is always sized by the usual BAR probing algorithm in the PCIe spec. Not
> mapping this unneeded memory saves the host memory occupied by PTEs. Although
> each PTE only occupies a few bytes on its own, a large number of PTEs can
> still take up a lot of space.

Dropping this for now until we resolve whether this is working around
a KVM bug as Alex suggests:

https://lore.kernel.org/r/BL1PR12MB51446437265DD1E8AA0794E9F7239@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

> Signed-off-by: Rui Ma <Rui.Ma@xxxxxxx>
> ---
> drivers/pci/iov.c | 14 ++++++++++++--
> drivers/pci/pci.h | 15 +++++++++++++++
> drivers/pci/quirks.c | 37 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> index 952217572113..92a69e51d85c 100644
> --- a/drivers/pci/iov.c
> +++ b/drivers/pci/iov.c
> @@ -296,6 +296,14 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id)
> struct pci_sriov *iov = dev->sriov;
> struct pci_bus *bus;
>
> + /*
> + * Some SR-IOV devices' VF BARs are larger than the VFs can actually use.
> + * The extra BAR space consumes too much reverse mapping space (physical
> + * pages back to the PTEs), so use a divisor shift to shrink the resource
> + * requested for each VF according to the number of VFs.
> + */
> + u16 shift = 1;
> +
> bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
> if (!bus)
> goto failed;
> @@ -328,8 +336,10 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id)
> virtfn->resource[i].name = pci_name(virtfn);
> virtfn->resource[i].flags = res->flags;
> size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> + shift = 1;
> + shift = virtfn_get_shift(dev, iov->num_VFs, i);
> virtfn->resource[i].start = res->start + size * id;
> - virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
> + virtfn->resource[i].end = virtfn->resource[i].start + (size >> (shift - 1)) - 1;
> rc = request_resource(res, &virtfn->resource[i]);
> BUG_ON(rc);
> }
> @@ -680,12 +690,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
> msleep(100);
> pci_cfg_access_unlock(dev);
>
> + iov->num_VFs = nr_virtfn;
> rc = sriov_add_vfs(dev, initial);
> if (rc)
> goto err_pcibios;
>
> kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
> - iov->num_VFs = nr_virtfn;
>
> return 0;
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 3d60cabde1a1..befc67a280eb 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -603,6 +603,21 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
> }
> #endif
>
> +struct virtfn_get_shift_methods {
> + u16 vendor;
> + u16 device;
> + u16 (*get_shift)(struct pci_dev *dev, u16 arg, int arg2);
> +};
> +
> +#ifdef CONFIG_PCI_QUIRKS
> +u16 virtfn_get_shift(struct pci_dev *dev, u16 arg1, int arg2);
> +#else
> +static inline u16 virtfn_get_shift(struct pci_dev *dev, u16 arg1, int arg2)
> +{
> + return (u16)1;
> +}
> +#endif
> +
> #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
> int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
> struct resource *res);
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index da829274fc66..3466738c1c54 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -4085,6 +4085,43 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
> return -ENOTTY;
> }
>
> +static u16 resize_vf_bar0(struct pci_dev *dev, u16 num_VFs, int bar_num)
> +{
> + u16 shift = 1;
> +
> + if (bar_num == 0) {
> + while ((1 << shift) <= num_VFs)
> + shift += 1;
> + }
> + pci_info(dev, "with %d VFs, VF BAR%d get shift: %d\n", num_VFs, bar_num, shift);
> + return shift;
> +}
> +
> +static const struct virtfn_get_shift_methods virtfn_get_shift_methods[] = {
> + { PCI_VENDOR_ID_ATI, 0x73a1, resize_vf_bar0},
> + { 0 }
> +};
> +
> +/*
> + * Get the shift used to compute an SR-IOV VF BAR's size. Sometimes the BAR
> + * size of an SR-IOV device is too large, so we compute a reduced size to
> + * define the end of the virtfn resource.
> + */
> +u16 virtfn_get_shift(struct pci_dev *dev, u16 arg1, int arg2)
> +{
> + const struct virtfn_get_shift_methods *i;
> +
> + for (i = virtfn_get_shift_methods; i->get_shift; i++) {
> + if ((i->vendor == dev->vendor ||
> + i->vendor == (u16)PCI_ANY_ID) &&
> + (i->device == dev->device ||
> + i->device == (u16)PCI_ANY_ID))
> + return i->get_shift(dev, arg1, arg2);
> + }
> +
> + return (u16)1;
> +}
> +
> static void quirk_dma_func0_alias(struct pci_dev *dev)
> {
> if (PCI_FUNC(dev->devfn) != 0)
> --
> 2.25.1
>