[PATCH net-next 10/11] sfc: implement vdpa config_ops for dma operations

From: Gautam Dawar
Date: Wed Dec 07 2022 - 09:58:58 EST


Although sfc uses the platform IOMMU, it still implements the
DMA config operations so that it can detect a possible IOVA
overlap with the MCDI DMA buffer and relocate the latter when
such an overlap is found.

Signed-off-by: Gautam Dawar <gautam.dawar@xxxxxxx>
---
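Note: the overlap handling in ef100_vdpa_dma_map() boils down to a
half-open interval intersection test between the incoming guest
mapping and the current MCDI buffer IOVA range. A minimal userspace
sketch of that test (same logic as is_iova_overlap() in the diff
below; the addresses in main() are made up purely for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Two [start, start + size) ranges overlap iff the larger start
 * lies below the smaller end.
 */
static bool is_iova_overlap(uint64_t iova1, uint64_t size1,
                            uint64_t iova2, uint64_t size2)
{
        uint64_t start = iova1 > iova2 ? iova1 : iova2;
        uint64_t end1 = iova1 + size1;
        uint64_t end2 = iova2 + size2;

        return start < (end1 < end2 ? end1 : end2);
}

int main(void)
{
        /* MCDI buffer at [0x1000, 0x2000) vs a guest mapping at
         * [0x1800, 0x2800): overlap, so the buffer is relocated.
         */
        printf("%d\n", is_iova_overlap(0x1000, 0x1000, 0x1800, 0x1000));
        /* Disjoint ranges: no relocation needed. */
        printf("%d\n", is_iova_overlap(0x1000, 0x1000, 0x3000, 0x1000));
        return 0;
}

On overlap, ef100_vdpa_dma_map() asks efx_ef100_find_new_iova() for a
fresh IOVA and then either remaps the existing buffer (vDPA buffer
mode) or allocates and maps a new one (EF100 buffer mode) before
mapping the guest range itself.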
drivers/net/ethernet/sfc/ef100_vdpa.c | 142 ++++++++++++++++++++++
drivers/net/ethernet/sfc/ef100_vdpa.h | 3 +
drivers/net/ethernet/sfc/ef100_vdpa_ops.c | 110 +++++++++++++++++
drivers/net/ethernet/sfc/net_driver.h | 12 ++
4 files changed, 267 insertions(+)

diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.c b/drivers/net/ethernet/sfc/ef100_vdpa.c
index b9368eb1acd5..16681d164fd1 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa.c
+++ b/drivers/net/ethernet/sfc/ef100_vdpa.c
@@ -309,6 +309,142 @@ static int vdpa_update_domain(struct ef100_vdpa_nic *vdpa_nic)
vdpa_nic->geo_aper_end + 1, 0);
}

+static int ef100_vdpa_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buf)
+{
+ struct ef100_vdpa_nic *vdpa_nic = efx->vdpa_nic;
+ struct device *dev = &vdpa_nic->vdpa_dev.dev;
+ int rc;
+
+ buf->addr = kzalloc(buf->len, GFP_KERNEL);
+ if (!buf->addr)
+ return -ENOMEM;
+
+ rc = iommu_map(vdpa_nic->domain, buf->dma_addr,
+ virt_to_phys(buf->addr), buf->len,
+ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+ if (rc) {
+ dev_err(dev, "iommu_map failed, rc: %d\n", rc);
+ kfree(buf->addr);
+ }
+
+ return rc;
+}
+
+static void ef100_vdpa_free_buffer(struct ef100_vdpa_nic *vdpa_nic,
+ struct efx_buffer *buf)
+{
+ struct device *dev = &vdpa_nic->vdpa_dev.dev;
+ int rc;
+
+ rc = iommu_unmap(vdpa_nic->domain, buf->dma_addr, buf->len);
+ if (rc < 0)
+ dev_err(dev, "iommu_unmap failed, rc: %d\n", rc);
+
+ kfree(buf->addr);
+}
+
+int ef100_setup_ef100_mcdi_buffer(struct ef100_vdpa_nic *vdpa_nic)
+{
+ struct efx_nic *efx = vdpa_nic->efx;
+ struct ef100_nic_data *nic_data;
+ struct efx_mcdi_iface *mcdi;
+ struct efx_buffer mcdi_buf;
+ enum efx_mcdi_mode mode;
+ struct device *dev;
+ int rc;
+
+ /* Switch MCDI to poll mode */
+ nic_data = efx->nic_data;
+ dev = &vdpa_nic->vdpa_dev.dev;
+ mcdi = efx_mcdi(efx);
+ mode = mcdi->mode;
+ efx_mcdi_mode_poll(efx);
+ efx_mcdi_flush_async(efx);
+
+ /* First, allocate the MCDI buffer for EF100 mode */
+ rc = efx_nic_alloc_buffer(efx, &mcdi_buf,
+ MCDI_BUF_LEN, GFP_KERNEL);
+ if (rc) {
+ dev_err(dev, "nic alloc buf failed, rc: %d\n", rc);
+ goto restore_mode;
+ }
+
+ /* unmap and free the vDPA MCDI buffer now */
+ ef100_vdpa_free_buffer(vdpa_nic, &nic_data->mcdi_buf);
+ memcpy(&nic_data->mcdi_buf, &mcdi_buf, sizeof(struct efx_buffer));
+ efx->mcdi_buf_mode = EFX_BUF_MODE_EF100;
+
+restore_mode:
+ if (mode == MCDI_MODE_EVENTS)
+ efx_mcdi_mode_event(efx);
+
+ return rc;
+}
+
+int ef100_setup_vdpa_mcdi_buffer(struct efx_nic *efx, u64 mcdi_iova)
+{
+ struct ef100_nic_data *nic_data = efx->nic_data;
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ enum efx_mcdi_mode mode = mcdi->mode;
+ struct efx_buffer mcdi_buf;
+ int rc;
+
+ efx_mcdi_mode_poll(efx);
+ efx_mcdi_flush_async(efx);
+
+ /* First, prepare the MCDI buffer for vDPA mode */
+ mcdi_buf.dma_addr = mcdi_iova;
+ /* iommu_map requires page aligned memory */
+ mcdi_buf.len = PAGE_ALIGN(MCDI_BUF_LEN);
+ rc = ef100_vdpa_alloc_buffer(efx, &mcdi_buf);
+ if (rc) {
+ pci_err(efx->pci_dev, "alloc vdpa buf failed, rc: %d\n", rc);
+ goto restore_mode;
+ }
+
+ /* All set up, free the EF100 MCDI buffer now */
+ efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
+ memcpy(&nic_data->mcdi_buf, &mcdi_buf, sizeof(struct efx_buffer));
+ efx->mcdi_buf_mode = EFX_BUF_MODE_VDPA;
+
+restore_mode:
+ if (mode == MCDI_MODE_EVENTS)
+ efx_mcdi_mode_event(efx);
+ return rc;
+}
+
+int ef100_remap_vdpa_mcdi_buffer(struct efx_nic *efx, u64 mcdi_iova)
+{
+ struct ef100_nic_data *nic_data = efx->nic_data;
+ struct ef100_vdpa_nic *vdpa_nic = efx->vdpa_nic;
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ struct efx_buffer *mcdi_buf;
+ int rc;
+
+ mcdi_buf = &nic_data->mcdi_buf;
+ spin_lock_bh(&mcdi->iface_lock);
+
+ rc = iommu_unmap(vdpa_nic->domain, mcdi_buf->dma_addr, mcdi_buf->len);
+ if (rc < 0) {
+ pci_err(efx->pci_dev, "iommu_unmap failed, rc: %d\n", rc);
+ goto out;
+ }
+
+ rc = iommu_map(vdpa_nic->domain, mcdi_iova,
+ virt_to_phys(mcdi_buf->addr),
+ mcdi_buf->len,
+ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+ if (rc) {
+ pci_err(efx->pci_dev, "iommu_map failed, rc: %d\n", rc);
+ goto out;
+ }
+
+ mcdi_buf->dma_addr = mcdi_iova;
+out:
+ spin_unlock_bh(&mcdi->iface_lock);
+ return rc;
+}
+
static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
const char *dev_name,
enum ef100_vdpa_class dev_type,
@@ -391,6 +527,12 @@ static struct ef100_vdpa_nic *ef100_vdpa_create(struct efx_nic *efx,
goto err_put_device;
}

+ rc = ef100_setup_vdpa_mcdi_buffer(efx, EF100_VDPA_IOVA_BASE_ADDR);
+ if (rc) {
+ pci_err(efx->pci_dev, "realloc mcdi failed, err: %d\n", rc);
+ goto err_put_device;
+ }
+
rc = get_net_config(vdpa_nic);
if (rc)
goto err_put_device;
diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.h b/drivers/net/ethernet/sfc/ef100_vdpa.h
index c3c77029973d..f15d8739dcde 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa.h
+++ b/drivers/net/ethernet/sfc/ef100_vdpa.h
@@ -202,6 +202,9 @@ int ef100_vdpa_add_filter(struct ef100_vdpa_nic *vdpa_nic,
int ef100_vdpa_irq_vectors_alloc(struct pci_dev *pci_dev, u16 nvqs);
void ef100_vdpa_irq_vectors_free(void *data);
int ef100_vdpa_reset(struct vdpa_device *vdev);
+int ef100_setup_ef100_mcdi_buffer(struct ef100_vdpa_nic *vdpa_nic);
+int ef100_setup_vdpa_mcdi_buffer(struct efx_nic *efx, u64 mcdi_iova);
+int ef100_remap_vdpa_mcdi_buffer(struct efx_nic *efx, u64 mcdi_iova);

static inline bool efx_vdpa_is_little_endian(struct ef100_vdpa_nic *vdpa_nic)
{
diff --git a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
index 8c198d949fdb..7c632f179bcf 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
+++ b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
@@ -739,14 +739,121 @@ static void ef100_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
}
}

+static bool is_iova_overlap(u64 iova1, u64 size1, u64 iova2, u64 size2)
+{
+ return max(iova1, iova2) < min(iova1 + size1, iova2 + size2);
+}
+
+static int ef100_vdpa_dma_map(struct vdpa_device *vdev,
+ unsigned int asid,
+ u64 iova, u64 size,
+ u64 pa, u32 perm, void *opaque)
+{
+ struct ef100_vdpa_nic *vdpa_nic;
+ struct ef100_nic_data *nic_data;
+ unsigned int mcdi_buf_len;
+ dma_addr_t mcdi_buf_addr;
+ u64 mcdi_iova = 0;
+ int rc;
+
+ vdpa_nic = get_vdpa_nic(vdev);
+ nic_data = vdpa_nic->efx->nic_data;
+ mcdi_buf_addr = nic_data->mcdi_buf.dma_addr;
+ mcdi_buf_len = nic_data->mcdi_buf.len;
+
+ /* Validate the iova range against geo aperture */
+ if (iova < vdpa_nic->geo_aper_start ||
+ ((iova + size - 1) > vdpa_nic->geo_aper_end)) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: iova range (%llx, %llx) not within geo aperture\n",
+ __func__, iova, (iova + size));
+ return -EINVAL;
+ }
+
+ rc = efx_ef100_insert_iova_node(vdpa_nic, iova, size);
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: iova_node insert failure: %d\n", __func__, rc);
+ return rc;
+ }
+
+ if (is_iova_overlap(mcdi_buf_addr, mcdi_buf_len, iova, size)) {
+ dev_info(&vdpa_nic->vdpa_dev.dev,
+ "%s: mcdi iova overlap detected: %llx\n",
+ __func__, mcdi_buf_addr);
+ /* find the new iova for mcdi buffer */
+ rc = efx_ef100_find_new_iova(vdpa_nic, mcdi_buf_len,
+ &mcdi_iova);
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "new mcdi iova not found, err: %d\n", rc);
+ goto fail;
+ }
+
+ if (vdpa_nic->efx->mcdi_buf_mode == EFX_BUF_MODE_VDPA)
+ rc = ef100_remap_vdpa_mcdi_buffer(vdpa_nic->efx,
+ mcdi_iova);
+ else if (vdpa_nic->efx->mcdi_buf_mode == EFX_BUF_MODE_EF100)
+ rc = ef100_setup_vdpa_mcdi_buffer(vdpa_nic->efx,
+ mcdi_iova);
+ else
+ rc = -EINVAL;
+
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "mcdi buf update failed, err: %d\n", rc);
+ goto fail;
+ }
+ }
+
+ rc = iommu_map(vdpa_nic->domain, iova, pa, size, perm);
+ if (rc) {
+ dev_err(&vdev->dev,
+ "%s: iommu_map iova: %llx size: %llx rc: %d\n",
+ __func__, iova, size, rc);
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ efx_ef100_remove_iova_node(vdpa_nic, iova);
+ return rc;
+}
+
+static int ef100_vdpa_dma_unmap(struct vdpa_device *vdev,
+ unsigned int asid,
+ u64 iova, u64 size)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ int rc;
+
+ rc = iommu_unmap(vdpa_nic->domain, iova, size);
+ if (rc < 0)
+ dev_info(&vdev->dev,
+ "%s: iommu_unmap iova: %llx size: %llx rc: %d\n",
+ __func__, iova, size, rc);
+ efx_ef100_remove_iova_node(vdpa_nic, iova);
+ return rc;
+}
+
static void ef100_vdpa_free(struct vdpa_device *vdev)
{
struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ int rc;
int i;

if (vdpa_nic) {
/* clean-up the mappings and iova tree */
efx_ef100_delete_iova(vdpa_nic);
+ if (vdpa_nic->efx->mcdi_buf_mode == EFX_BUF_MODE_VDPA) {
+ rc = ef100_setup_ef100_mcdi_buffer(vdpa_nic);
+ if (rc) {
+ dev_err(&vdev->dev,
+ "setup_ef100_mcdi failed, err: %d\n",
+ rc);
+ }
+ }
for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++)
reset_vring(vdpa_nic, i);
ef100_vdpa_irq_vectors_free(vdpa_nic->efx->pci_dev);
@@ -782,5 +889,8 @@ const struct vdpa_config_ops ef100_vdpa_config_ops = {
.get_config = ef100_vdpa_get_config,
.set_config = ef100_vdpa_set_config,
.get_generation = NULL,
+ .set_map = NULL,
+ .dma_map = ef100_vdpa_dma_map,
+ .dma_unmap = ef100_vdpa_dma_unmap,
.free = ef100_vdpa_free,
};
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 79356d614109..34b94372d9a6 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -846,6 +846,16 @@ enum efx_xdp_tx_queues_mode {
EFX_XDP_TX_QUEUES_BORROWED /* queues borrowed from net stack */
};

+/**
+ * enum efx_buf_alloc_mode - buffer allocation mode
+ * @EFX_BUF_MODE_EF100: buffer setup in ef100 mode
+ * @EFX_BUF_MODE_VDPA: buffer setup in vdpa mode
+ */
+enum efx_buf_alloc_mode {
+ EFX_BUF_MODE_EF100,
+ EFX_BUF_MODE_VDPA
+};
+
/**
* struct efx_nic - an Efx NIC
* @name: Device name (net device name or bus id before net device registered)
@@ -997,6 +1007,7 @@ enum efx_xdp_tx_queues_mode {
* @tc: state for TC offload (EF100).
* @mem_bar: The BAR that is mapped into membase.
* @reg_base: Offset from the start of the bar to the function control window.
* @mcdi_buf_mode: MCDI buffer allocation mode
* @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -1182,6 +1193,7 @@ struct efx_nic {

unsigned int mem_bar;
u32 reg_base;
+ enum efx_buf_alloc_mode mcdi_buf_mode;
#ifdef CONFIG_SFC_VDPA
/** @mgmt_dev: vDPA Management device */
struct vdpa_mgmt_dev *mgmt_dev;
--
2.30.1