Re: [PATCH v13 06/24] gunyah: rsc_mgr: Add resource manager RPC core

From: Alex Elder
Date: Mon Jun 05 2023 - 15:48:10 EST


On 5/9/23 3:47 PM, Elliot Berman wrote:
The resource manager is a special virtual machine which is always
running on a Gunyah system. It provides APIs for creating and destroying
VMs, secure memory management, sharing/lending of memory between VMs,
and setup of inter-VM communication. Calls to the resource manager are
made via message queues.

This patch implements the basic probing and RPC mechanism to make those
API calls. Request/response calls can be made with gh_rm_call.
Drivers can also register to notifications pushed by RM via
gh_rm_register_notifier

Specific API calls that resource manager supports will be implemented in
subsequent patches.

Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx>

I have some comments below, but none is critical so whether or not you
address what I mention:

Reviewed-by: Alex Elder <elder@xxxxxxxxxx>

---
drivers/virt/Makefile | 1 +
drivers/virt/gunyah/Makefile | 4 +
drivers/virt/gunyah/rsc_mgr.c | 702 +++++++++++++++++++++++++++++++++
drivers/virt/gunyah/rsc_mgr.h | 16 +
include/linux/gunyah_rsc_mgr.h | 21 +
5 files changed, 744 insertions(+)
create mode 100644 drivers/virt/gunyah/Makefile
create mode 100644 drivers/virt/gunyah/rsc_mgr.c
create mode 100644 drivers/virt/gunyah/rsc_mgr.h
create mode 100644 include/linux/gunyah_rsc_mgr.h

diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index e9aa6fc96fab..a5817e2d7d71 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_ACRN_HSM) += acrn/
obj-$(CONFIG_EFI_SECRET) += coco/efi_secret/
obj-$(CONFIG_SEV_GUEST) += coco/sev-guest/
obj-$(CONFIG_INTEL_TDX_GUEST) += coco/tdx-guest/
+obj-y += gunyah/
diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
new file mode 100644
index 000000000000..0f5aec834698
--- /dev/null
+++ b/drivers/virt/gunyah/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gunyah-y += rsc_mgr.o
+obj-$(CONFIG_GUNYAH) += gunyah.o
diff --git a/drivers/virt/gunyah/rsc_mgr.c b/drivers/virt/gunyah/rsc_mgr.c
new file mode 100644
index 000000000000..88b5beb1ea51
--- /dev/null
+++ b/drivers/virt/gunyah/rsc_mgr.c
@@ -0,0 +1,702 @@

. . .

+/**
+ * struct gh_rm - private data for communicating w/Gunyah resource manager
+ * @dev: pointer to device

This points to the device structure for the RM platform device.
(Maybe that's clear...)

+ * @tx_ghrsc: message queue resource to TX to RM
+ * @rx_ghrsc: message queue resource to RX from RM
+ * @msgq: mailbox instance of TX/RX resources above
+ * @msgq_client: mailbox client of above msgq
+ * @active_rx_connection: ongoing gh_rm_connection for which we're receiving fragments
+ * @last_tx_ret: return value of last mailbox tx
+ * @call_xarray: xarray to allocate & lookup sequence IDs for Request/Response flows
+ * @next_seq: next ID to allocate (for xa_alloc_cyclic)
+ * @cache: cache for allocating Tx messages
+ * @send_lock: synchronization to allow only one request to be sent at a time
+ * @nh: notifier chain for clients interested in RM notification messages
+ */
+struct gh_rm {
+ struct device *dev;
+ struct gh_resource tx_ghrsc;
+ struct gh_resource rx_ghrsc;
+ struct gh_msgq msgq;
+ struct mbox_client msgq_client;
+ struct gh_rm_connection *active_rx_connection;
+ int last_tx_ret;
+
+ struct xarray call_xarray;
+ u32 next_seq;
+
+ struct kmem_cache *cache;
+ struct mutex send_lock;
+ struct blocking_notifier_head nh;
+};
+
+/**
+ * gh_rm_remap_error() - Remap Gunyah resource manager errors into a Linux error code
+ * @rm_error: "Standard" return value from Gunyah resource manager
+ */
+static inline int gh_rm_remap_error(enum gh_rm_error rm_error)

I suggested something similar last time. I you are operating
on an rm_error value, so I would call this gh_rm_error_remap().

+{
+ switch (rm_error) {
+ case GH_RM_ERROR_OK:
+ return 0;
+ case GH_RM_ERROR_UNIMPLEMENTED:
+ return -EOPNOTSUPP;
+ case GH_RM_ERROR_NOMEM:
+ return -ENOMEM;
+ case GH_RM_ERROR_NORESOURCE:
+ return -ENODEV;
+ case GH_RM_ERROR_DENIED:
+ return -EPERM;
+ case GH_RM_ERROR_BUSY:
+ return -EBUSY;
+ case GH_RM_ERROR_INVALID:
+ case GH_RM_ERROR_ARGUMENT_INVALID:
+ case GH_RM_ERROR_HANDLE_INVALID:
+ case GH_RM_ERROR_VALIDATE_FAILED:
+ case GH_RM_ERROR_MAP_FAILED:
+ case GH_RM_ERROR_MEM_INVALID:
+ case GH_RM_ERROR_MEM_INUSE:
+ case GH_RM_ERROR_MEM_RELEASED:
+ case GH_RM_ERROR_VMID_INVALID:
+ case GH_RM_ERROR_LOOKUP_FAILED:
+ case GH_RM_ERROR_IRQ_INVALID:
+ case GH_RM_ERROR_IRQ_INUSE:
+ case GH_RM_ERROR_IRQ_RELEASED:
+ return -EINVAL;
+ default:
+ return -EBADMSG;
+ }
+}

. . .

+static void gh_rm_process_rply(struct gh_rm *rm, void *msg, size_t msg_size)
+{
+ struct gh_rm_rpc_reply_hdr *reply_hdr = msg;
+ struct gh_rm_connection *connection;
+ u16 seq_id;
+
+ seq_id = le16_to_cpu(reply_hdr->hdr.seq);
+ connection = xa_load(&rm->call_xarray, seq_id);
+
+ if (!connection || connection->msg_id != reply_hdr->hdr.msg_id)
+ return;

Do either of the above conditions warrant reporting a warning if
it occurs? Or are these expected to be possible--and if either
occur they're harmless if handled this way?

+
+ if (rm->active_rx_connection)
+ gh_rm_abort_connection(rm);
+
+ if (gh_rm_init_connection_payload(connection, msg, sizeof(*reply_hdr), msg_size)) {
+ dev_err(rm->dev, "Failed to alloc connection buffer for sequence %d\n", seq_id);
+ /* Send connection complete and error the client. */
+ connection->reply.ret = -ENOMEM;
+ complete(&connection->reply.seq_done);
+ return;
+ }
+
+ connection->reply.rm_error = le32_to_cpu(reply_hdr->err_code);
+ rm->active_rx_connection = connection;
+}

. . .