Re: [PATCH v2 7/8] s390: vfio-ap: handle bind and unbind of AP queue device

From: Tony Krowiak
Date: Tue Apr 23 2019 - 10:53:49 EST


On 4/23/19 9:38 AM, Pierre Morel wrote:
On 20/04/2019 23:49, Tony Krowiak wrote:
There is an implied guarantee that when an AP queue device is bound to a
device driver, that driver will have exclusive control over the device.
When an AP queue device is unbound from the vfio_ap driver while the
queue is in use by a guest and subsquently bound to a zcrypt driver, the
guarantee that the zcrypt driver has exclusive control of the queue
device will be violated. Likewise, when an AP queue device is bound to
the vfio_ap device driver and its APQN is assigned to an mdev device in
use by a guest, the expectation is that the guest will have access to
the queue.

The purpose of this patch is to ensure three things:

1. When an AP queue device in use by a guest is unbound, the guest shall
ÂÂÂ no longer access the queue. Due to the limitations of the
ÂÂÂ architecture, access to a single queue can not be denied to a guest,
ÂÂÂ so access to the AP card to which the queue is connected will be
ÂÂÂ denied to the guest.

2. When an AP queue device with an APQN assigned to an mdev device is
ÂÂÂ bound to the vfio_ap driver while the mdev device is in use by a guest,
ÂÂÂ the guest shall be granted access to the queue.

3. When a guest is started, each APQN assigned to the guest's mdev device
ÂÂÂ must be owned by the vfio_ap driver.

Signed-off-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx>
---
 drivers/s390/crypto/vfio_ap_drv.c | 16 ++++++-
 drivers/s390/crypto/vfio_ap_ops.c | 82 ++++++++++++++++++++++++++++++++++-
 drivers/s390/crypto/vfio_ap_private.h | 2 +
 3 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index e9824c35c34f..0f5dafddf5b1 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -42,12 +42,26 @@ MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
 static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
 {
+ÂÂÂ struct ap_queue *apq = to_ap_queue(&apdev->device);
+ÂÂÂ unsigned long apid = AP_QID_CARD(apq->qid);
+ÂÂÂ unsigned long apqi = AP_QID_QUEUE(apq->qid);
+
+ÂÂÂ mutex_lock(&matrix_dev->lock);
+ÂÂÂ vfio_ap_mdev_probe_queue(apid, apqi);
+ÂÂÂ mutex_unlock(&matrix_dev->lock);
+
ÂÂÂÂÂ return 0;
 }
 static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
 {
-ÂÂÂ /* Nothing to do yet */
+ÂÂÂ struct ap_queue *apq = to_ap_queue(&apdev->device);
+ÂÂÂ unsigned long apid = AP_QID_CARD(apq->qid);
+ÂÂÂ unsigned long apqi = AP_QID_QUEUE(apq->qid);
+
+ÂÂÂ mutex_lock(&matrix_dev->lock);
+ÂÂÂ vfio_ap_mdev_remove_queue(apid, apqi);
+ÂÂÂ mutex_unlock(&matrix_dev->lock);
 }
 static void vfio_ap_matrix_dev_release(struct device *dev)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 31ce30ee784d..3f9506516545 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -763,8 +763,8 @@ static void vfio_ap_mdev_wait_for_qempty(unsigned long apid, unsigned long apqi)
ÂÂÂÂÂÂÂÂÂÂÂÂÂ msleep(20);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ break;
ÂÂÂÂÂÂÂÂÂ default:
-ÂÂÂÂÂÂÂÂÂÂÂ pr_warn("%s: tapq err %02x: 0x%04x may not be empty\n",
-ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ __func__, status.response_code, q->apqn);
+ÂÂÂÂÂÂÂÂÂÂÂ pr_warn("%s: tapq err %02x: %02lx%04lx may not be empty\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ __func__, status.response_code, apid, apqi);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ return;
ÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂ } while (--retry);
@@ -823,6 +823,14 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
 static int vfio_ap_mdev_create_shadow_crycb(struct ap_matrix_mdev *matrix_mdev)
 {
+ÂÂÂ /*
+ÂÂÂÂ * If an AP resource is not owned by the vfio_ap driver - e.g., it was
+ÂÂÂÂ * unbound from the driver while still assigned to the mdev device
+ÂÂÂÂ */
+ÂÂÂ if (ap_apqn_in_matrix_owned_by_def_drv(matrix_mdev->matrix.apm,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ matrix_mdev->matrix.aqm))
+ÂÂÂÂÂÂÂ return -EADDRNOTAVAIL;
+
ÂÂÂÂÂ matrix_mdev->shadow_crycb = kzalloc(sizeof(*matrix_mdev->shadow_crycb),
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ GFP_KERNEL);
ÂÂÂÂÂ if (!matrix_mdev->shadow_crycb)
@@ -847,6 +855,18 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
ÂÂÂÂÂ if (!try_module_get(THIS_MODULE))
ÂÂÂÂÂÂÂÂÂ return -ENODEV;
+ÂÂÂ ret = ap_apqn_in_matrix_owned_by_def_drv(matrix_mdev->matrix.apm,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ matrix_mdev->matrix.aqm);
+
+ÂÂÂ /*
+ÂÂÂÂ * If any APQN is owned by a default driver, it can not be used by the
+ÂÂÂÂ * guest. This can happen if a queue is unbound from the vfio_ap
+ÂÂÂÂ * driver but not unassigned from the mdev device.
+ÂÂÂÂ */
+ÂÂÂ ret = (ret == 1) ? -EADDRNOTAVAIL : ret;
+ÂÂÂ if (ret)
+ÂÂÂÂÂÂÂ return ret;
+
ÂÂÂÂÂ matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
ÂÂÂÂÂ events = VFIO_GROUP_NOTIFY_SET_KVM;
@@ -938,3 +958,61 @@ void vfio_ap_mdev_unregister(void)
 {
ÂÂÂÂÂ mdev_unregister_device(&matrix_dev->device);
 }
+
+static struct ap_matrix_mdev *vfio_ap_mdev_find_matrix_mdev(unsigned long apid,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ unsigned long apqi)
+{
+ÂÂÂ struct ap_matrix_mdev *matrix_mdev;
+
+ÂÂÂ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ÂÂÂÂÂÂÂ if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
+ÂÂÂÂÂÂÂÂÂÂÂ test_bit_inv(apqi, matrix_mdev->matrix.aqm))
+ÂÂÂÂÂÂÂÂÂÂÂ return matrix_mdev;
+ÂÂÂ }
+
+ÂÂÂ return NULL;
+}
+
+void vfio_ap_mdev_remove_queue(unsigned long apid, unsigned long apqi)
+{
+ÂÂÂ struct ap_matrix_mdev *matrix_mdev;
+
+ÂÂÂ matrix_mdev = vfio_ap_mdev_find_matrix_mdev(apid, apqi);
+
+ÂÂÂ /*
+ÂÂÂÂ * If the queue is assigned to the mdev device and the mdev device
+ÂÂÂÂ * is in use by a guest
+ÂÂÂÂ */
+ÂÂÂ if (matrix_mdev && matrix_mdev->kvm) {
+ÂÂÂÂÂÂÂ /*
+ÂÂÂÂÂÂÂÂ * Unplug the adapter from the guest but don't unassign it
+ÂÂÂÂÂÂÂÂ * from the mdev device
+ÂÂÂÂÂÂÂÂ */
+ÂÂÂÂÂÂÂ clear_bit_inv(apid, matrix_mdev->shadow_crycb->apm);
+ÂÂÂÂÂÂÂ vfio_ap_mdev_update_crycb(matrix_mdev);
+ÂÂÂ }
+
+ÂÂÂ vfio_ap_mdev_reset_queue(apid, apqi);
+}
+
+void vfio_ap_mdev_probe_queue(unsigned long apid, unsigned long apqi)
+{
+ÂÂÂ struct ap_matrix_mdev *matrix_mdev;
+
+ÂÂÂ matrix_mdev = vfio_ap_mdev_find_matrix_mdev(apid, apqi);
+
+ÂÂÂ /*
+ÂÂÂÂ * If the queue is assigned to the mdev device and the mdev device
+ÂÂÂÂ * is in use by a guest
+ÂÂÂÂ */
+ÂÂÂ if (matrix_mdev && matrix_mdev->kvm) {
+ÂÂÂÂÂÂÂ /* Plug the adapter into the guest */
+ÂÂÂÂÂÂÂ set_bit_inv(apid, matrix_mdev->shadow_crycb->apm);

Before you set the bit in the shadow...

yes


+
+ÂÂÂÂÂÂÂ /* Make sure the queue is also plugged in to the guest */

I Think we must take care in the use of queues and domains to avoid being confusing.

Duly noted.


+ÂÂÂÂÂÂÂ if (!test_bit_inv(apqi, matrix_mdev->shadow_crycb->aqm))
+ÂÂÂÂÂÂÂÂÂÂÂ set_bit_inv(apqi, matrix_mdev->shadow_crycb->aqm);
+
+ÂÂÂÂÂÂÂ vfio_ap_mdev_update_crycb(matrix_mdev);

...and you update the real CRYCB,

don't you need to verify that all ap queues which verify APID and any already pre-existing APQI are bound to the driver?

Same for pre-existing APID if you set the APQI.

Since I last responded to your comment, I've been doing some testing
and discovered some scenarios that need to be considered. There is
definitely some additional checking that needs to be done here. It is
not necessary to verify all queues are bound to the vfio_ap driver, but
it we must assure that no queues are reserved for use
by the zcrypt drivers (i.e., APQN set in the apmask/aqmask).



+ÂÂÂ }
+}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index e8457aa61976..00eaae3b853f 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -87,5 +87,7 @@ struct ap_matrix_mdev {
 extern int vfio_ap_mdev_register(void);
 extern void vfio_ap_mdev_unregister(void);
+void vfio_ap_mdev_remove_queue(unsigned long apid, unsigned long apqi);
+void vfio_ap_mdev_probe_queue(unsigned long apid, unsigned long apqi);
 #endif /* _VFIO_AP_PRIVATE_H_ */