Re: [PATCH v9 16/21] KVM: s390: pci: add routines to start/stop interpretive execution

From: Matthew Rosato
Date: Tue Jun 28 2022 - 09:29:59 EST


On 6/28/22 6:53 AM, Pierre Morel wrote:


On 6/6/22 22:33, Matthew Rosato wrote:
These routines will be invoked at the time an s390x vfio-pci device is
associated with a KVM (or when the association is removed), allowing
the zPCI device to enable or disable load/store interpretation mode;
this requires the host zPCI device to inform firmware of the unique
token (GISA designation) that is associated with the owning KVM.

Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
---
  arch/s390/include/asm/kvm_host.h |  18 ++++
  arch/s390/include/asm/pci.h      |   1 +
  arch/s390/kvm/kvm-s390.c         |  15 +++
  arch/s390/kvm/pci.c              | 162 +++++++++++++++++++++++++++++++
  arch/s390/kvm/pci.h              |   5 +
  arch/s390/pci/pci.c              |   4 +
  6 files changed, 205 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8e381603b6a7..6e83d746bae2 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -19,6 +19,7 @@
  #include <linux/kvm.h>
  #include <linux/seqlock.h>
  #include <linux/module.h>
+#include <linux/pci.h>
  #include <asm/debug.h>
  #include <asm/cpu.h>
  #include <asm/fpu/api.h>
@@ -967,6 +968,8 @@ struct kvm_arch{
      DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
      struct kvm_s390_gisa_interrupt gisa_int;
      struct kvm_s390_pv pv;
+    struct list_head kzdev_list;
+    spinlock_t kzdev_list_lock;
  };
  #define KVM_HVA_ERR_BAD        (-1UL)
@@ -1017,4 +1020,19 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
+
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
+int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
+void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
+#else
+static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
+                        struct kvm *kvm)
+{
+    return -EPERM;
+}
+static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
+#endif
+
  #endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 322060a75d9f..85eb0ef9d4c3 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -194,6 +194,7 @@ struct zpci_dev {
      /* IOMMU and passthrough */
      struct s390_domain *s390_domain; /* s390 IOMMU domain data */
      struct kvm_zdev *kzdev;
+    struct mutex kzdev_lock;

I guess that, since it did not exist before, the lock is not there to protect the zpci_dev struct.

Right, not the zpci_dev itself, but it is protecting the contents of the kzdev (including the pointer to the zdev, e.g. kzdev->zdev).

Maybe add a comment to say what it is protecting.

Sure



  };
  static inline bool zdev_enabled(struct zpci_dev *zdev)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index a66da3f66114..4758bb731199 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2790,6 +2790,14 @@ static void sca_dispose(struct kvm *kvm)
      kvm->arch.sca = NULL;
  }
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+    if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+        kvm_s390_pci_clear_list(kvm);
+
+    __kvm_arch_free_vm(kvm);
+}
+
  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
  {
      gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
@@ -2872,6 +2880,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
      kvm_s390_crypto_init(kvm);
+    if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+        mutex_lock(&kvm->lock);
+        kvm_s390_pci_init_list(kvm);
+        kvm_s390_vcpu_pci_enable_interp(kvm);
+        mutex_unlock(&kvm->lock);
+    }
+
      mutex_init(&kvm->arch.float_int.ais_lock);
      spin_lock_init(&kvm->arch.float_int.lock);
      for (i = 0; i < FIRQ_LIST_COUNT; i++)
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index b232c8cbaa81..24211741deb0 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -12,7 +12,9 @@
  #include <asm/pci.h>
  #include <asm/pci_insn.h>
  #include <asm/pci_io.h>
+#include <asm/sclp.h>
  #include "pci.h"
+#include "kvm-s390.h"
  struct zpci_aift *aift;
@@ -423,6 +425,166 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
      kfree(kzdev);
  }
+
+/*
+ * Register device with the specified KVM. If interpretation facilities are
+ * available, enable them and let userspace indicate whether or not they will
+ * be used (specify SHM bit to disable).
+ */
+int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+{
+    int rc;
+
+    if (!zdev)
+        return -EINVAL;
+
+    mutex_lock(&zdev->kzdev_lock);
+
+    if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
+        mutex_unlock(&zdev->kzdev_lock);
+        return -EINVAL;
+    }
+
+    kvm_get_kvm(kvm);
+
+    mutex_lock(&kvm->lock);

Why do we need to lock KVM here?

Hmm, good point, now that we get a reference this seems unnecessary


just a question, I do not think it is a big problem.

+
+    rc = kvm_s390_pci_dev_open(zdev);
+    if (rc)
+        goto err;
+
+    /*
+     * If interpretation facilities aren't available, add the device to
+     * the kzdev list but don't enable for interpretation.
+     */
+    if (!kvm_s390_pci_interp_allowed())
+        goto out;
+
+    /*
+     * If this is the first request to use an interpreted device, make the
+     * necessary vcpu changes
+     */
+    if (!kvm->arch.use_zpci_interp)
+        kvm_s390_vcpu_pci_enable_interp(kvm);
+
+    if (zdev_enabled(zdev)) {
+        rc = zpci_disable_device(zdev);
+        if (rc)
+            goto err;
+    }
+
+    /*
+     * Store information about the identity of the kvm guest allowed to
+     * access this device via interpretation to be used by host CLP
+     */
+    zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+    rc = zpci_enable_device(zdev);
+    if (rc)
+        goto clear_gisa;
+
+    /* Re-register the IOMMU that was already created */
+    rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+                virt_to_phys(zdev->dma_table));
+    if (rc)
+        goto clear_gisa;
+
+out:
+    zdev->kzdev->kvm = kvm;
+
+    spin_lock(&kvm->arch.kzdev_list_lock);
+    list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+    spin_unlock(&kvm->arch.kzdev_list_lock);
+
+    mutex_unlock(&kvm->lock);
+    mutex_unlock(&zdev->kzdev_lock);
+    return 0;
+
+clear_gisa:
+    zdev->gisa = 0;
+err:
+    if (zdev->kzdev)
+        kvm_s390_pci_dev_release(zdev);
+    mutex_unlock(&kvm->lock);
+    mutex_unlock(&zdev->kzdev_lock);
+    kvm_put_kvm(kvm);
+    return rc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
+
+void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
+{
+    struct kvm *kvm;
+
+    if (!zdev)
+        return;
+
+    mutex_lock(&zdev->kzdev_lock);
+
+    if (WARN_ON(!zdev->kzdev)) {

When can this happen ?


It cannot today, nor should it ever (hence the WARN_ON) -- if it does, it's a case of programming error introduced somewhere (vfio has a KVM reference but we never built a kzdev via kvm_s390_pci_register_kvm or lost it somehow).

+        mutex_unlock(&zdev->kzdev_lock);
+        return;
+    }
+
+    kvm = zdev->kzdev->kvm;
+    mutex_lock(&kvm->lock);
+
+    /*
+     * A 0 gisa means interpretation was never enabled, just remove the
+     * device from the list.
+     */
+    if (zdev->gisa == 0)
+        goto out;
+
+    /* Forwarding must be turned off before interpretation */
+    if (zdev->kzdev->fib.fmt0.aibv != 0)
+        kvm_s390_pci_aif_disable(zdev, true);
+
+    /* Remove the host CLP guest designation */
+    zdev->gisa = 0;
+
+    if (zdev_enabled(zdev)) {
+        if (zpci_disable_device(zdev))
+            goto out;

NIT debug trace ?

We should at least get a trace entry from clp_disable_fh() if something goes wrong here.


+    }
+
+    if (zpci_enable_device(zdev))
+        goto out;

NIT debug trace?

And similarly, a trace entry from clp_enable_fh() here. So I think these are OK for now.

I am considering a follow-on to add new s390dbf entries for 'kvm-pci' or so; these might make sense there for additional context, but let's leave that for after this series.


Only some questions, otherwise, LGTM

Acked-by: Pierre Morel <pmorel@xxxxxxxxxxxxx>


Thanks!