[PATCH v1 21/26] crypto: ccp: Add panic notifier for SEV/SNP firmware shutdown on kdump

From: Michael Roth
Date: Sat Dec 30 2023 - 11:28:46 EST


From: Ashish Kalra <ashish.kalra@xxxxxxx>

Add a kdump-safe version of sev_firmware_shutdown() and register it on
the panic notifier list (i.e. as a crash_kexec_post_notifier), so that
it is invoked during panic/crash to do SEV/SNP shutdown. This is
required to transition all IOMMU pages to the reclaim/hypervisor state;
otherwise, re-init of the IOMMU pages during crashdump kernel boot fails
and panics the crashdump kernel. The panic notifier runs in atomic
context, so it does not acquire any locks/mutexes and polls for PSP
command completion instead of depending on the PSP command completion
interrupt.

Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
[mdr: remove use of "we" in comments]
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
---
arch/x86/kernel/crash.c | 7 +++
drivers/crypto/ccp/sev-dev.c | 112 +++++++++++++++++++++++++----------
2 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index cbffb27f6468..b8c44c492d43 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -59,6 +59,13 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_emergency_stop_pt();

+ /*
+ * For SNP, do a wbinvd() on remote CPUs so that SNP_SHUTDOWN
+ * can safely be done on the local CPU.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ wbinvd();
+
disable_local_APIC();
}

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 9792c7af3005..598878e760bc 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -21,6 +21,7 @@
#include <linux/hw_random.h>
#include <linux/ccp.h>
#include <linux/firmware.h>
+#include <linux/panic_notifier.h>
#include <linux/gfp.h>
#include <linux/cpufeature.h>
#include <linux/fs.h>
@@ -144,6 +145,26 @@ static int sev_wait_cmd_ioc(struct sev_device *sev,
{
int ret;

+ /*
+ * If invoked during panic handling, local interrupts are disabled,
+ * so the PSP command completion interrupt can't be used. Poll for
+ * PSP command completion instead.
+ */
+ if (irqs_disabled()) {
+ unsigned long timeout_usecs = (timeout * USEC_PER_SEC) / 10;
+
+ /* Poll for SEV command completion: */
+ while (timeout_usecs--) {
+ *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+ if (*reg & PSP_CMDRESP_RESP)
+ return 0;
+
+ udelay(10);
+ }
+
+ return -ETIMEDOUT;
+ }
+
ret = wait_event_timeout(sev->int_queue,
sev->int_rcvd, timeout * HZ);
if (!ret)
@@ -1358,17 +1379,6 @@ static int __sev_platform_shutdown_locked(int *error)
return ret;
}

-static int sev_platform_shutdown(int *error)
-{
- int rc;
-
- mutex_lock(&sev_cmd_mutex);
- rc = __sev_platform_shutdown_locked(NULL);
- mutex_unlock(&sev_cmd_mutex);
-
- return rc;
-}
-
static int sev_get_platform_state(int *state, int *error)
{
struct sev_user_data_status data;
@@ -1644,7 +1654,7 @@ static int sev_update_firmware(struct device *dev)
return ret;
}

-static int __sev_snp_shutdown_locked(int *error)
+static int __sev_snp_shutdown_locked(int *error, bool in_panic)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_data_snp_shutdown_ex data;
@@ -1657,7 +1667,16 @@ static int __sev_snp_shutdown_locked(int *error)
data.length = sizeof(data);
data.iommu_snp_shutdown = 1;

- wbinvd_on_all_cpus();
+ /*
+ * If invoked during panic handling, local interrupts are disabled and
+ * all other CPUs are stopped, so wbinvd_on_all_cpus() can't be called.
+ * In that case, a wbinvd() is done on remote CPUs via the NMI
+ * callback, so only a local wbinvd() is needed here.
+ */
+ if (!in_panic)
+ wbinvd_on_all_cpus();
+ else
+ wbinvd();

ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
/* SHUTDOWN may require DF_FLUSH */
@@ -1701,17 +1720,6 @@ static int __sev_snp_shutdown_locked(int *error)
return ret;
}

-static int sev_snp_shutdown(int *error)
-{
- int rc;
-
- mutex_lock(&sev_cmd_mutex);
- rc = __sev_snp_shutdown_locked(error);
- mutex_unlock(&sev_cmd_mutex);
-
- return rc;
-}
-
static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
@@ -2191,19 +2199,29 @@ int sev_dev_init(struct psp_device *psp)
return ret;
}

-static void sev_firmware_shutdown(struct sev_device *sev)
+static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic)
{
int error;

- sev_platform_shutdown(NULL);
+ __sev_platform_shutdown_locked(NULL);

if (sev_es_tmr) {
- /* The TMR area was encrypted, flush it from the cache */
- wbinvd_on_all_cpus();
+ /*
+ * The TMR area was encrypted, flush it from the cache.
+ *
+ * If invoked during panic handling, local interrupts are
+ * disabled and other CPUs are stopped, so wbinvd_on_all_cpus()
+ * can't be used. In that case, wbinvd() is done on remote CPUs
+ * via the NMI callback, so a local wbinvd() is sufficient here.
+ */
+ if (!in_panic)
+ wbinvd_on_all_cpus();
+ else
+ wbinvd();

__snp_free_firmware_pages(virt_to_page(sev_es_tmr),
get_order(sev_es_tmr_size),
- false);
+ true);
sev_es_tmr = NULL;
}

@@ -2219,7 +2237,14 @@ static void sev_firmware_shutdown(struct sev_device *sev)
snp_range_list = NULL;
}

- sev_snp_shutdown(&error);
+ __sev_snp_shutdown_locked(&error, in_panic);
+}
+
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+ mutex_lock(&sev_cmd_mutex);
+ __sev_firmware_shutdown(sev, false);
+ mutex_unlock(&sev_cmd_mutex);
}

void sev_dev_destroy(struct psp_device *psp)
@@ -2237,6 +2262,28 @@ void sev_dev_destroy(struct psp_device *psp)
psp_clear_sev_irq_handler(psp);
}

+static int sev_snp_shutdown_on_panic(struct notifier_block *nb,
+ unsigned long reason, void *arg)
+{
+ struct sev_device *sev = psp_master->sev_data;
+
+ /*
+ * Panic callbacks are executed with all other CPUs stopped,
+ * so don't wait for sev_cmd_mutex to be released since it
+ * would block here forever.
+ */
+ if (mutex_is_locked(&sev_cmd_mutex))
+ return NOTIFY_DONE;
+
+ __sev_firmware_shutdown(sev, true);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block sev_snp_panic_notifier = {
+ .notifier_call = sev_snp_shutdown_on_panic,
+};
+
int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
void *data, int *error)
{
@@ -2274,6 +2321,8 @@ void sev_pci_init(void)
dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ?
"-SNP" : "", sev->api_major, sev->api_minor, sev->build);

+ atomic_notifier_chain_register(&panic_notifier_list,
+ &sev_snp_panic_notifier);
return;

err:
@@ -2288,4 +2337,7 @@ void sev_pci_exit(void)
return;

sev_firmware_shutdown(sev);
+
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &sev_snp_panic_notifier);
}
--
2.25.1