Re: [PATCH v10 14/50] crypto: ccp: Add support to initialize the AMD-SP for SEV-SNP

From: Borislav Petkov
Date: Mon Nov 27 2023 - 05:00:51 EST


On Mon, Oct 16, 2023 at 08:27:43AM -0500, Michael Roth wrote:
> +/*
> + * SEV_DATA_RANGE_LIST:
> + * Array containing range of pages that firmware transitions to HV-fixed
> + * page state.
> + */
> +struct sev_data_range_list *snp_range_list;
> +static int __sev_snp_init_locked(int *error);

Put the function above the caller instead of doing a forward
declaration.

> static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
> {
> struct sev_device *sev = psp_master->sev_data;
> @@ -466,9 +479,9 @@ static inline int __sev_do_init_locked(int *psp_ret)
> return __sev_init_locked(psp_ret);
> }
>
> -static int __sev_platform_init_locked(int *error)
> +static int ___sev_platform_init_locked(int *error, bool probe)
> {
> - int rc = 0, psp_ret = SEV_RET_NO_FW_CALL;
> + int rc, psp_ret = SEV_RET_NO_FW_CALL;
> struct psp_device *psp = psp_master;
> struct sev_device *sev;
>
> @@ -480,6 +493,34 @@ static int __sev_platform_init_locked(int *error)
> if (sev->state == SEV_STATE_INIT)
> return 0;
>
> + /*
> + * Legacy guests cannot be running while SNP_INIT(_EX) is executing,
> + * so perform SEV-SNP initialization at probe time.
> + */
> + rc = __sev_snp_init_locked(error);
> + if (rc && rc != -ENODEV) {
> + /*
> + * Don't abort the probe if SNP INIT failed,
> + * continue to initialize the legacy SEV firmware.
> + */
> + dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n", rc, *error);
> + }
> +
> + /* Delay SEV/SEV-ES support initialization */
> + if (probe && !psp_init_on_probe)
> + return 0;
> +
> + if (!sev_es_tmr) {
> + /* Obtain the TMR memory area for SEV-ES use */
> + sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE);
> + if (sev_es_tmr)
> + /* Must flush the cache before giving it to the firmware */
> + clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE);
> + else
> + dev_warn(sev->dev,
> + "SEV: TMR allocation failed, SEV-ES support unavailable\n");
> + }
> +
> if (sev_init_ex_buffer) {
> rc = sev_read_init_ex_file();
> if (rc)
> @@ -522,6 +563,11 @@ static int __sev_platform_init_locked(int *error)
> return 0;
> }
>
> +static int __sev_platform_init_locked(int *error)
> +{
> + return ___sev_platform_init_locked(error, false);
> +}

Uff, this is silly. And it makes the code hard to follow and that meat
of the platform init functionality in the ___-prefixed function a mess.

And the problem is that that "probe" functionality is replicated from
the one place where it is actually needed - sev_pci_init() which calls
that new sev_platform_init_on_probe() function - to everything that
calls __sev_platform_init_locked() for which you've added a wrapper.

What you should do, instead, is split the code around
__sev_snp_init_locked() in a separate function which does only that and
is called something like __sev_platform_init_snp_locked() or so which
does that unconditional work. And then you define:

_sev_platform_init_locked(int *error, bool probe)

note the *one* '_' - i.e., first layer:

_sev_platform_init_locked(int *error, bool probe):
{
__sev_platform_init_snp_locked(error);

if (!probe)
return 0;

if (psp_init_on_probe)
__sev_platform_init_locked(error);

...
}

and you do the probing in that function only so that it doesn't get lost
in the bunch of things __sev_platform_init_locked() does.

And then you call _sev_platform_init_locked() everywhere and no need for
a second sev_platform_init_on_probe().

> +
> int sev_platform_init(int *error)
> {
> int rc;
> @@ -534,6 +580,17 @@ int sev_platform_init(int *error)
> }
> EXPORT_SYMBOL_GPL(sev_platform_init);
>
> +static int sev_platform_init_on_probe(int *error)
> +{
> + int rc;
> +
> + mutex_lock(&sev_cmd_mutex);
> + rc = ___sev_platform_init_locked(error, true);
> + mutex_unlock(&sev_cmd_mutex);
> +
> + return rc;
> +}
> +
> static int __sev_platform_shutdown_locked(int *error)
> {
> struct sev_device *sev = psp_master->sev_data;
> @@ -838,6 +895,191 @@ static int sev_update_firmware(struct device *dev)
> return ret;
> }
>
> +static void snp_set_hsave_pa(void *arg)
> +{
> + wrmsrl(MSR_VM_HSAVE_PA, 0);
> +}
> +
> +static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
> +{
> + struct sev_data_range_list *range_list = arg;
> + struct sev_data_range *range = &range_list->ranges[range_list->num_elements];
> + size_t size;
> +
> + if ((range_list->num_elements * sizeof(struct sev_data_range) +
> + sizeof(struct sev_data_range_list)) > PAGE_SIZE)
> + return -E2BIG;

Why? A comment would be helpful like with the rest this patch adds.

> + switch (rs->desc) {
> + case E820_TYPE_RESERVED:
> + case E820_TYPE_PMEM:
> + case E820_TYPE_ACPI:
> + range->base = rs->start & PAGE_MASK;
> + size = (rs->end + 1) - rs->start;
> + range->page_count = size >> PAGE_SHIFT;
> + range_list->num_elements++;
> + break;
> + default:
> + break;
> + }
> +
> + return 0;
> +}
> +
> +static int __sev_snp_init_locked(int *error)
> +{
> + struct psp_device *psp = psp_master;
> + struct sev_data_snp_init_ex data;
> + struct sev_device *sev;
> + int rc = 0;
> +
> + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> + return -ENODEV;
> +
> + if (!psp || !psp->sev_data)
> + return -ENODEV;

Only caller checks this already.

> + sev = psp->sev_data;
> +
> + if (sev->snp_initialized)

Do we really need this silly boolean or is there a way to query the
platform whether SNP has been initialized?

> + return 0;
> +
> + if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) {
> + dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n",
> + SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR);
> + return 0;
> + }
> +
> + /*
> + * The SNP_INIT requires the MSR_VM_HSAVE_PA must be set to 0h
> + * across all cores.
> + */
> + on_each_cpu(snp_set_hsave_pa, NULL, 1);
> +
> + /*
> + * Starting in SNP firmware v1.52, the SNP_INIT_EX command takes a list of
> + * system physical address ranges to convert into the HV-fixed page states
> + * during the RMP initialization. For instance, the memory that UEFI
> + * reserves should be included in the range list. This allows system
> + * components that occasionally write to memory (e.g. logging to UEFI
> + * reserved regions) to not fail due to RMP initialization and SNP enablement.
> + */
> + if (sev_version_greater_or_equal(SNP_MIN_API_MAJOR, 52)) {

Is there a generic way to probe SNP_INIT_EX presence in the firmware or
are FW version numbers the only way?

> + /*
> + * Firmware checks that the pages containing the ranges enumerated
> + * in the RANGES structure are either in the Default page state or in the

"default"

> + * firmware page state.
> + */
> + snp_range_list = kzalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!snp_range_list) {
> + dev_err(sev->dev,
> + "SEV: SNP_INIT_EX range list memory allocation failed\n");
> + return -ENOMEM;
> + }
> +
> + /*
> + * Retrieve all reserved memory regions setup by UEFI from the e820 memory map
> + * to be setup as HV-fixed pages.
> + */
> +


^ Superfluous newline.

> + rc = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, 0, ~0,
> + snp_range_list, snp_filter_reserved_mem_regions);
> + if (rc) {
> + dev_err(sev->dev,
> + "SEV: SNP_INIT_EX walk_iomem_res_desc failed rc = %d\n", rc);
> + return rc;
> + }
> +
> + memset(&data, 0, sizeof(data));
> + data.init_rmp = 1;
> + data.list_paddr_en = 1;
> + data.list_paddr = __psp_pa(snp_range_list);
> +
> + /*
> + * Before invoking SNP_INIT_EX with INIT_RMP=1, make sure that
> + * all dirty cache lines containing the RMP are flushed.
> + *
> + * NOTE: that includes writes via RMPUPDATE instructions, which
> + * are also cacheable writes.
> + */
> + wbinvd_on_all_cpus();
> +
> + rc = __sev_do_cmd_locked(SEV_CMD_SNP_INIT_EX, &data, error);
> + if (rc)
> + return rc;
> + } else {
> + /*
> + * SNP_INIT is equivalent to SNP_INIT_EX with INIT_RMP=1, so
> + * just as with that case, make sure all dirty cache lines
> + * containing the RMP are flushed.
> + */
> + wbinvd_on_all_cpus();
> +
> + rc = __sev_do_cmd_locked(SEV_CMD_SNP_INIT, NULL, error);
> + if (rc)
> + return rc;
> + }

So instead of duplicating the code here at the end of the if-else
branching, you can do:

void *arg = &data;

if () {
...
cmd = SEV_CMD_SNP_INIT_EX;
} else {
cmd = SEV_CMD_SNP_INIT;
arg = NULL;
}

wbinvd_on_all_cpus();
rc = __sev_do_cmd_locked(cmd, arg, error);
if (rc)
return rc;

> + /* Prepare for first SNP guest launch after INIT */
> + wbinvd_on_all_cpus();

Why is that WBINVD needed?

> + rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error);
> + if (rc)
> + return rc;
> +
> + sev->snp_initialized = true;
> + dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
> +
> + return rc;
> +}
> +
> +static int __sev_snp_shutdown_locked(int *error)
> +{
> + struct sev_device *sev = psp_master->sev_data;
> + struct sev_data_snp_shutdown_ex data;
> + int ret;
> +
> + if (!sev->snp_initialized)
> + return 0;
> +
> + memset(&data, 0, sizeof(data));
> + data.length = sizeof(data);
> + data.iommu_snp_shutdown = 1;
> +
> + wbinvd_on_all_cpus();
> +
> +retry:
> + ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
> + /* SHUTDOWN may require DF_FLUSH */
> + if (*error == SEV_RET_DFFLUSH_REQUIRED) {
> + ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL);
> + if (ret) {
> + dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n");
> + return ret;

When you return here, sev->snp_initialized is still true but, in
reality, it probably is in some half-broken state after issuing those
commands you it is not really initialized anymore.

> + }
> + goto retry;

This needs an upper limit from which to break out and not potentially
endless-loop.

> + }
> + if (ret) {
> + dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n");
> + return ret;
> + }
> +
> + sev->snp_initialized = false;
> + dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
> +
> + return ret;
> +}
> +
> +static int sev_snp_shutdown(int *error)
> +{
> + int rc;
> +
> + mutex_lock(&sev_cmd_mutex);
> + rc = __sev_snp_shutdown_locked(error);

Why is this "locked" version even there if it is called only here?

IOW, put all the logic in here - no need for
__sev_snp_shutdown_locked().

> + mutex_unlock(&sev_cmd_mutex);
> +
> + return rc;
> +}

...

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette