Re: [PATCH] x86/sgx: fix a NULL pointer

From: Huang, Kai
Date: Mon Jul 17 2023 - 18:42:19 EST


On Mon, 2023-07-17 at 13:29 -0700, Haitao Huang wrote:
> Under heavy load, the SGX EPC reclaimers (current ksgxd or future EPC
> cgroup worker) may reclaim the SECS EPC page for an enclave and set
> encl->secs.epc_page to NULL. 
>

Since this is a bug fix, I don't think you need to mention the "future EPC cgroup worker".

> But the SECS EPC page is used for EAUG in
> the SGX #PF handler without checking for NULL and reloading.
>
> Fix this by checking if SECS is loaded before EAUG and load it if it was
Nit: "load it" in the line above should be "loading it".
> reclaimed.
>
> Fixes: 5a90d2c3f5ef8 ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Haitao Huang <haitao.huang@xxxxxxxxxxxxxxx>
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 25 ++++++++++++++++++++-----
> arch/x86/kernel/cpu/sgx/main.c | 4 ++++
> 2 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 2a0e90fe2abc..2ab544da1664 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -235,6 +235,16 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
> return epc_page;
> }
>
> +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
> +{
> + struct sgx_epc_page *epc_page = encl->secs.epc_page;
> +
> + if (!epc_page)
> + epc_page = sgx_encl_eldu(&encl->secs, NULL);
> +
> + return epc_page;
> +}
> +
> static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> struct sgx_encl_page *entry)
> {
> @@ -248,11 +258,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return entry;
> }
>
> - if (!(encl->secs.epc_page)) {
> - epc_page = sgx_encl_eldu(&encl->secs, NULL);
> - if (IS_ERR(epc_page))
> - return ERR_CAST(epc_page);
> - }
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page))
> + return ERR_CAST(epc_page);
>
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> if (IS_ERR(epc_page))
> @@ -339,6 +347,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
> mutex_lock(&encl->lock);
>
> + epc_page = sgx_encl_load_secs(encl);
> + if (IS_ERR(epc_page)) {
> + if (PTR_ERR(epc_page) == -EBUSY)
> + vmret = VM_FAULT_NOPAGE;
> + goto err_out_unlock;
> + }
> +
> epc_page = sgx_alloc_epc_page(encl_page, false);
> if (IS_ERR(epc_page)) {
> if (PTR_ERR(epc_page) == -EBUSY)
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 166692f2d501..4662a364ce62 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -257,6 +257,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
>
> mutex_lock(&encl->lock);
>
> + /* Should not be possible */
> + if (WARN_ON(!(encl->secs.epc_page)))
> + goto out;
> +

This isn't a mandatory part of this fix, is it?

If there's a good reason to do this, then you should probably describe that
reason in the changelog.


> sgx_encl_ewb(epc_page, backing);
> encl_page->epc_page = NULL;
> encl->secs_child_cnt--;