Re: [RFC 0/2] x86/PCI: Ignore EFI memmap MMIO entries

From: Rafael J. Wysocki
Date: Tue Feb 15 2022 - 12:21:09 EST


On Tue, Feb 15, 2022 at 5:12 PM Hans de Goede <hdegoede@xxxxxxxxxx> wrote:
>
> Hi All,
>
> On 2/14/22 16:17, Hans de Goede wrote:
> > Hi All,
> >
> > Here is a new attempt at fixing the issue where on some laptops
> > there are EFI memmap MMIO entries covering the entire PCI bridge
> > mem window, causing Linux to be unable to find free space to
> > assign to unassigned BARs.
> >
> > This is marked as RFC atm because I'm waiting for feedback from
> > testers.
>
> Unfortunately the troublesome 0xdfa00000-0xdfa0ffff region on
> the Lenovo X1 carbon gen 2 is marked as MMIO by the EFI memmap,
> so the approach from this series won't work.
>
> Interestingly enough this RFC series does seem to help to fix
> the suspend/resume on this x1c2, since for some reason merely
> splitting the original:
>
> BIOS-e820: [mem 0x00000000dceff000-0x00000000dfa0ffff] reserved
>
> range into:
>
> BIOS-e820: [mem 0x00000000dceff000-0x00000000df9fffff] reserved
> BIOS-e820: [mem 0x00000000dfa00000-0x00000000dfa0ffff] MMIO
>
> causes the PCI resource allocation code to pick slightly
> different resources avoiding the troublesome overlap, see:
> https://bugzilla.redhat.com/show_bug.cgi?id=2029207
> for logs.
>
> But I don't think we should rely in this, since from a
> arch_remove_reservations() pov the troublesome overlap area
> which is now marked as MMIO is fair game for PCI bars with
> the change to allow MMIO areas for PCI bars, so things seem
> to mostly work by sheer luck after this RFC series.
>
> So now I have yet another plan to fix this (see below) I'll get
> that tested and assuming it works post that as a proper patch.
>
> Regards,
>
> Hans
>
>
>
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index 490411dba438..573e1323f490 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -64,6 +64,8 @@ void pcibios_scan_specific_bus(int busn);
>
> /* pci-irq.c */
>
> +struct pci_dev;
> +
> struct irq_info {
> u8 bus, devfn; /* Bus, device and function */
> struct {
> @@ -232,3 +234,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
> # define x86_default_pci_init_irq NULL
> # define x86_default_pci_fixup_irqs NULL
> #endif
> +
> +#if defined CONFIG_PCI && defined CONFIG_ACPI
> +extern bool pci_use_e820;
> +#else
> +#define pci_use_e820 true
> +#endif
> diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
> index 9b9fb7882c20..e8dc9bc327bd 100644
> --- a/arch/x86/kernel/resource.c
> +++ b/arch/x86/kernel/resource.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/ioport.h>
> #include <asm/e820/api.h>
> +#include <asm/pci_x86.h>
>
> static void resource_clip(struct resource *res, resource_size_t start,
> resource_size_t end)
> @@ -28,6 +29,9 @@ static void remove_e820_regions(struct resource *avail)
> int i;
> struct e820_entry *entry;
>
> + if (!pci_use_e820)
> + return;
> +
> for (i = 0; i < e820_table->nr_entries; i++) {
> entry = &e820_table->entries[i];
>
> diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
> index 052f1d78a562..7167934819b3 100644
> --- a/arch/x86/pci/acpi.c
> +++ b/arch/x86/pci/acpi.c
> @@ -1,4 +1,5 @@
> // SPDX-License-Identifier: GPL-2.0
> +#include <linux/efi.h>
> #include <linux/pci.h>
> #include <linux/acpi.h>
> #include <linux/init.h>
> @@ -21,6 +22,7 @@ struct pci_root_info {
>
> static bool pci_use_crs = true;
> static bool pci_ignore_seg;
> +bool pci_use_e820 = true;
>
> static int __init set_use_crs(const struct dmi_system_id *id)
> {
> @@ -291,6 +293,28 @@ static bool resource_is_pcicfg_ioport(struct resource *res)
> res->start == 0xCF8 && res->end == 0xCFF;
> }
>
> +static bool resource_matches_efi_mmio_region(const struct resource *res)

I would call this resource_is_efi_mmio() FWIW.

> +{
> + unsigned long long start, end;
> + efi_memory_desc_t *md;
> +
> + if (!efi_enabled(EFI_MEMMAP))
> + return false;
> +
> + for_each_efi_memory_desc(md) {
> + if (md->type != EFI_MEMORY_MAPPED_IO)
> + continue;
> +
> + start = md->phys_addr;
> + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
> +
> + if (res->start >= start && res->end <= end)
> + return true;
> + }
> +
> + return false;
> +}
> +
> static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
> {
> struct acpi_device *device = ci->bridge;
> @@ -300,9 +324,16 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
>
> status = acpi_pci_probe_root_resources(ci);
> if (pci_use_crs) {
> - resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
> + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
> if (resource_is_pcicfg_ioport(entry->res))
> resource_list_destroy_entry(entry);
> + if (resource_matches_efi_mmio_region(entry->res)) {

I would add a pci_use_e820 check to this.

> + dev_info(&device->dev,
> + "host bridge window %pR is marked by EFI as MMIO\n",
> + entry->res);
> + pci_use_e820 = false;
> + }
> + }
> return status;
> }

Overall, it looks reasonable to me.