[PATCH] PCI/ASPM: fix UAF by removing cached downstream

From: Ding Hui
Date: Sat Apr 29 2023 - 09:33:55 EST


If the function 0 of a multifunction device is removed, an freed
downstream pointer will be left in struct pcie_link_state, and then
when pcie_config_aspm_link() be invoked from any path, we will get a
KASAN use-after-free report.

Reproducer:

[root@host ~]# cat repro.sh
#!/bin/bash
DEV_F0="0000:03:00.0"
echo 1 > /sys/bus/pci/devices/$DEV_F0/remove
echo powersave > /sys/module/pcie_aspm/parameters/policy

Result:

[ 177.433490] ==================================================================
[ 177.433814] BUG: KASAN: slab-use-after-free in pcie_config_aspm_link+0x42d/0x500
[ 177.434114] Read of size 4 at addr ffff8881070c80a0 by task repro.sh/2056

[ 177.434747] CPU: 3 PID: 2056 Comm: repro.sh Not tainted 6.3.0+ #15
[ 177.435062] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
[ 177.435379] Call Trace:
[ 177.435741] <TASK>
[ 177.436058] dump_stack_lvl+0x33/0x50
[ 177.436379] print_address_description.constprop.0+0x27/0x310
[ 177.436733] print_report+0x3e/0x70
[ 177.437067] ? pcie_config_aspm_link+0x42d/0x500
[ 177.437398] kasan_report+0xae/0xe0
[ 177.437753] ? pcie_config_aspm_link+0x42d/0x500
[ 177.438115] pcie_config_aspm_link+0x42d/0x500
[ 177.438447] pcie_aspm_set_policy+0x8e/0x1a0
[ 177.438862] param_attr_store+0x162/0x2c0
[ 177.439217] ? __pfx_sysfs_kf_write+0x10/0x10
[ 177.439571] module_attr_store+0x3e/0x80
[ 177.439898] kernfs_fop_write_iter+0x2d5/0x460
[ 177.440218] vfs_write+0x72e/0xae0
[ 177.440552] ? __pfx_vfs_write+0x10/0x10
[ 177.440866] ? __count_memcg_events+0xea/0x1d0
[ 177.441173] ksys_write+0xed/0x1c0
[ 177.441519] ? __pfx_ksys_write+0x10/0x10
[ 177.441874] do_syscall_64+0x38/0x90
[ 177.442202] entry_SYSCALL_64_after_hwframe+0x72/0xdc
[ 177.442547] RIP: 0033:0x7f0829a622c7
[ 177.442922] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[ 177.443721] RSP: 002b:00007ffc6bb2d128 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 177.444151] RAX: ffffffffffffffda RBX: 000000000000000a RCX: 00007f0829a622c7
[ 177.444572] RDX: 000000000000000a RSI: 000055b8e6b168f0 RDI: 0000000000000001
[ 177.445049] RBP: 000055b8e6b168f0 R08: 00007f0829b12000 R09: 00007f0829b12080
[ 177.445490] R10: 00007f0829b11f80 R11: 0000000000000246 R12: 000000000000000a
[ 177.445984] R13: 00007f0829b535a0 R14: 000000000000000a R15: 00007f0829b53780
[ 177.446443] </TASK>

[ 177.447402] Allocated by task 1:
[ 177.447902] kasan_save_stack+0x1e/0x40
[ 177.448444] kasan_set_track+0x21/0x30
[ 177.448946] __kasan_kmalloc+0x7b/0x90
[ 177.449447] pci_alloc_dev+0x44/0x260
[ 177.449950] pci_scan_single_device+0x132/0x280
[ 177.450467] pci_scan_slot+0x193/0x510
[ 177.450986] pci_scan_child_bus_extend+0x61/0x5d0
[ 177.451528] pci_scan_bridge_extend+0xc4c/0x10c0
[ 177.452021] pci_scan_child_bus_extend+0x332/0x5d0
[ 177.452539] acpi_pci_root_create+0x4ec/0x700
[ 177.453015] pci_acpi_scan_root+0x3ad/0x4e0
[ 177.453495] acpi_pci_root_add+0x3f2/0x910
[ 177.453973] acpi_bus_attach+0x38b/0x890
[ 177.454415] device_for_each_child+0xd8/0x150
[ 177.454896] acpi_dev_for_each_child+0x77/0xa0
[ 177.455328] acpi_bus_attach+0x19f/0x890
[ 177.455785] device_for_each_child+0xd8/0x150
[ 177.456189] acpi_dev_for_each_child+0x77/0xa0
[ 177.456626] acpi_bus_attach+0x19f/0x890
[ 177.457077] acpi_bus_scan+0x98/0x160
[ 177.457481] acpi_scan_init+0x1e3/0x3f0
[ 177.457905] acpi_init+0xff/0x2c0
[ 177.458300] do_one_initcall+0x87/0x300
[ 177.458694] do_initcalls+0x127/0x260
[ 177.459113] kernel_init_freeable+0x811/0xc80
[ 177.459517] kernel_init+0x1b/0x1e0
[ 177.459915] ret_from_fork+0x29/0x50

[ 177.460724] Freed by task 2011:
[ 177.461119] kasan_save_stack+0x1e/0x40
[ 177.461556] kasan_set_track+0x21/0x30
[ 177.461939] kasan_save_free_info+0x2a/0x50
[ 177.462320] __kasan_slab_free+0x106/0x190
[ 177.462737] __kmem_cache_free+0x133/0x270
[ 177.463115] device_release+0x98/0x210
[ 177.463492] kobject_cleanup+0x101/0x360
[ 177.463952] pci_stop_and_remove_bus_device_locked+0xdb/0x110
[ 177.464341] remove_store+0xcf/0xe0
[ 177.464746] kernfs_fop_write_iter+0x2d5/0x460
[ 177.465112] vfs_write+0x72e/0xae0
[ 177.465493] ksys_write+0xed/0x1c0
[ 177.465834] do_syscall_64+0x38/0x90
[ 177.466174] entry_SYSCALL_64_after_hwframe+0x72/0xdc

This patch is based on original RFC by Bolarinwa O. Saheed in 2021 [1],
which using pci_function_0() to obtain child instead of cached
downstream pointer in struct pcie_link_state.

In addition, I added the condition about child to avoid dereference
null pointer, in case the device is removed without rescan back.

[1] https://lore.kernel.org/lkml/20211106175503.27178-5-refactormyself@xxxxxxxxx/

--------

The commit log of original patch:

Information on the downstream component is cached in
struct pcie_link_state.downstream it obtained within
alloc_pcie_link_state() by calling pci_function_0()

- remove *downstream* from the struct pcie_link_state.
- replaces references to pcie_link_state.downstream with
a call to pci_function_0(pdev->subordinate).

Originally-by: Bolarinwa O. Saheed <refactormyself@xxxxxxxxx>
Fixes: b5a0a9b59c81 ("PCI/ASPM: Read and set up L1 substate capabilities")
Debugged-by: Zongquan Qin <qinzongquan@xxxxxxxxxxxxxx>
Signed-off-by: Ding Hui <dinghui@xxxxxxxxxxxxxx>
---
drivers/pci/pcie/aspm.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 66d7514ca111..bca3f52009fe 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -44,7 +44,6 @@

struct pcie_link_state {
struct pci_dev *pdev; /* Upstream component of the Link */
- struct pci_dev *downstream; /* Downstream component, function 0 */
struct pcie_link_state *root; /* pointer to the root port link */
struct pcie_link_state *parent; /* pointer to the parent Link state */
struct list_head sibling; /* node in link_list */
@@ -474,7 +473,8 @@ static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
static void aspm_calc_l1ss_info(struct pcie_link_state *link,
u32 parent_l1ss_cap, u32 child_l1ss_cap)
{
- struct pci_dev *child = link->downstream, *parent = link->pdev;
+ struct pci_dev *parent = link->pdev;
+ struct pci_dev *child = pci_function_0(link->pdev->subordinate);
u32 val1, val2, scale1, scale2;
u32 t_common_mode, t_power_on, l1_2_threshold, scale, value;
u32 ctl1 = 0, ctl2 = 0;
@@ -484,6 +484,9 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
if (!(link->aspm_support & ASPM_STATE_L1_2_MASK))
return;

+ if (!child)
+ return;
+
/* Choose the greater of the two Port Common_Mode_Restore_Times */
val1 = (parent_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
val2 = (child_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
@@ -565,11 +568,12 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,

static void aspm_l1ss_init(struct pcie_link_state *link)
{
- struct pci_dev *child = link->downstream, *parent = link->pdev;
+ struct pci_dev *parent = link->pdev;
+ struct pci_dev *child = pci_function_0(link->pdev->subordinate);
u32 parent_l1ss_cap, child_l1ss_cap;
u32 parent_l1ss_ctl1 = 0, child_l1ss_ctl1 = 0;

- if (!parent->l1ss || !child->l1ss)
+ if (!parent->l1ss || !child || !child->l1ss)
return;

/* Setup L1 substate */
@@ -622,7 +626,8 @@ static void aspm_l1ss_init(struct pcie_link_state *link)

static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
{
- struct pci_dev *child = link->downstream, *parent = link->pdev;
+ struct pci_dev *parent = link->pdev;
+ struct pci_dev *child = pci_function_0(link->pdev->subordinate);
u32 parent_lnkcap, child_lnkcap;
u16 parent_lnkctl, child_lnkctl;
struct pci_bus *linkbus = parent->subordinate;
@@ -634,6 +639,9 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
return;
}

+ if (!child)
+ return;
+
/*
* If ASPM not supported, don't mess with the clocks and link,
* bail out now.
@@ -701,7 +709,11 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
{
u32 val, enable_req;
- struct pci_dev *child = link->downstream, *parent = link->pdev;
+ struct pci_dev *parent = link->pdev;
+ struct pci_dev *child = pci_function_0(link->pdev->subordinate);
+
+ if (!child)
+ return;

enable_req = (link->aspm_enabled ^ state) & state;

@@ -760,9 +772,13 @@ static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
{
u32 upstream = 0, dwstream = 0;
- struct pci_dev *child = link->downstream, *parent = link->pdev;
+ struct pci_dev *parent = link->pdev;
+ struct pci_dev *child = pci_function_0(link->pdev->subordinate);
struct pci_bus *linkbus = parent->subordinate;

+ if (!child)
+ return;
+
/* Enable only the states that were not explicitly disabled */
state &= (link->aspm_capable & ~link->aspm_disable);

@@ -867,7 +883,6 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)

INIT_LIST_HEAD(&link->sibling);
link->pdev = pdev;
- link->downstream = pci_function_0(pdev->subordinate);

/*
* Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
--
2.17.1