Re: [RFC PATCH 3/3] mips: ralink: mt7621: do not use kzalloc too early

From: John Thomson
Date: Thu Nov 03 2022 - 07:16:04 EST


On Thu, 3 Nov 2022, at 05:05, John Thomson wrote:
> Following commit 6edf2576a6cc ("mm/slub: enable debugging memory wasting
> of kmalloc") mt7621 failed to boot very early, without showing any
> console messages.
> This exposed the pre-existing bug of mt7621.c using kzalloc before normal
> memory management was available.
> Prior to this slub change, there existed the unintended protection against
> "kmem_cache *s" being NULL as slab_pre_alloc_hook() happened to
> return NULL and bailed out of slab_alloc_node().
> This allowed mt7621 prom_soc_init to fail in the soc_dev_init kzalloc,
> but continue booting without this soc device.
>
> Console output from a DEBUG_ZBOOT vmlinuz kernel loading,
> with mm/slub modified to warn on kmem_cache zero or null:
>
> zimage at: 80B842A0 810B4BC0
> Uncompressing Linux at load address 80001000
> Copy device tree to address 80B80EE0
> Now, booting the kernel...
>
> [ 0.000000] Linux version 6.1.0-rc3+ (john@john)
> (mipsel-buildroot-linux-gnu-gcc.br_real (Buildroot
> 2021.11-4428-g6b6741b) 12.2.0, GNU ld (GNU Binutils) 2.39) #73 SMP Wed
> Nov 2 05:10:01 AEST 2022
> [ 0.000000] ------------[ cut here ]------------
> [ 0.000000] WARNING: CPU: 0 PID: 0 at mm/slub.c:3416
> kmem_cache_alloc+0x5a4/0x5e8
> [ 0.000000] Modules linked in:
> [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.1.0-rc3+ #73
> [ 0.000000] Stack : 810fff78 80084d98 00000000 00000004 00000000
> 00000000 80889d04 80c90000
> [ 0.000000] 80920000 807bd328 8089d368 80923bd3 00000000
> 00000001 80889cb0 00000000
> [ 0.000000] 00000000 00000000 807bd328 8084bcb1 00000002
> 00000002 00000001 6d6f4320
> [ 0.000000] 00000000 80c97d3d 80c97d68 fffffffc 807bd328
> 00000000 00000000 00000000
> [ 0.000000] 00000000 a0000000 80910000 8110a0b4 00000000
> 00000020 80010000 80010000
> [ 0.000000] ...
> [ 0.000000] Call Trace:
> [ 0.000000] [<80008260>] show_stack+0x28/0xf0
> [ 0.000000] [<8070c958>] dump_stack_lvl+0x60/0x80
> [ 0.000000] [<8002e184>] __warn+0xc4/0xf8
> [ 0.000000] [<8002e210>] warn_slowpath_fmt+0x58/0xa4
> [ 0.000000] [<801c0fac>] kmem_cache_alloc+0x5a4/0x5e8
> [ 0.000000] [<8092856c>] prom_soc_init+0x1fc/0x2b4
> [ 0.000000] [<80928060>] prom_init+0x44/0xf0
> [ 0.000000] [<80929214>] setup_arch+0x4c/0x6a8
> [ 0.000000] [<809257e0>] start_kernel+0x88/0x7c0
> [ 0.000000]
> [ 0.000000] ---[ end trace 0000000000000000 ]---
> [ 0.000000] SoC Type: MediaTek MT7621 ver:1 eco:3
> [ 0.000000] printk: bootconsole [early0] enabled
>
> This early kzalloc was introduced in commit 71b9b5e0130d ("MIPS: ralink:
> mt7621: introduce 'soc_device' initialization")
>
> Link:
> https://lore.kernel.org/linux-mm/becf2ac3-2a90-4f3a-96d9-a70f67c66e4a@xxxxxxxxxxxxxxxx/
> Signed-off-by: John Thomson <git@xxxxxxxxxxxxxxxxxxxxxxxxxxx>
> ---
> arch/mips/ralink/mt7621.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
> index f2443b833bc3..836965021d5c 100644
> --- a/arch/mips/ralink/mt7621.c
> +++ b/arch/mips/ralink/mt7621.c
> @@ -25,6 +25,7 @@
> #define MT7621_MEM_TEST_PATTERN 0xaa5555aa
>
> static u32 detect_magic __initdata;
> +struct ralink_soc_info *soc_info_ptr;
>
> int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
> {
> @@ -147,27 +148,30 @@ static const char __init *mt7621_get_soc_revision(void)
> return "E1";
> }
>
> -static void soc_dev_init(struct ralink_soc_info *soc_info)
> +static int __init mt7621_soc_dev_init(void)
> {
> struct soc_device *soc_dev;
> struct soc_device_attribute *soc_dev_attr;
>
> soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
> if (!soc_dev_attr)
> - return;
> + return -ENOMEM;
>
> soc_dev_attr->soc_id = "mt7621";
> soc_dev_attr->family = "Ralink";
> soc_dev_attr->revision = mt7621_get_soc_revision();
>
> - soc_dev_attr->data = soc_info;
> + soc_dev_attr->data = soc_info_ptr;
>
> soc_dev = soc_device_register(soc_dev_attr);
> if (IS_ERR(soc_dev)) {
> kfree(soc_dev_attr);
> - return;
> + return PTR_ERR(soc_dev);
> }
> +
> + return 0;
> }
> +device_initcall(mt7621_soc_dev_init);
>
> void __init prom_soc_init(struct ralink_soc_info *soc_info)
> {
> @@ -209,7 +213,7 @@ void __init prom_soc_init(struct ralink_soc_info *soc_info)
>
> soc_info->mem_detect = mt7621_memory_detect;
>
> - soc_dev_init(soc_info);
> + soc_info_ptr = soc_info;
>
> if (!register_cps_smp_ops())
> return;
> --
> 2.37.2

I backported this to kernel 5.10 as a test
without it, there was no /sys/bus/soc
with it, the drivers/staging/mt7621-pci-phy/pci-mt7621-phy.c driver
panicked in soc_device_match_attr
This was fixed with an added sentinel element in the quirk table:
--- a/drivers/staging/mt7621-pci-phy/pci-mt7621-phy.c
+++ b/drivers/staging/mt7621-pci-phy/pci-mt7621-phy.c
@@ -293,7 +293,8 @@ static struct phy *mt7621_pcie_phy_of_xlate(struct device *d
ev,
}

static const struct soc_device_attribute mt7621_pci_quirks_match[] = {
- { .soc_id = "mt7621", .revision = "E2" }
+ { .soc_id = "mt7621", .revision = "E2" },
+ { /* sentinel */ }
};

static const struct regmap_config mt7621_pci_phy_regmap_config = {

There is the same quirk table to kernel 5.15 in drivers/staging/mt7621-pci/pci-mt7621.c
Should I add commits for these for the stable kernels?

In master, these files are now
drivers/pci/controller/pcie-mt7621.c
drivers/phy/ralink/phy-mt7621-pci.c

Should I add sentinels to the soc_device_attribute quirk tables in all of these files?

Cheers,
--
John Thomson