Re: Linux 5.4.129

From: Sasha Levin
Date: Wed Jun 30 2021 - 09:40:52 EST


diff --git a/Makefile b/Makefile
index 5db87d8031f1..802520ad08cc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 4
-SUBLEVEL = 128
+SUBLEVEL = 129
EXTRAVERSION =
NAME = Kleptomaniac Octopus

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 924285d0bccd..43d6a6085d86 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -544,9 +544,11 @@ void notrace cpu_init(void)
* In Thumb-2, msr with an immediate value is not allowed.
*/
#ifdef CONFIG_THUMB2_KERNEL
-#define PLC "r"
+#define PLC_l "l"
+#define PLC_r "r"
#else
-#define PLC "I"
+#define PLC_l "I"
+#define PLC_r "I"
#endif

/*
@@ -568,15 +570,15 @@ void notrace cpu_init(void)
"msr cpsr_c, %9"
:
: "r" (stk),
- PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+ PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
"I" (offsetof(struct stack, irq[0])),
- PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+ PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
"I" (offsetof(struct stack, abt[0])),
- PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+ PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE),
"I" (offsetof(struct stack, und[0])),
- PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+ PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
"I" (offsetof(struct stack, fiq[0])),
- PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+ PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
: "r14");
#endif
}
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd8f3cdabfd0..d227cf87c48f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,7 +10,7 @@
#
# Copyright (C) 1995-2001 by Russell King

-LDFLAGS_vmlinux :=--no-undefined -X -z norelro
+LDFLAGS_vmlinux :=--no-undefined -X
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
GZFLAGS :=-9

@@ -82,17 +82,21 @@ CHECKFLAGS += -D__AARCH64EB__
AS += -EB
# Prefer the baremetal ELF build target, but not all toolchains include
# it so fall back to the standard linux version if needed.
-KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb)
+KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro)
UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
CHECKFLAGS += -D__AARCH64EL__
AS += -EL
# Same as above, prefer ELF but fall back to linux target if needed.
-KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux)
+KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro)
UTS_MACHINE := aarch64
endif

+ifeq ($(CONFIG_LD_IS_LLD), y)
+KBUILD_LDFLAGS += -z norelro
+endif
+
CHECKFLAGS += -D__aarch64__

ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
diff --git a/arch/mips/generic/board-boston.its.S b/arch/mips/generic/board-boston.its.S
index a7f51f97b910..c45ad2759421 100644
--- a/arch/mips/generic/board-boston.its.S
+++ b/arch/mips/generic/board-boston.its.S
@@ -1,22 +1,22 @@
/ {
images {
- fdt@boston {
+ fdt-boston {
description = "img,boston Device Tree";
data = /incbin/("boot/dts/img/boston.dtb");
type = "flat_dt";
arch = "mips";
compression = "none";
- hash@0 {
+ hash {
algo = "sha1";
};
};
};

configurations {
- conf@boston {
+ conf-boston {
description = "Boston Linux kernel";
- kernel = "kernel@0";
- fdt = "fdt@boston";
+ kernel = "kernel";
+ fdt = "fdt-boston";
};
};
};
diff --git a/arch/mips/generic/board-ni169445.its.S b/arch/mips/generic/board-ni169445.its.S
index e4cb4f95a8cc..0a2e8f7a8526 100644
--- a/arch/mips/generic/board-ni169445.its.S
+++ b/arch/mips/generic/board-ni169445.its.S
@@ -1,22 +1,22 @@
/ {
images {
- fdt@ni169445 {
+ fdt-ni169445 {
description = "NI 169445 device tree";
data = /incbin/("boot/dts/ni/169445.dtb");
type = "flat_dt";
arch = "mips";
compression = "none";
- hash@0 {
+ hash {
algo = "sha1";
};
};
};

configurations {
- conf@ni169445 {
+ conf-ni169445 {
description = "NI 169445 Linux Kernel";
- kernel = "kernel@0";
- fdt = "fdt@ni169445";
+ kernel = "kernel";
+ fdt = "fdt-ni169445";
};
};
};
diff --git a/arch/mips/generic/board-ocelot.its.S b/arch/mips/generic/board-ocelot.its.S
index 3da23988149a..8c7e3a1b68d3 100644
--- a/arch/mips/generic/board-ocelot.its.S
+++ b/arch/mips/generic/board-ocelot.its.S
@@ -1,40 +1,40 @@
/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
/ {
images {
- fdt@ocelot_pcb123 {
+ fdt-ocelot_pcb123 {
description = "MSCC Ocelot PCB123 Device Tree";
data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb");
type = "flat_dt";
arch = "mips";
compression = "none";
- hash@0 {
+ hash {
algo = "sha1";
};
};

- fdt@ocelot_pcb120 {
+ fdt-ocelot_pcb120 {
description = "MSCC Ocelot PCB120 Device Tree";
data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb");
type = "flat_dt";
arch = "mips";
compression = "none";
- hash@0 {
+ hash {
algo = "sha1";
};
};
};

configurations {
- conf@ocelot_pcb123 {
+ conf-ocelot_pcb123 {
description = "Ocelot Linux kernel";
- kernel = "kernel@0";
- fdt = "fdt@ocelot_pcb123";
+ kernel = "kernel";
+ fdt = "fdt-ocelot_pcb123";
};

- conf@ocelot_pcb120 {
+ conf-ocelot_pcb120 {
description = "Ocelot Linux kernel";
- kernel = "kernel@0";
- fdt = "fdt@ocelot_pcb120";
+ kernel = "kernel";
+ fdt = "fdt-ocelot_pcb120";
};
};
};
diff --git a/arch/mips/generic/board-xilfpga.its.S b/arch/mips/generic/board-xilfpga.its.S
index a2e773d3f14f..08c1e900eb4e 100644
--- a/arch/mips/generic/board-xilfpga.its.S
+++ b/arch/mips/generic/board-xilfpga.its.S
@@ -1,22 +1,22 @@
/ {
images {
- fdt@xilfpga {
+ fdt-xilfpga {
description = "MIPSfpga (xilfpga) Device Tree";
data = /incbin/("boot/dts/xilfpga/nexys4ddr.dtb");
type = "flat_dt";
arch = "mips";
compression = "none";
- hash@0 {
+ hash {
algo = "sha1";
};
};
};

configurations {
- conf@xilfpga {
+ conf-xilfpga {
description = "MIPSfpga Linux kernel";
- kernel = "kernel@0";
- fdt = "fdt@xilfpga";
+ kernel = "kernel";
+ fdt = "fdt-xilfpga";
};
};
};
diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S
index 1a08438fd893..3e254676540f 100644
--- a/arch/mips/generic/vmlinux.its.S
+++ b/arch/mips/generic/vmlinux.its.S
@@ -6,7 +6,7 @@
#address-cells = <ADDR_CELLS>;

images {
- kernel@0 {
+ kernel {
description = KERNEL_NAME;
data = /incbin/(VMLINUX_BINARY);
type = "kernel";
@@ -15,18 +15,18 @@
compression = VMLINUX_COMPRESSION;
load = /bits/ ADDR_BITS <VMLINUX_LOAD_ADDRESS>;
entry = /bits/ ADDR_BITS <VMLINUX_ENTRY_ADDRESS>;
- hash@0 {
+ hash {
algo = "sha1";
};
};
};

configurations {
- default = "conf@default";
+ default = "conf-default";

- conf@default {
+ conf-default {
description = "Generic Linux kernel";
- kernel = "kernel@0";
+ kernel = "kernel";
};
};
};
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 0c67a5a94de3..76959a7d88c8 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);

+#define RS690_LOWER_TOP_OF_DRAM2 0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2 0x31
+#define RS690_HTIU_NB_INDEX 0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100
+#define RS690_HTIU_NB_DATA 0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+ u32 val = 0;
+ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+ if (top_of_dram <= (1ULL << 32))
+ return;
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2);
+ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+ if (val)
+ return;
+
+ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
#endif
diff --git a/certs/Kconfig b/certs/Kconfig
index c94e93d8bccf..76e469b56a77 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -83,4 +83,13 @@ config SYSTEM_BLACKLIST_HASH_LIST
wrapper to incorporate the list into the kernel. Each <hash> should
be a string of hex digits.

+config SYSTEM_REVOCATION_LIST
+ bool "Provide system-wide ring of revocation certificates"
+ depends on SYSTEM_BLACKLIST_KEYRING
+ depends on PKCS7_MESSAGE_PARSER=y
+ help
+ If set, this allows revocation certificates to be stored in the
+ blacklist keyring and implements a hook whereby a PKCS#7 message can
+ be checked to see if it matches such a certificate.
+
endmenu
diff --git a/certs/Makefile b/certs/Makefile
index f4c25b67aad9..f4b90bad8690 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux kernel signature checking certificates.
#

-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o
ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"")
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
diff --git a/certs/blacklist.c b/certs/blacklist.c
index 025a41de28fd..59b2f106b294 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -135,6 +135,58 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
}
EXPORT_SYMBOL_GPL(is_hash_blacklisted);

+int is_binary_blacklisted(const u8 *hash, size_t hash_len)
+{
+ if (is_hash_blacklisted(hash, hash_len, "bin") == -EKEYREJECTED)
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(is_binary_blacklisted);
+
+#ifdef CONFIG_SYSTEM_REVOCATION_LIST
+/**
+ * add_key_to_revocation_list - Add a revocation certificate to the blacklist
+ * @data: The data blob containing the certificate
+ * @size: The size of data blob
+ */
+int add_key_to_revocation_list(const char *data, size_t size)
+{
+ key_ref_t key;
+
+ key = key_create_or_update(make_key_ref(blacklist_keyring, true),
+ "asymmetric",
+ NULL,
+ data,
+ size,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW),
+ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN);
+
+ if (IS_ERR(key)) {
+ pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key));
+ return PTR_ERR(key);
+ }
+
+ return 0;
+}
+
+/**
+ * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked
+ * @pkcs7: The PKCS#7 message to check
+ */
+int is_key_on_revocation_list(struct pkcs7_message *pkcs7)
+{
+ int ret;
+
+ ret = pkcs7_validate_trust(pkcs7, blacklist_keyring);
+
+ if (ret == 0)
+ return -EKEYREJECTED;
+
+ return -ENOKEY;
+}
+#endif
+
/*
* Initialise the blacklist
*/
diff --git a/certs/blacklist.h b/certs/blacklist.h
index 1efd6fa0dc60..51b320cf8574 100644
--- a/certs/blacklist.h
+++ b/certs/blacklist.h
@@ -1,3 +1,5 @@
#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <crypto/pkcs7.h>

extern const char __initconst *const blacklist_hashes[];
diff --git a/certs/common.c b/certs/common.c
new file mode 100644
index 000000000000..16a220887a53
--- /dev/null
+++ b/certs/common.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include "common.h"
+
+int load_certificate_list(const u8 cert_list[],
+ const unsigned long list_size,
+ const struct key *keyring)
+{
+ key_ref_t key;
+ const u8 *p, *end;
+ size_t plen;
+
+ p = cert_list;
+ end = p + list_size;
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(keyring, 1),
+ "asymmetric",
+ NULL,
+ p,
+ plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return 0;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+ return 0;
+}
diff --git a/certs/common.h b/certs/common.h
new file mode 100644
index 000000000000..abdb5795936b
--- /dev/null
+++ b/certs/common.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _CERT_COMMON_H
+#define _CERT_COMMON_H
+
+int load_certificate_list(const u8 cert_list[], const unsigned long list_size,
+ const struct key *keyring);
+
+#endif
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 798291177186..a44a8915c94c 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -15,6 +15,7 @@
#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include <crypto/pkcs7.h>
+#include "common.h"

static struct key *builtin_trusted_keys;
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
@@ -136,54 +137,10 @@ device_initcall(system_trusted_keyring_init);
*/
static __init int load_system_certificate_list(void)
{
- key_ref_t key;
- const u8 *p, *end;
- size_t plen;
-
pr_notice("Loading compiled-in X.509 certificates\n");

- p = system_certificate_list;
- end = p + system_certificate_list_size;
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1),
- "asymmetric",
- NULL,
- p,
- plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_BUILT_IN |
- KEY_ALLOC_BYPASS_RESTRICTION);
- if (IS_ERR(key)) {
- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- } else {
- pr_notice("Loaded X.509 cert '%s'\n",
- key_ref_to_ptr(key)->description);
- key_ref_put(key);
- }
- p += plen;
- }
-
- return 0;
-
-dodgy_cert:
- pr_err("Problem parsing in-kernel X.509 certificate list\n");
- return 0;
+ return load_certificate_list(system_certificate_list, system_certificate_list_size,
+ builtin_trusted_keys);
}
late_initcall(load_system_certificate_list);

@@ -241,6 +198,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len,
pr_devel("PKCS#7 platform keyring is not available\n");
goto error;
}
+
+ ret = is_key_on_revocation_list(pkcs7);
+ if (ret != -ENOKEY) {
+ pr_devel("PKCS#7 platform key is on revocation list\n");
+ goto error;
+ }
}
ret = pkcs7_validate_trust(pkcs7, trusted_keys);
if (ret < 0) {
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index f40051d6aecb..9c0ea13ca788 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)

static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
{
- struct dma_chan *chan = vd->tx.chan;
- struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
-
- kfree(c->desc);
+ kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
}

static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)

static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
{
- struct mtk_uart_apdma_desc *d = c->desc;
-
mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
-
- list_del(&d->vd.node);
- vchan_cookie_complete(&d->vd);
}

static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)

c->rx_status = d->avail_len - cnt;
mtk_uart_apdma_write(c, VFF_RPT, wg);
+}

- list_del(&d->vd.node);
- vchan_cookie_complete(&d->vd);
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+ struct mtk_uart_apdma_desc *d = c->desc;
+
+ if (d) {
+ list_del(&d->vd.node);
+ vchan_cookie_complete(&d->vd);
+ c->desc = NULL;
+ }
}

static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
mtk_uart_apdma_rx_handler(c);
else if (c->dir == DMA_MEM_TO_DEV)
mtk_uart_apdma_tx_handler(c);
+ mtk_uart_apdma_chan_complete_handler(c);
spin_unlock_irqrestore(&c->vc.lock, flags);

return IRQ_HANDLED;
@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
return NULL;

/* Now allocate and setup the descriptor */
- d = kzalloc(sizeof(*d), GFP_ATOMIC);
+ d = kzalloc(sizeof(*d), GFP_NOWAIT);
if (!d)
return NULL;

@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
unsigned long flags;

spin_lock_irqsave(&c->vc.lock, flags);
- if (vchan_issue_pending(&c->vc)) {
+ if (vchan_issue_pending(&c->vc) && !c->desc) {
vd = vchan_next_desc(&c->vc);
c->desc = to_mtk_uart_apdma_desc(&vd->tx);

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 3993ab65c62c..89eb9ea25814 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1855,7 +1855,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)

/* Enable runtime PM and initialize the device. */
pm_runtime_enable(&pdev->dev);
- ret = pm_runtime_get_sync(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
return ret;
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index d47749a35863..84009c5e0f33 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -467,7 +467,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
struct zynqmp_dma_desc_sw *desc;
int i, ret;

- ret = pm_runtime_get_sync(chan->dev);
+ ret = pm_runtime_resume_and_get(chan->dev);
if (ret < 0)
return ret;

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9964ec0035ed..1d8739a4fbca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3416,12 +3416,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(adev->doorbell_index.kiq * 2) << 2);
- /* If GC has entered CGPG, ringing doorbell > first page doesn't
- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
- * this issue.
- */
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
- (adev->doorbell.size - 4));
+ (adev->doorbell_index.userqueue_end * 2) << 2);
}

WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 354da41f52de..06cdc22b5501 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3593,12 +3593,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(adev->doorbell_index.kiq * 2) << 2);
- /* If GC has entered CGPG, ringing doorbell > first page doesn't
- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
- * this issue.
- */
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
- (adev->doorbell.size - 4));
+ (adev->doorbell_index.userqueue_end * 2) << 2);
}

WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index bae6a3eccee0..f9ee562f72d3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -112,7 +112,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
if (ret)
return -EINVAL;

- return 0;
+ ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
+ if (ret)
+ goto error;
+
+ if (nvbo->bo.moving)
+ ret = dma_fence_wait(nvbo->bo.moving, true);
+
+ ttm_bo_unreserve(&nvbo->bo);
+ if (ret)
+ goto error;
+
+ return ret;
+
+error:
+ nouveau_bo_unpin(nvbo);
+ return ret;
}

void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index b906e8fbd5f3..7bc33a80934c 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -94,9 +94,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)

/* pin buffer into GTT */
ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
- if (likely(ret == 0))
- bo->prime_shared_count++;
-
+ if (unlikely(ret))
+ goto error;
+
+ if (bo->tbo.moving) {
+ ret = dma_fence_wait(bo->tbo.moving, false);
+ if (unlikely(ret)) {
+ radeon_bo_unpin(bo);
+ goto error;
+ }
+ }
+
+ bo->prime_shared_count++;
+error:
radeon_bo_unreserve(bo);
return ret;
}
diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c
index a39f7d092797..66dfa211e736 100644
--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
+++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
}
}

- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
+ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
if (ret) {
dev_err(&adapter->dev, "failure sending STOP\n");
return -EREMOTEIO;
@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface,
* Set bus frequency. The frequency is:
* 120,000,000 / ( 16 + 2 * div * 4^prescale).
* Using dev = 52, prescale = 0 give 100KHz */
- ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
NULL, 0);
if (ret) {
dev_err(&interface->dev, "failure sending bit rate");
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 545c3f2f8a06..a3e3b274f0ea 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -166,6 +166,7 @@ struct meson_host {

unsigned int bounce_buf_size;
void *bounce_buf;
+ void __iomem *bounce_iomem_buf;
dma_addr_t bounce_dma_addr;
struct sd_emmc_desc *descs;
dma_addr_t descs_dma_addr;
@@ -737,6 +738,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
writel(start, host->regs + SD_EMMC_START);
}

+/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */
+static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data,
+ size_t buflen, bool to_buffer)
+{
+ unsigned int sg_flags = SG_MITER_ATOMIC;
+ struct scatterlist *sgl = data->sg;
+ unsigned int nents = data->sg_len;
+ struct sg_mapping_iter miter;
+ unsigned int offset = 0;
+
+ if (to_buffer)
+ sg_flags |= SG_MITER_FROM_SG;
+ else
+ sg_flags |= SG_MITER_TO_SG;
+
+ sg_miter_start(&miter, sgl, nents, sg_flags);
+
+ while ((offset < buflen) && sg_miter_next(&miter)) {
+ unsigned int len;
+
+ len = min(miter.length, buflen - offset);
+
+ /* When dram_access_quirk, the bounce buffer is a iomem mapping */
+ if (host->dram_access_quirk) {
+ if (to_buffer)
+ memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len);
+ else
+ memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len);
+ } else {
+ if (to_buffer)
+ memcpy(host->bounce_buf + offset, miter.addr, len);
+ else
+ memcpy(miter.addr, host->bounce_buf + offset, len);
+ }
+
+ offset += len;
+ }
+
+ sg_miter_stop(&miter);
+}
+
static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
{
struct meson_host *host = mmc_priv(mmc);
@@ -780,8 +822,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
if (data->flags & MMC_DATA_WRITE) {
cmd_cfg |= CMD_CFG_DATA_WR;
WARN_ON(xfer_bytes > host->bounce_buf_size);
- sg_copy_to_buffer(data->sg, data->sg_len,
- host->bounce_buf, xfer_bytes);
+ meson_mmc_copy_buffer(host, data, xfer_bytes, true);
dma_wmb();
}

@@ -950,8 +991,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
if (meson_mmc_bounce_buf_read(data)) {
xfer_bytes = data->blksz * data->blocks;
WARN_ON(xfer_bytes > host->bounce_buf_size);
- sg_copy_from_buffer(data->sg, data->sg_len,
- host->bounce_buf, xfer_bytes);
+ meson_mmc_copy_buffer(host, data, xfer_bytes, false);
}

next_cmd = meson_mmc_get_next_command(cmd);
@@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev)
* instead of the DDR memory
*/
host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN;
- host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
+ host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF;
} else {
/* data bounce buffer */
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 0f2bee59a82b..0bc7f6518fb3 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -351,6 +351,7 @@ static int ldisc_open(struct tty_struct *tty)
rtnl_lock();
result = register_netdevice(dev);
if (result) {
+ tty_kref_put(tty);
rtnl_unlock();
free_netdev(dev);
return -ENODEV;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 5c6a276f69ac..426b8098c50e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1293,9 +1293,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;

p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
+ BUILD_BUG_ON(sizeof(dcbx_info->operational.params) !=
+ sizeof(p_hwfn->p_dcbx_info->set.config.params));
memcpy(&p_hwfn->p_dcbx_info->set.config.params,
&dcbx_info->operational.params,
- sizeof(struct qed_dcbx_admin_params));
+ sizeof(p_hwfn->p_dcbx_info->set.config.params));
p_hwfn->p_dcbx_info->set.config.valid = true;

memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 8ff178fc2670..661202e85412 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1801,7 +1801,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
switch(stringset) {
case ETH_SS_STATS:
- memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings));
+ memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings));
break;
}
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a042f4607b0d..931a44fe7afe 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2322,7 +2322,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
{
switch (stringset) {
case ETH_SS_STATS:
- memcpy(data, *sh_eth_gstrings_stats,
+ memcpy(data, sh_eth_gstrings_stats,
sizeof(sh_eth_gstrings_stats));
break;
}
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9b55fbdc3a7c..9a7af7dda70d 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -770,12 +770,15 @@ static void temac_start_xmit_done(struct net_device *ndev)
stat = be32_to_cpu(cur_p->app0);

while (stat & STS_CTRL_APP0_CMPLT) {
+ /* Make sure that the other fields are read after bd is
+ * released by dma
+ */
+ rmb();
dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
skb = (struct sk_buff *)ptr_from_txbd(cur_p);
if (skb)
dev_consume_skb_irq(skb);
- cur_p->app0 = 0;
cur_p->app1 = 0;
cur_p->app2 = 0;
cur_p->app3 = 0;
@@ -784,6 +787,12 @@ static void temac_start_xmit_done(struct net_device *ndev)
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);

+ /* app0 must be visible last, as it is used to flag
+ * availability of the bd
+ */
+ smp_mb();
+ cur_p->app0 = 0;
+
lp->tx_bd_ci++;
if (lp->tx_bd_ci >= TX_BD_NUM)
lp->tx_bd_ci = 0;
@@ -810,6 +819,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag)
if (cur_p->app0)
return NETDEV_TX_BUSY;

+ /* Make sure to read next bd app0 after this one */
+ rmb();
+
tail++;
if (tail >= TX_BD_NUM)
tail = 0;
@@ -927,6 +939,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
wmb();
lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */

+ if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+ netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
+ netif_stop_queue(ndev);
+ }
+
return NETDEV_TX_OK;
}

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 31a559513362..87c0cdbf262a 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -468,16 +468,12 @@ static int dp83867_phy_reset(struct phy_device *phydev)
{
int err;

- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
+ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
if (err < 0)
return err;

usleep_range(10, 20);

- /* After reset FORCE_LINK_GOOD bit is set. Although the
- * default value should be unset. Disable FORCE_LINK_GOOD
- * for the phy to work properly.
- */
return phy_modify(phydev, MII_DP83867_PHYCTRL,
DP83867_PHYCR_FORCE_LINK_GOOD, 0);
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f6d643ecaf39..24d124633037 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -5065,7 +5065,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
switch (stringset) {
case ETH_SS_STATS:
- memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
+ memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings));
break;
}
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index c48c68090d76..1033513d3d9d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1458,8 +1458,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw)
static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
{
struct mac80211_hwsim_data *data = hw->priv;
+
data->started = false;
hrtimer_cancel(&data->beacon_timer);
+
+ while (!skb_queue_empty(&data->pending))
+ ieee80211_free_txskb(hw, skb_dequeue(&data->pending));
+
wiphy_dbg(hw->wiphy, "%s\n", __func__);
}

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 34a06e89e176..3c3bc9f58498 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1666,11 +1666,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
int err;
int i, bars = 0;

- if (atomic_inc_return(&dev->enable_cnt) > 1) {
- pci_update_current_state(dev, dev->current_state);
- return 0; /* already enabled */
+ /*
+ * Power state could be unknown at this point, either due to a fresh
+ * boot or a device removal call. So get the current power state
+ * so that things like MSI message writing will behave as expected
+ * (e.g. if the device really is in D0 at enable time).
+ */
+ if (dev->pm_cap) {
+ u16 pmcsr;
+ pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+ dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
}

+ if (atomic_inc_return(&dev->enable_cnt) > 1)
+ return 0; /* already enabled */
+
bridge = pci_upstream_bridge(dev);
if (bridge)
pci_enable_bridge(bridge);
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 2d5e0435af0a..bac1d040baca 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -1153,7 +1153,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
struct resource res;
struct reset_control *rstc;
int npins = STM32_GPIO_PINS_PER_BANK;
- int bank_nr, err;
+ int bank_nr, err, i = 0;

rstc = of_reset_control_get_exclusive(np, NULL);
if (!IS_ERR(rstc))
@@ -1182,9 +1182,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,

of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label);

- if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) {
+ if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) {
bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK;
bank->gpio_chip.base = args.args[1];
+
+ npins = args.args[2];
+ while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
+ ++i, &args))
+ npins += args.args[2];
} else {
bank_nr = pctl->nbanks;
bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index efd9e908e224..36a44a837031 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -975,12 +975,6 @@ static int nxp_fspi_probe(struct platform_device *pdev)
goto err_put_ctrl;
}

- /* Clear potential interrupts */
- reg = fspi_readl(f, f->iobase + FSPI_INTR);
- if (reg)
- fspi_writel(f, reg, f->iobase + FSPI_INTR);
-
-
/* find the resources - controller memory mapped space */
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
f->ahb_addr = devm_ioremap_resource(dev, res);
@@ -1012,6 +1006,11 @@ static int nxp_fspi_probe(struct platform_device *pdev)
goto err_put_ctrl;
}

+ /* Clear potential interrupts */
+ reg = fspi_readl(f, f->iobase + FSPI_INTR);
+ if (reg)
+ fspi_writel(f, reg, f->iobase + FSPI_INTR);
+
/* find the irq */
ret = platform_get_irq(pdev, 0);
if (ret < 0)
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index e60be7bb55b0..c6c8a33c81d5 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1054,6 +1054,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
nilfs_sysfs_delete_superblock_group(nilfs);
nilfs_sysfs_delete_segctor_group(nilfs);
kobject_del(&nilfs->ns_dev_kobj);
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
}

diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index c1a96fdf598b..875e002a4180 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -31,16 +31,37 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
#define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
#endif

+extern struct pkcs7_message *pkcs7;
#ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
extern int mark_hash_blacklisted(const char *hash);
extern int is_hash_blacklisted(const u8 *hash, size_t hash_len,
const char *type);
+extern int is_binary_blacklisted(const u8 *hash, size_t hash_len);
#else
static inline int is_hash_blacklisted(const u8 *hash, size_t hash_len,
const char *type)
{
return 0;
}
+
+static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_SYSTEM_REVOCATION_LIST
+extern int add_key_to_revocation_list(const char *data, size_t size);
+extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7);
+#else
+static inline int add_key_to_revocation_list(const char *data, size_t size)
+{
+ return 0;
+}
+static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7)
+{
+ return -ENOKEY;
+}
#endif

#ifdef CONFIG_IMA_BLACKLIST_KEYRING
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d8b86fd39113..d2dbe462efee 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -259,6 +259,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);

extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;

static inline bool is_huge_zero_page(struct page *page)
{
@@ -267,7 +268,7 @@ static inline bool is_huge_zero_page(struct page *page)

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return is_huge_zero_page(pmd_page(pmd));
+ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
}

static inline bool is_huge_zero_pud(pud_t pud)
@@ -398,6 +399,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}

+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fc717aeb2b3d..a0513c444446 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -469,17 +469,6 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}

-pgoff_t __basepage_index(struct page *page);
-
-/* Return page->index in PAGE_SIZE units */
-static inline pgoff_t basepage_index(struct page *page)
-{
- if (!PageCompound(page))
- return page->index;
-
- return __basepage_index(page);
-}
-
extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
@@ -695,11 +684,6 @@ static inline int hstate_index(struct hstate *h)
return 0;
}

-static inline pgoff_t basepage_index(struct page *page)
-{
- return page->index;
-}
-
static inline int dissolve_free_huge_page(struct page *page)
{
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5565d11f9542..a7d626b4cad1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1459,6 +1459,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
+ struct page *single_page; /* Locked page to be unmapped */
};

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1505,6 +1506,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -1525,6 +1527,7 @@ static inline int fixup_user_fault(struct task_struct *tsk,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2ad72d2c8cc5..5d0767cb424a 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 37a4d9e32cd3..8543b1aaa529 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -397,7 +397,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
}

/*
- * Get index of the page with in radix-tree
+ * Get index of the page within radix-tree (but not for hugetlb pages).
* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_index(struct page *page)
@@ -416,15 +416,16 @@ static inline pgoff_t page_to_index(struct page *page)
return pgoff;
}

+extern pgoff_t hugetlb_basepage_index(struct page *page);
+
/*
- * Get the offset in PAGE_SIZE.
- * (TODO: hugepage should have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_pgoff(struct page *page)
{
- if (unlikely(PageHeadHuge(page)))
- return page->index << compound_order(page);
-
+ if (unlikely(PageHuge(page)))
+ return hugetlb_basepage_index(page);
return page_to_index(page);
}

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d7d6d4eb1794..91ccae946716 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -98,7 +98,8 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SYNC = 0x200, /* avoid racy checks with PVMW_SYNC */
};

#ifdef CONFIG_MMU
diff --git a/include/net/sock.h b/include/net/sock.h
index a0728f24ecc5..d3dd89b6e2cb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1860,7 +1860,8 @@ static inline u32 net_tx_rndhash(void)

static inline void sk_set_txhash(struct sock *sk)
{
- sk->sk_txhash = net_tx_rndhash();
+ /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
}

static inline void sk_rethink_txhash(struct sock *sk)
@@ -2125,9 +2126,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,

static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
- if (sk->sk_txhash) {
+ /* This pairs with WRITE_ONCE() in sk_set_txhash() */
+ u32 txhash = READ_ONCE(sk->sk_txhash);
+
+ if (txhash) {
skb->l4_hash = 1;
- skb->hash = sk->sk_txhash;
+ skb->hash = txhash;
}
}

diff --git a/init/Kconfig b/init/Kconfig
index 4f9fd78e2200..f23e90d9935f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -20,6 +20,9 @@ config GCC_VERSION
config CC_IS_CLANG
def_bool $(success,$(CC) --version | head -n 1 | grep -q clang)

+config LD_IS_LLD
+ def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
+
config CLANG_VERSION
int
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
diff --git a/kernel/futex.c b/kernel/futex.c
index 375e7e98e301..f82879ae6577 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -737,7 +737,7 @@ again:

key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.i_seq = get_inode_sequence_number(inode);
- key->shared.pgoff = basepage_index(tail);
+ key->shared.pgoff = page_to_pgoff(tail);
rcu_read_unlock();
}

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1d4c98a19043..2eb8d7550324 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1020,8 +1020,38 @@ void kthread_flush_work(struct kthread_work *work)
EXPORT_SYMBOL_GPL(kthread_flush_work);

/*
- * This function removes the work from the worker queue. Also it makes sure
- * that it won't get queued later via the delayed work's timer.
+ * Make sure that the timer is neither set nor running and could
+ * not manipulate the work list_head any longer.
+ *
+ * The function is called under worker->lock. The lock is temporary
+ * released but the timer can't be set again in the meantime.
+ */
+static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
+ unsigned long *flags)
+{
+ struct kthread_delayed_work *dwork =
+ container_of(work, struct kthread_delayed_work, work);
+ struct kthread_worker *worker = work->worker;
+
+ /*
+ * del_timer_sync() must be called to make sure that the timer
+ * callback is not running. The lock must be temporary released
+ * to avoid a deadlock with the callback. In the meantime,
+ * any queuing is blocked by setting the canceling counter.
+ */
+ work->canceling++;
+ raw_spin_unlock_irqrestore(&worker->lock, *flags);
+ del_timer_sync(&dwork->timer);
+ raw_spin_lock_irqsave(&worker->lock, *flags);
+ work->canceling--;
+}
+
+/*
+ * This function removes the work from the worker queue.
+ *
+ * It is called under worker->lock. The caller must make sure that
+ * the timer used by delayed work is not running, e.g. by calling
+ * kthread_cancel_delayed_work_timer().
*
* The work might still be in use when this function finishes. See the
* current_work proceed by the worker.
@@ -1029,28 +1059,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
* Return: %true if @work was pending and successfully canceled,
* %false if @work was not pending
*/
-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
- unsigned long *flags)
+static bool __kthread_cancel_work(struct kthread_work *work)
{
- /* Try to cancel the timer if exists. */
- if (is_dwork) {
- struct kthread_delayed_work *dwork =
- container_of(work, struct kthread_delayed_work, work);
- struct kthread_worker *worker = work->worker;
-
- /*
- * del_timer_sync() must be called to make sure that the timer
- * callback is not running. The lock must be temporary released
- * to avoid a deadlock with the callback. In the meantime,
- * any queuing is blocked by setting the canceling counter.
- */
- work->canceling++;
- raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
- raw_spin_lock_irqsave(&worker->lock, *flags);
- work->canceling--;
- }
-
/*
* Try to remove the work from a worker list. It might either
* be from worker->work_list or from worker->delayed_work_list.
@@ -1103,11 +1113,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
/* Work must not be used with >1 worker, see kthread_queue_work() */
WARN_ON_ONCE(work->worker != worker);

- /* Do not fight with another command that is canceling this work. */
+ /*
+ * Temporary cancel the work but do not fight with another command
+ * that is canceling the work as well.
+ *
+ * It is a bit tricky because of possible races with another
+ * mod_delayed_work() and cancel_delayed_work() callers.
+ *
+ * The timer must be canceled first because worker->lock is released
+ * when doing so. But the work can be removed from the queue (list)
+ * only when it can be queued again so that the return value can
+ * be used for reference counting.
+ */
+ kthread_cancel_delayed_work_timer(work, &flags);
if (work->canceling)
goto out;
+ ret = __kthread_cancel_work(work);

- ret = __kthread_cancel_work(work, true, &flags);
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
@@ -1129,7 +1151,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);

- ret = __kthread_cancel_work(work, is_dwork, &flags);
+ if (is_dwork)
+ kthread_cancel_delayed_work_timer(work, &flags);
+
+ ret = __kthread_cancel_work(work);

if (worker->current_work != work)
goto out_fast;
diff --git a/kernel/module.c b/kernel/module.c
index 88a6a9e04f8d..59d487b8d8da 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -268,9 +268,18 @@ static void module_assert_mutex_or_preempt(void)
#endif
}

+#ifdef CONFIG_MODULE_SIG
static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
module_param(sig_enforce, bool_enable_only, 0644);

+void set_module_sig_enforced(void)
+{
+ sig_enforce = true;
+}
+#else
+#define sig_enforce false
+#endif
+
/*
* Export sig_enforce kernel cmdline parameter to allow other subsystems rely
* on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
@@ -281,11 +290,6 @@ bool is_module_sig_enforced(void)
}
EXPORT_SYMBOL(is_module_sig_enforced);

-void set_module_sig_enforced(void)
-{
- sig_enforce = true;
-}
-
/* Block module loading/unloading? */
int modules_disabled = 0;
core_param(nomodule, modules_disabled, bint, 0);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7bbf419bb86d..87a07aa61be0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -61,6 +61,7 @@ static struct shrinker deferred_split_shrinker;

static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;

bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -97,6 +98,7 @@ retry:
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
+ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));

/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
+ WRITE_ONCE(huge_zero_pfn, ~0UL);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
@@ -2155,7 +2158,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
count_vm_event(THP_SPLIT_PMD);

if (!vma_is_anonymous(vma)) {
- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
@@ -2164,16 +2167,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
zap_deposited_table(mm, pmd);
if (vma_is_dax(vma))
return;
- page = pmd_page(_pmd);
- if (!PageDirty(page) && pmd_dirty(_pmd))
- set_page_dirty(page);
- if (!PageReferenced(page) && pmd_young(_pmd))
- SetPageReferenced(page);
- page_remove_rmap(page, true);
- put_page(page);
+ if (unlikely(is_pmd_migration_entry(old_pmd))) {
+ swp_entry_t entry;
+
+ entry = pmd_to_swp_entry(old_pmd);
+ page = migration_entry_to_page(entry);
+ } else {
+ page = pmd_page(old_pmd);
+ if (!PageDirty(page) && pmd_dirty(old_pmd))
+ set_page_dirty(page);
+ if (!PageReferenced(page) && pmd_young(old_pmd))
+ SetPageReferenced(page);
+ page_remove_rmap(page, true);
+ put_page(page);
+ }
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ }
+
+ if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2449,16 +2461,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
- bool unmap_success;
+ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;

VM_BUG_ON_PAGE(!PageHead(page), page);

if (PageAnon(page))
ttu_flags |= TTU_SPLIT_FREEZE;

- unmap_success = try_to_unmap(page, ttu_flags);
- VM_BUG_ON_PAGE(!unmap_success, page);
+ try_to_unmap(page, ttu_flags);
+
+ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
}

static void remap_page(struct page *page)
@@ -2737,7 +2749,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct deferred_split *ds_queue = get_deferred_split_queue(page);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
- int count, mapcount, extra_pins, ret;
+ int extra_pins, ret;
bool mlocked;
unsigned long flags;
pgoff_t end;
@@ -2799,7 +2811,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

mlocked = PageMlocked(page);
unmap_page(head);
- VM_BUG_ON_PAGE(compound_mapcount(head), head);

/* Make sure the page is not on per-CPU pagevec as it takes pin */
if (mlocked)
@@ -2822,9 +2833,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

/* Prevent deferred_split_scan() touching ->_refcount */
spin_lock(&ds_queue->split_queue_lock);
- count = page_count(head);
- mapcount = total_mapcount(head);
- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+ if (page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
@@ -2845,16 +2854,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
} else
ret = 0;
} else {
- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
- pr_alert("total_mapcount: %u, page_count(): %u\n",
- mapcount, count);
- if (PageTail(page))
- dump_page(head, NULL);
- dump_page(page, "total_mapcount(head) > 0");
- BUG();
- }
spin_unlock(&ds_queue->split_queue_lock);
-fail: if (mapping)
+fail:
+ if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
remap_page(head);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fe15e7d8220a..95a32749af4d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1461,15 +1461,12 @@ int PageHeadHuge(struct page *page_head)
return get_compound_page_dtor(page_head) == free_huge_page;
}

-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
{
struct page *page_head = compound_head(page);
pgoff_t index = page_index(page_head);
unsigned long compound_idx;

- if (!PageHuge(page_head))
- return page_index(page);
-
if (compound_order(page_head) >= MAX_ORDER)
compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
else
diff --git a/mm/internal.h b/mm/internal.h
index 7dd7fbb577a9..cf382549dd70 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -339,27 +339,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
*/
static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page_to_pgoff(page);
- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page);
+ if (pgoff >= vma->vm_pgoff) {
+ address = vma->vm_start +
+ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address >= vma->vm_end)
+ address = -EFAULT;
+ } else if (PageHead(page) &&
+ pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
+ /* Test above avoids possibility of wrap to 0 on 32-bit */
+ address = vma->vm_start;
+ } else {
+ address = -EFAULT;
+ }
+ return address;
}

+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
{
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
-
- /* page should be within @vma mapping range */
- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
- return max(start, vma->vm_start);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page) + compound_nr(page);
+ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address > vma->vm_end)
+ address = vma->vm_end;
+ return address;
}

static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
diff --git a/mm/memory.c b/mm/memory.c
index 13a575ce2ec8..4bb7c6a364c8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1165,7 +1165,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
+ } else if (details && details->single_page &&
+ PageTransCompound(details->single_page) &&
+ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+ spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+ /*
+ * Take and drop THP pmd lock so that we cannot return
+ * prematurely, while zap_huge_pmd() has cleared *pmd,
+ * but not yet decremented compound_mapcount().
+ */
+ spin_unlock(ptl);
}
+
/*
* Here there can be other concurrent MADV_DONTNEED or
* trans huge page faults running, and if the pmd is
@@ -2769,6 +2780,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
}
}

+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct zap_details details = { };
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageTail(page));
+
+ details.check_mapping = mapping;
+ details.first_index = page->index;
+ details.last_index = page->index + hpage_nr_pages(page) - 1;
+ details.single_page = page;
+
+ i_mmap_lock_write(mapping);
+ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+ unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ i_mmap_unlock_write(mapping);
+}
+
/**
* unmap_mapping_pages() - Unmap pages from processes.
* @mapping: The address space containing pages to be unmapped.
diff --git a/mm/migrate.c b/mm/migrate.c
index 00bbe57c1ce2..5092ef2aa8a1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -321,6 +321,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
goto out;

page = migration_entry_to_page(entry);
+ page = compound_head(page);

/*
* Once page cache replacement of page migration started, page_count
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index eff4b4520c8d..029f5598251c 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
return pfn_in_hpage(pvmw->page, pfn);
}

+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+ pvmw->address = (pvmw->address + size) & ~(size - 1);
+ if (!pvmw->address)
+ pvmw->address = ULONG_MAX;
+}
+
/**
* page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
* @pvmw->address
@@ -139,6 +146,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
struct mm_struct *mm = pvmw->vma->vm_mm;
struct page *page = pvmw->page;
+ unsigned long end;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
@@ -148,10 +156,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw);

- if (pvmw->pte)
- goto next_pte;
+ if (unlikely(PageHuge(page))) {
+ /* The only possible mapping was handled on last iteration */
+ if (pvmw->pte)
+ return not_found(pvmw);

- if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
if (!pvmw->pte)
@@ -163,78 +172,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
return true;
}
-restart:
- pgd = pgd_offset(mm, pvmw->address);
- if (!pgd_present(*pgd))
- return false;
- p4d = p4d_offset(pgd, pvmw->address);
- if (!p4d_present(*p4d))
- return false;
- pud = pud_offset(p4d, pvmw->address);
- if (!pud_present(*pud))
- return false;
- pvmw->pmd = pmd_offset(pud, pvmw->address);
+
/*
- * Make sure the pmd value isn't cached in a register by the
- * compiler and used as a stale value after we've observed a
- * subsequent update.
+ * Seek to next pte only makes sense for THP.
+ * But more important than that optimization, is to filter out
+ * any PageKsm page: whose page->index misleads vma_address()
+ * and vma_address_end() to disaster.
*/
- pmde = READ_ONCE(*pvmw->pmd);
- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
- if (likely(pmd_trans_huge(*pvmw->pmd))) {
- if (pvmw->flags & PVMW_MIGRATION)
- return not_found(pvmw);
- if (pmd_page(*pvmw->pmd) != page)
- return not_found(pvmw);
- return true;
- } else if (!pmd_present(*pvmw->pmd)) {
- if (thp_migration_supported()) {
- if (!(pvmw->flags & PVMW_MIGRATION))
+ end = PageTransCompound(page) ?
+ vma_address_end(page, pvmw->vma) :
+ pvmw->address + PAGE_SIZE;
+ if (pvmw->pte)
+ goto next_pte;
+restart:
+ do {
+ pgd = pgd_offset(mm, pvmw->address);
+ if (!pgd_present(*pgd)) {
+ step_forward(pvmw, PGDIR_SIZE);
+ continue;
+ }
+ p4d = p4d_offset(pgd, pvmw->address);
+ if (!p4d_present(*p4d)) {
+ step_forward(pvmw, P4D_SIZE);
+ continue;
+ }
+ pud = pud_offset(p4d, pvmw->address);
+ if (!pud_present(*pud)) {
+ step_forward(pvmw, PUD_SIZE);
+ continue;
+ }
+
+ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ /*
+ * Make sure the pmd value isn't cached in a register by the
+ * compiler and used as a stale value after we've observed a
+ * subsequent update.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+ pmde = *pvmw->pmd;
+ if (likely(pmd_trans_huge(pmde))) {
+ if (pvmw->flags & PVMW_MIGRATION)
return not_found(pvmw);
- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+ return true;
+ }
+ if (!pmd_present(pmde)) {
+ swp_entry_t entry;

- if (migration_entry_to_page(entry) != page)
- return not_found(pvmw);
- return true;
- }
+ if (!thp_migration_supported() ||
+ !(pvmw->flags & PVMW_MIGRATION))
+ return not_found(pvmw);
+ entry = pmd_to_swp_entry(pmde);
+ if (!is_migration_entry(entry) ||
+ migration_entry_to_page(entry) != page)
+ return not_found(pvmw);
+ return true;
}
- return not_found(pvmw);
- } else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
+ } else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+ if ((pvmw->flags & PVMW_SYNC) &&
+ PageTransCompound(page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+ }
+ step_forward(pvmw, PMD_SIZE);
+ continue;
}
- } else if (!pmd_present(pmde)) {
- return false;
- }
- if (!map_pte(pvmw))
- goto next_pte;
- while (1) {
+ if (!map_pte(pvmw))
+ goto next_pte;
+this_pte:
if (check_pte(pvmw))
return true;
next_pte:
- /* Seek to next pte only makes sense for THP */
- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
- return not_found(pvmw);
do {
pvmw->address += PAGE_SIZE;
- if (pvmw->address >= pvmw->vma->vm_end ||
- pvmw->address >=
- __vma_address(pvmw->page, pvmw->vma) +
- hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+ if (pvmw->address >= end)
return not_found(pvmw);
/* Did we cross page table boundary? */
- if (pvmw->address % PMD_SIZE == 0) {
- pte_unmap(pvmw->pte);
+ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
if (pvmw->ptl) {
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
+ pte_unmap(pvmw->pte);
+ pvmw->pte = NULL;
goto restart;
- } else {
- pvmw->pte++;
+ }
+ pvmw->pte++;
+ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
}
} while (pte_none(*pvmw->pte));

@@ -242,7 +281,10 @@ next_pte:
pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
spin_lock(pvmw->ptl);
}
- }
+ goto this_pte;
+ } while (pvmw->address < end);
+
+ return false;
}

/**
@@ -261,14 +303,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
.vma = vma,
.flags = PVMW_SYNC,
};
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);

- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+ pvmw.address = vma_address(page, vma);
+ if (pvmw.address == -EFAULT)
return 0;
- pvmw.address = max(start, vma->vm_start);
if (!page_vma_mapped_walk(&pvmw))
return 0;
page_vma_mapped_walk_done(&pvmw);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 532c29276fce..49e8a4fbc205 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -126,8 +126,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
- !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+ !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
diff --git a/mm/rmap.c b/mm/rmap.c
index 0c7b2a9400d4..45f2106852e8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -687,7 +687,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address;
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
@@ -697,15 +696,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping) {
- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
- return -EFAULT;
- } else
+ } else if (!vma->vm_file) {
return -EFAULT;
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
- return address;
+ }
+
+ return vma_address(page, vma);
}

pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -899,7 +896,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
0, vma, vma->vm_mm, address,
- min(vma->vm_end, address + page_size(page)));
+ vma_address_end(page, vma));
mmu_notifier_invalidate_range_start(&range);

while (page_vma_mapped_walk(&pvmw)) {
@@ -1353,6 +1350,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)arg;

+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1374,9 +1380,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
+ range.end = PageKsm(page) ?
+ address + PAGE_SIZE : vma_address_end(page, vma);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- address,
- min(vma->vm_end, address + page_size(page)));
+ address, range.end);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1690,9 +1697,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return is_vma_temporary_stack(vma);
}

-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
{
- return !total_mapcount(page);
+ return !page_mapped(page);
}

/**
@@ -1710,7 +1717,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
- .done = page_mapcount_is_zero,
+ .done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};

@@ -1731,14 +1738,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
else
rmap_walk(page, &rwc);

- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}

-static int page_not_mapped(struct page *page)
-{
- return !page_mapped(page);
-};
-
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
@@ -1833,6 +1841,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);

+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();

if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1887,6 +1896,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);

+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();

if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/mm/truncate.c b/mm/truncate.c
index dd9ebc1da356..4d5add7d8ab6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -173,13 +173,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
{
- if (page_mapped(page)) {
- pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
- unmap_mapping_pages(mapping, page->index, nr, false);
- }
+ if (page_mapped(page))
+ unmap_mapping_page(page);

if (page_has_private(page))
do_invalidatepage(page, 0, PAGE_SIZE);
@@ -224,7 +221,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return -EIO;

- truncate_cleanup_page(mapping, page);
+ truncate_cleanup_page(page);
delete_from_page_cache(page);
return 0;
}
@@ -362,7 +359,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_add(&locked_pvec, page);
}
for (i = 0; i < pagevec_count(&locked_pvec); i++)
- truncate_cleanup_page(mapping, locked_pvec.pages[i]);
+ truncate_cleanup_page(locked_pvec.pages[i]);
delete_from_page_cache_batch(mapping, &locked_pvec);
for (i = 0; i < pagevec_count(&locked_pvec); i++)
unlock_page(locked_pvec.pages[i]);
@@ -715,6 +712,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}

+ if (!did_range_unmap && page_mapped(page)) {
+ /*
+ * If page is mapped, before taking its lock,
+ * zap the rest of the file in one hit.
+ */
+ unmap_mapping_pages(mapping, index,
+ (1 + end - index), false);
+ did_range_unmap = 1;
+ }
+
lock_page(page);
WARN_ON(page_to_index(page) != index);
if (page->mapping != mapping) {
@@ -722,23 +729,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- if (!did_range_unmap) {
- /*
- * Zap the rest of the file in one hit.
- */
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
- did_range_unmap = 1;
- } else {
- /*
- * Just zap this page
- */
- unmap_mapping_pages(mapping, index,
- 1, false);
- }
- }
+
+ if (page_mapped(page))
+ unmap_mapping_page(page);
BUG_ON(page_mapped(page));
+
ret2 = do_launder_page(mapping, page);
if (ret2 == 0) {
if (!invalidate_complete_page2(mapping, page))
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 76506975d59a..cbd1885f2459 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1508,7 +1508,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
if (eeprom.offset + eeprom.len > total_len)
return -EINVAL;

- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;

@@ -1573,7 +1573,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;

- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;

@@ -1764,7 +1764,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
return -EFAULT;

test.len = test_len;
- data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+ data = kcalloc(test_len, sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;

@@ -2295,7 +2295,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = ops->get_tunable(dev, &tuna, data);
@@ -2481,7 +2481,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_phy_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
mutex_lock(&phydev->lock);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a27d034c85cc..603a3495afa6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
return -EAFNOSUPPORT;

if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;

if (tb[IFLA_INET_CONF]) {
nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index df6fbefe44d4..1c3d5d3702a1 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -963,6 +963,7 @@ bool ping_rcv(struct sk_buff *skb)
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
+ bool rc = false;

/* We assume the packet has already been checked by icmp_rcv */

@@ -977,14 +978,15 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

pr_debug("rcv on socket %p\n", sk);
- if (skb2)
- ping_queue_rcv_skb(sk, skb2);
+ if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+ rc = true;
sock_put(sk);
- return true;
}
- pr_debug("no socket, dropping\n");

- return false;
+ if (!rc)
+ pr_debug("no socket, dropping\n");
+
+ return rc;
}
EXPORT_SYMBOL_GPL(ping_rcv);

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 52feab2baeee..366c3792b860 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5761,7 +5761,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
return -EAFNOSUPPORT;

if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;

if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a7933279a80b..e574fbf6745a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1420,7 +1420,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);

- if (WARN_ON_ONCE(!chanctx_conf)) {
+ if (!chanctx_conf) {
rcu_read_unlock();
return NULL;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3d7a5c5e586a..670d84e54db7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2200,17 +2200,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sc = le16_to_cpu(hdr->seq_ctrl);
frag = sc & IEEE80211_SCTL_FRAG;

- if (is_multicast_ether_addr(hdr->addr1)) {
- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
- goto out_no_led;
- }
-
if (rx->sta)
cache = &rx->sta->frags;

if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;

+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_MONITOR;
+
I802_DEBUG_INC(rx->local->rx_handlers_fragments);

if (skb_linearize(rx->skb))
@@ -2336,7 +2334,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)

out:
ieee80211_led_rx(rx->local);
- out_no_led:
if (rx->sta)
rx->sta->rx_stats.packets++;
return RX_CONTINUE;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index fbc2d4dfddf0..0ffbf3d17911 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2656,7 +2656,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2869,7 +2869,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)

if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -3141,7 +3141,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
/* prevents packet_notifier() from calling
* register_prot_hook()
*/
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3151,17 +3151,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}

BUG_ON(po->running);
- po->num = proto;
+ WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;

if (unlikely(unlisted)) {
dev_put(dev);
po->prot_hook.dev = NULL;
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
+ WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
}
@@ -3475,7 +3475,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
uaddr->sa_family = AF_PACKET;
memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
@@ -3490,16 +3490,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+ int ifindex;

if (peer)
return -EOPNOTSUPP;

+ ifindex = READ_ONCE(po->ifindex);
sll->sll_family = AF_PACKET;
- sll->sll_ifindex = po->ifindex;
- sll->sll_protocol = po->num;
+ sll->sll_ifindex = ifindex;
+ sll->sll_protocol = READ_ONCE(po->num);
sll->sll_pkttype = 0;
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
@@ -4099,7 +4101,7 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
@@ -4405,7 +4407,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
was_running = po->running;
num = po->num;
if (was_running) {
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, false);
}
spin_unlock(&po->bind_lock);
@@ -4440,7 +4442,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,

spin_lock(&po->bind_lock);
if (was_running) {
- po->num = num;
+ WRITE_ONCE(po->num, num);
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
@@ -4613,8 +4615,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
s,
refcount_read(&s->sk_refcnt),
s->sk_type,
- ntohs(po->num),
- po->ifindex,
+ ntohs(READ_ONCE(po->num)),
+ READ_ONCE(po->ifindex),
po->running,
atomic_read(&s->sk_rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4eae6ad32851..f0247eab5bc9 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1006,6 +1006,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
/* mesh should be handled? */
break;
+ case NL80211_IFTYPE_OCB:
+ cfg80211_leave_ocb(rdev, dev);
+ break;
default:
break;
}
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index f9b19524da11..1e9baa5c4fc6 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab,
Elf32_Word const *symtab_shndx)
{
unsigned long offset;
+ unsigned short shndx = w2(sym->st_shndx);
int index;

- if (sym->st_shndx != SHN_XINDEX)
- return w2(sym->st_shndx);
+ if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
+ return shndx;

- offset = (unsigned long)sym - (unsigned long)symtab;
- index = offset / sizeof(*sym);
+ if (shndx == SHN_XINDEX) {
+ offset = (unsigned long)sym - (unsigned long)symtab;
+ index = offset / sizeof(*sym);

- return w(symtab_shndx[index]);
+ return w(symtab_shndx[index]);
+ }
+
+ return 0;
}

static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 35e6ca773734..351c9662994b 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -11,7 +11,8 @@ integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o
integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
- platform_certs/load_uefi.o
+ platform_certs/load_uefi.o \
+ platform_certs/keyring_handler.o
integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o

obj-$(CONFIG_IMA) += ima/
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
new file mode 100644
index 000000000000..5604bd57c990
--- /dev/null
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "../integrity.h"
+
+static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
+static efi_guid_t efi_cert_x509_sha256_guid __initdata =
+ EFI_CERT_X509_SHA256_GUID;
+static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
+
+/*
+ * Blacklist a hash.
+ */
+static __init void uefi_blacklist_hash(const char *source, const void *data,
+ size_t len, const char *type,
+ size_t type_len)
+{
+ char *hash, *p;
+
+ hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
+ if (!hash)
+ return;
+ p = memcpy(hash, type, type_len);
+ p += type_len;
+ bin2hex(p, data, len);
+ p += len * 2;
+ *p = 0;
+
+ mark_hash_blacklisted(hash);
+ kfree(hash);
+}
+
+/*
+ * Blacklist an X509 TBS hash.
+ */
+static __init void uefi_blacklist_x509_tbs(const char *source,
+ const void *data, size_t len)
+{
+ uefi_blacklist_hash(source, data, len, "tbs:", 4);
+}
+
+/*
+ * Blacklist the hash of an executable.
+ */
+static __init void uefi_blacklist_binary(const char *source,
+ const void *data, size_t len)
+{
+ uefi_blacklist_hash(source, data, len, "bin:", 4);
+}
+
+/*
+ * Add an X509 cert to the revocation list.
+ */
+static __init void uefi_revocation_list_x509(const char *source,
+ const void *data, size_t len)
+{
+ add_key_to_revocation_list(data, len);
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI db and MokListRT tables.
+ */
+__init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
+{
+ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+ return add_to_platform_keyring;
+ return 0;
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI dbx and MokListXRT tables.
+ */
+__init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
+{
+ if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0)
+ return uefi_blacklist_x509_tbs;
+ if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
+ return uefi_blacklist_binary;
+ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+ return uefi_revocation_list_x509;
+ return 0;
+}
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
new file mode 100644
index 000000000000..2462bfa08fe3
--- /dev/null
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PLATFORM_CERTS_INTERNAL_H
+#define PLATFORM_CERTS_INTERNAL_H
+
+#include <linux/efi.h>
+
+void blacklist_hash(const char *source, const void *data,
+ size_t len, const char *type,
+ size_t type_len);
+
+/*
+ * Blacklist an X509 TBS hash.
+ */
+void blacklist_x509_tbs(const char *source, const void *data, size_t len);
+
+/*
+ * Blacklist the hash of an executable.
+ */
+void blacklist_binary(const char *source, const void *data, size_t len);
+
+/*
+ * Return the handler for particular signature list types found in the db.
+ */
+efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
+
+/*
+ * Return the handler for particular signature list types found in the dbx.
+ */
+efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
+
+#endif
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 020fc7a11ef0..aa874d84e413 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -9,6 +9,7 @@
#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include "../integrity.h"
+#include "keyring_handler.h"

static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
static efi_guid_t efi_cert_x509_sha256_guid __initdata =
@@ -69,72 +70,6 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
return db;
}

-/*
- * Blacklist a hash.
- */
-static __init void uefi_blacklist_hash(const char *source, const void *data,
- size_t len, const char *type,
- size_t type_len)
-{
- char *hash, *p;
-
- hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
- if (!hash)
- return;
- p = memcpy(hash, type, type_len);
- p += type_len;
- bin2hex(p, data, len);
- p += len * 2;
- *p = 0;
-
- mark_hash_blacklisted(hash);
- kfree(hash);
-}
-
-/*
- * Blacklist an X509 TBS hash.
- */
-static __init void uefi_blacklist_x509_tbs(const char *source,
- const void *data, size_t len)
-{
- uefi_blacklist_hash(source, data, len, "tbs:", 4);
-}
-
-/*
- * Blacklist the hash of an executable.
- */
-static __init void uefi_blacklist_binary(const char *source,
- const void *data, size_t len)
-{
- uefi_blacklist_hash(source, data, len, "bin:", 4);
-}
-
-/*
- * Return the appropriate handler for particular signature list types found in
- * the UEFI db and MokListRT tables.
- */
-static __init efi_element_handler_t get_handler_for_db(const efi_guid_t *
- sig_type)
-{
- if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
- return add_to_platform_keyring;
- return 0;
-}
-
-/*
- * Return the appropriate handler for particular signature list types found in
- * the UEFI dbx and MokListXRT tables.
- */
-static __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *
- sig_type)
-{
- if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0)
- return uefi_blacklist_x509_tbs;
- if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
- return uefi_blacklist_binary;
- return 0;
-}
-
/*
* Load the certs contained in the UEFI databases into the platform trusted
* keyring and the UEFI blacklisted X.509 cert SHA256 hashes into the blacklist
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 41cf45416060..38de88e5ffbb 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -54,7 +54,7 @@ int kvm_check_cap(long cap)
exit(KSFT_SKIP);

ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);

close(kvm_fd);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f83fa0aeeb45..b2287e7d3ba4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1593,6 +1593,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
return true;
}

+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+ if (kvm_is_reserved_pfn(pfn))
+ return 1;
+ return get_page_unless_zero(pfn_to_page(pfn));
+}
+
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
bool write_fault, bool *writable,
@@ -1642,13 +1649,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* Whoever called remap_pfn_range is also going to call e.g.
* unmap_mapping_range before the underlying pages are freed,
* causing a call to our MMU notifier.
+ *
+ * Certain IO or PFNMAP mappings can be backed with valid
+ * struct pages, but be allocated without refcounting e.g.,
+ * tail pages of non-compound higher order allocations, which
+ * would then underflow the refcount when the caller does the
+ * required put_page. Don't allow those pages here.
*/
- kvm_get_pfn(pfn);
+ if (!kvm_try_get_pfn(pfn))
+ r = -EFAULT;

out:
pte_unmap_unlock(ptep, ptl);
*p_pfn = pfn;
- return 0;
+
+ return r;
}

/*