Re: Linux 6.1.81

From: Greg Kroah-Hartman
Date: Wed Mar 06 2024 - 10:09:03 EST


diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index 894a19897005..bac3789f3e8f 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware.

The function prototype for the handover entry point looks like this::

- efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+ efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)

'handle' is the EFI image handle passed to the boot loader by the EFI
firmware, 'table' is the EFI system table - these are the first two
diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
index 5d4330be200f..e801df0bb3a8 100644
--- a/Documentation/x86/mds.rst
+++ b/Documentation/x86/mds.rst
@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:

mds_clear_cpu_buffers()

+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
+
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.

@@ -138,17 +141,30 @@ Mitigation points

When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
- command line. The migitation is enabled through the static key
- mds_user_clear.
-
- The mitigation is invoked in prepare_exit_to_usermode() which covers
- all but one of the kernel to user space transitions. The exception
- is when we return from a Non Maskable Interrupt (NMI), which is
- handled directly in do_nmi().
-
- (The reason that NMI is special is that prepare_exit_to_usermode() can
- enable IRQs. In NMI context, NMIs are blocked, and we don't want to
- enable IRQs with NMIs blocked.)
+ command line. The mitigation is enabled through the feature flag
+ X86_FEATURE_CLEAR_CPU_BUF.
+
+ The mitigation is invoked just before transitioning to userspace after
+ user registers are restored. This is done to minimize the window in
+ which kernel data could be accessed after VERW e.g. via an NMI after
+ VERW.
+
+ **Corner case not handled**
+ Interrupts returning to kernel don't clear CPUs buffers since the
+ exit-to-user path is expected to do that anyways. But, there could be
+ a case when an NMI is generated in kernel after the exit-to-user path
+ has cleared the buffers. This case is not handled and NMI returning to
+ kernel don't clear CPU buffers because:
+
+ 1. It is rare to get an NMI after VERW, but before returning to userspace.
+ 2. For an unprivileged user, there is no known way to make that NMI
+ less rare or target it.
+ 3. It would take a large number of these precisely-timed NMIs to mount
+ an actual attack. There's presumably not enough bandwidth.
+ 4. The NMI in question occurs after a VERW, i.e. when user state is
+ restored and most interesting data is already scrubbed. Whats left
+ is only the data that NMI touches, and that may or may not be of
+ any interest.


2. C-State transition
diff --git a/MAINTAINERS b/MAINTAINERS
index 13d1078808bb..bbfedb0b2093 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10051,6 +10051,7 @@ F: drivers/infiniband/
F: include/rdma/
F: include/trace/events/ib_mad.h
F: include/trace/events/ib_umad.h
+F: include/trace/misc/rdma.h
F: include/uapi/linux/if_infiniband.h
F: include/uapi/rdma/
F: samples/bpf/ibumad_kern.c
@@ -11139,6 +11140,12 @@ F: fs/nfs_common/
F: fs/nfsd/
F: include/linux/lockd/
F: include/linux/sunrpc/
+F: include/trace/events/rpcgss.h
+F: include/trace/events/rpcrdma.h
+F: include/trace/events/sunrpc.h
+F: include/trace/misc/fs.h
+F: include/trace/misc/nfs.h
+F: include/trace/misc/sunrpc.h
F: include/uapi/linux/nfsd/
F: include/uapi/linux/sunrpc/
F: net/sunrpc/
diff --git a/Makefile b/Makefile
index bc4adb561a7c..e13df565a1cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 1
-SUBLEVEL = 80
+SUBLEVEL = 81
EXTRAVERSION =
NAME = Curry Ramen

diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index ec476b159649..b236d23f8071 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -59,7 +59,7 @@ icoll: interrupt-controller@80000000 {
reg = <0x80000000 0x2000>;
};

- dma_apbh: dma-apbh@80004000 {
+ dma_apbh: dma-controller@80004000 {
compatible = "fsl,imx23-dma-apbh";
reg = <0x80004000 0x2000>;
interrupts = <0 14 20 0
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index b15df16ecb01..b81592a61311 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -78,7 +78,7 @@ hsadc: hsadc@80002000 {
status = "disabled";
};

- dma_apbh: dma-apbh@80004000 {
+ dma_apbh: dma-controller@80004000 {
compatible = "fsl,imx28-dma-apbh";
reg = <0x80004000 0x2000>;
interrupts = <82 83 84 85
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index ff1e0173b39b..2c6eada01d79 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -150,7 +150,7 @@ soc: soc {
interrupt-parent = <&gpc>;
ranges;

- dma_apbh: dma-apbh@110000 {
+ dma_apbh: dma-controller@110000 {
compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x00110000 0x2000>;
interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 1f1053a898fb..67d344ae76b5 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -209,7 +209,7 @@ gpu: gpu@1800000 {
power-domains = <&pd_pu>;
};

- dma_apbh: dma-apbh@1804000 {
+ dma_apbh: dma-controller@1804000 {
compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x01804000 0x2000>;
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index 2b5996395701..aac081b6daaa 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -164,7 +164,7 @@ intc: interrupt-controller@a01000 {
<0x00a06000 0x2000>;
};

- dma_apbh: dma-apbh@1804000 {
+ dma_apbh: dma-controller@1804000 {
compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x01804000 0x2000>;
interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 4b23630fc738..69aebc691526 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -1267,14 +1267,13 @@ fec1: ethernet@30be0000 {
};
};

- dma_apbh: dma-apbh@33000000 {
+ dma_apbh: dma-controller@33000000 {
compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x33000000 0x2000>;
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3";
#dma-cells = <1>;
dma-channels = <4>;
clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>;
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index bac4cabef607..467ac2f768ac 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req)
src += blocks * AES_BLOCK_SIZE;
}
if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+
neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
nbytes, walk.iv);
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
+
nbytes = 0;
}
kernel_neon_end();
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 62c846be2d76..a75c0772ecfc 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si)
}

#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)

/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 97b026130c71..1e5f083cdb72 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb,

struct iommu_table_ops iommu_table_lpar_multi_ops;

-/*
- * iommu_table_setparms_lpar
- *
- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
- */
-static void iommu_table_setparms_lpar(struct pci_controller *phb,
- struct device_node *dn,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group,
- const __be32 *dma_window)
-{
- unsigned long offset, size, liobn;
-
- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
-
- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
- &iommu_table_lpar_multi_ops);
-
-
- table_group->tce32_start = offset;
- table_group->tce32_size = size;
-}
-
struct iommu_table_ops iommu_table_pseries_ops = {
.set = tce_build_pSeries,
.clear = tce_free_pSeries,
@@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
* dynamic 64bit DMA window, walking up the device tree.
*/
static struct device_node *pci_dma_find(struct device_node *dn,
- const __be32 **dma_window)
+ struct dynamic_dma_window_prop *prop)
{
- const __be32 *dw = NULL;
+ const __be32 *default_prop = NULL;
+ const __be32 *ddw_prop = NULL;
+ struct device_node *rdn = NULL;
+ bool default_win = false, ddw_win = false;

for ( ; dn && PCI_DN(dn); dn = dn->parent) {
- dw = of_get_property(dn, "ibm,dma-window", NULL);
- if (dw) {
- if (dma_window)
- *dma_window = dw;
- return dn;
+ default_prop = of_get_property(dn, "ibm,dma-window", NULL);
+ if (default_prop) {
+ rdn = dn;
+ default_win = true;
+ }
+ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
+ }
+ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
}
- dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
- if (dw)
- return dn;
- dw = of_get_property(dn, DMA64_PROPNAME, NULL);
- if (dw)
- return dn;
+
+ /* At least found default window, which is the case for normal boot */
+ if (default_win)
+ break;
}

- return NULL;
+ /* For PCI devices there will always be a DMA window, either on the device
+ * or parent bus
+ */
+ WARN_ON(!(default_win | ddw_win));
+
+ /* caller doesn't want to get DMA window property */
+ if (!prop)
+ return rdn;
+
+ /* parse DMA window property. During normal system boot, only default
+ * DMA window is passed in OF. But, for kdump, a dedicated adapter might
+ * have both default and DDW in FDT. In this scenario, DDW takes precedence
+ * over default window.
+ */
+ if (ddw_win) {
+ struct dynamic_dma_window_prop *p;
+
+ p = (struct dynamic_dma_window_prop *)ddw_prop;
+ prop->liobn = p->liobn;
+ prop->dma_base = p->dma_base;
+ prop->tce_shift = p->tce_shift;
+ prop->window_shift = p->window_shift;
+ } else if (default_win) {
+ unsigned long offset, size, liobn;
+
+ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
+
+ prop->liobn = cpu_to_be32((u32)liobn);
+ prop->dma_base = cpu_to_be64(offset);
+ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
+ prop->window_shift = cpu_to_be32(order_base_2(size));
+ }
+
+ return rdn;
}

static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
@@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
struct iommu_table *tbl;
struct device_node *dn, *pdn;
struct pci_dn *ppci;
- const __be32 *dma_window = NULL;
+ struct dynamic_dma_window_prop prop;

dn = pci_bus_to_OF_node(bus);

pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);

- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);

- if (dma_window == NULL)
- pr_debug(" no ibm,dma-window property !\n");
+ /* In PPC architecture, there will always be DMA window on bus or one of the
+ * parent bus. During reboot, there will be ibm,dma-window property to
+ * define DMA window. For kdump, there will at least be default window or DDW
+ * or both.
+ */

ppci = PCI_DN(pdn);

@@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
- if (dma_window) {
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
- ppci->table_group, dma_window);

- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
- panic("Failed to initialize iommu table");
- }
+ iommu_table_setparms_common(tbl, ppci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
+
+ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
+
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name)
continue;
}

+ /* If at the time of system initialization, there are DDWs in OF,
+ * it means this is during kexec. DDW could be direct or dynamic.
+ * We will just mark DDWs as "dynamic" since this is kdump path,
+ * no need to worry about perforance. ddw_list_new_entry() will
+ * set window->direct = false.
+ */
window = ddw_list_new_entry(pdn, dma64);
if (!window) {
of_node_put(pdn);
@@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
struct device_node *pdn, *dn;
struct iommu_table *tbl;
- const __be32 *dma_window = NULL;
struct pci_dn *pci;
+ struct dynamic_dma_window_prop prop;

pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));

@@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %pOF\n", dn);

- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
if (!pci->table_group) {
pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
tbl = pci->table_group->tables[0];
- iommu_table_setparms_lpar(pci->phb, pdn, tbl,
- pci->table_group, dma_window);
+
+ iommu_table_setparms_common(tbl, pci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);

iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index d47d87c2d7e3..dcf1bc9de584 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -25,6 +25,11 @@

#define ARCH_SUPPORTS_FTRACE_OPS 1
#ifndef __ASSEMBLY__
+
+extern void *return_address(unsigned int level);
+
+#define ftrace_return_address(n) return_address(n)
+
void MCOUNT_NAME(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 59bb53da473d..63055c6ad2c2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -79,7 +79,7 @@
* Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
*/
-#define vmemmap ((struct page *)VMEMMAP_START)
+#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))

#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index ab333cb792fd..4c0805d264ca 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
endif
CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
@@ -41,6 +42,7 @@ obj-y += irq.o
obj-y += process.o
obj-y += ptrace.o
obj-y += reset.o
+obj-y += return_address.o
obj-y += setup.o
obj-y += signal.o
obj-y += syscall_table.o
diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c
new file mode 100644
index 000000000000..c8115ec8fb30
--- /dev/null
+++ b/arch/riscv/kernel/return_address.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code come from arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2023 SiFive.
+ */
+
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static bool save_return_addr(void *d, unsigned long pc)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)pc;
+ return false;
+ }
+
+ --data->level;
+
+ return true;
+}
+NOKPROBE_SYMBOL(save_return_addr);
+
+noinline void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+
+ data.level = level + 3;
+ data.addr = NULL;
+
+ arch_stack_walk(save_return_addr, &data, current, NULL);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+
+}
+EXPORT_SYMBOL_GPL(return_address);
+NOKPROBE_SYMBOL(return_address);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4c9bfc4be58d..2f7af61b49b6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1982,6 +1982,23 @@ config EFI_STUB

See Documentation/admin-guide/efi-stub.rst for more information.

+config EFI_HANDOVER_PROTOCOL
+ bool "EFI handover protocol (DEPRECATED)"
+ depends on EFI_STUB
+ default y
+ help
+ Select this in order to include support for the deprecated EFI
+ handover protocol, which defines alternative entry points into the
+ EFI stub. This is a practice that has no basis in the UEFI
+ specification, and requires a priori knowledge on the part of the
+ bootloader about Linux/x86 specific ways of passing the command line
+ and initrd, and where in memory those assets may be loaded.
+
+ If in doubt, say Y. Even though the corresponding support is not
+ present in upstream GRUB or other bootloaders, most distros build
+ GRUB with numerous downstream patches applied, and may rely on the
+ handover protocol as as result.
+
config EFI_MIXED
bool "EFI mixed-mode support"
depends on EFI_STUB && X86_64
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 15b7b403a4bd..3965b2c9efee 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
ifeq ($(CONFIG_LD_IS_BFD),y)
LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
endif
+ifeq ($(CONFIG_EFI_STUB),y)
+# ensure that the static EFI stub library will be pulled in, even if it is
+# never referenced explicitly from the startup code
+LDFLAGS_vmlinux += -u efi_pe_entry
+endif
LDFLAGS_vmlinux += -T

hostprogs := mkpiggy
@@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/ident_map_64.o
vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
- vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
endif
@@ -108,11 +113,11 @@ endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o

-vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
-efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a

-$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)

OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 9caf89063e77..55c98fdd67d2 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
* Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to
* ACPI_TABLE_GUID because it has more features.
*/
- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
ACPI_20_TABLE_GUID);
if (rsdp_addr)
return (acpi_physical_address)rsdp_addr;

/* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */
- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
ACPI_TABLE_GUID);
if (rsdp_addr)
return (acpi_physical_address)rsdp_addr;
@@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void)
enum efi_type et;
int ret;

- et = efi_get_type(boot_params);
+ et = efi_get_type(boot_params_ptr);
if (et == EFI_TYPE_NONE)
return 0;

- systab_pa = efi_get_system_table(boot_params);
+ systab_pa = efi_get_system_table(boot_params_ptr);
if (!systab_pa)
error("EFI support advertised, but unable to locate system table.");

- ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+ ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len);
if (ret || !cfg_tbl_pa)
error("EFI config table not found.");

@@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void)
{
acpi_physical_address pa;

- pa = boot_params->acpi_rsdp_addr;
+ pa = boot_params_ptr->acpi_rsdp_addr;

if (!pa)
pa = efi_get_rsdp_addr();
@@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void)
rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
if (!rsdp)
rsdp = (struct acpi_table_rsdp *)(long)
- boot_params->acpi_rsdp_addr;
+ boot_params_ptr->acpi_rsdp_addr;

if (!rsdp)
return 0;
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index f1add5d85da9..c1bb180973ea 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr)
#include "../cmdline.c"
unsigned long get_cmd_line_ptr(void)
{
- unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
+ unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr;

- cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
+ cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32;

return cmd_line_ptr;
}
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
new file mode 100644
index 000000000000..8232c5b2a9bf
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT and IDT before we make EFI service
+ * calls.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+ .code64
+ .text
+/*
+ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
+ * is the first thing that runs after switching to long mode. Depending on
+ * whether the EFI handover protocol or the compat entry point was used to
+ * enter the kernel, it will either branch to the common 64-bit EFI stub
+ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
+ * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
+ * struct bootparams pointer as the third argument, so the presence of such a
+ * pointer is used to disambiguate.
+ *
+ * +--------------+
+ * +------------------+ +------------+ +------>| efi_pe_entry |
+ * | efi32_pe_entry |---->| | | +-----------+--+
+ * +------------------+ | | +------+----------------+ |
+ * | startup_32 |---->| startup_64_mixed_mode | |
+ * +------------------+ | | +------+----------------+ |
+ * | efi32_stub_entry |---->| | | |
+ * +------------------+ +------------+ | |
+ * V |
+ * +------------+ +----------------+ |
+ * | startup_64 |<----| efi_stub_entry |<--------+
+ * +------------+ +----------------+
+ */
+SYM_FUNC_START(startup_64_mixed_mode)
+ lea efi32_boot_args(%rip), %rdx
+ mov 0(%rdx), %edi
+ mov 4(%rdx), %esi
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ mov 8(%rdx), %edx // saved bootparams pointer
+ test %edx, %edx
+ jnz efi_stub_entry
+#endif
+ /*
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+ * shadow space on the stack even if all arguments are passed in
+ * registers. We also need an additional 8 bytes for the space that
+ * would be occupied by the return address, and this also results in
+ * the correct stack alignment for entry.
+ */
+ sub $40, %rsp
+ mov %rdi, %rcx // MS calling convention
+ mov %rsi, %rdx
+ jmp efi_pe_entry
+SYM_FUNC_END(startup_64_mixed_mode)
+
+SYM_FUNC_START(__efi64_thunk)
+ push %rbp
+ push %rbx
+
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ /* Copy args passed on stack */
+ movq 0x30(%rsp), %rbp
+ movq 0x38(%rsp), %rbx
+ movq 0x40(%rsp), %rax
+
+ /*
+ * Convert x86-64 ABI params to i386 ABI
+ */
+ subq $64, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movl %r8d, 0xc(%rsp)
+ movl %r9d, 0x10(%rsp)
+ movl %ebp, 0x14(%rsp)
+ movl %ebx, 0x18(%rsp)
+ movl %eax, 0x1c(%rsp)
+
+ leaq 0x20(%rsp), %rbx
+ sgdt (%rbx)
+ sidt 16(%rbx)
+
+ leaq 1f(%rip), %rbp
+
+ /*
+ * Switch to IDT and GDT with 32-bit segments. These are the firmware
+ * GDT and IDT that were installed when the kernel started executing.
+ * The pointers were saved by the efi32_entry() routine below.
+ *
+ * Pass the saved DS selector to the 32-bit code, and use far return to
+ * restore the saved CS selector.
+ */
+ lidt efi32_boot_idt(%rip)
+ lgdt efi32_boot_gdt(%rip)
+
+ movzwl efi32_boot_ds(%rip), %edx
+ movzwq efi32_boot_cs(%rip), %rax
+ pushq %rax
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $64, %rsp
+ movq %rdi, %rax
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+ /* Clear out 32-bit selector from FS and GS */
+ xorl %ebx, %ebx
+ movl %ebx, %fs
+ movl %ebx, %gs
+
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ roll $1, %eax
+ rorq $1, %rax
+
+ pop %rbx
+ pop %rbp
+ RET
+SYM_FUNC_END(__efi64_thunk)
+
+ .code32
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+SYM_FUNC_START(efi32_stub_entry)
+ call 1f
+1: popl %ecx
+
+ /* Clear BSS */
+ xorl %eax, %eax
+ leal (_bss - 1b)(%ecx), %edi
+ leal (_ebss - 1b)(%ecx), %ecx
+ subl %edi, %ecx
+ shrl $2, %ecx
+ cld
+ rep stosl
+
+ add $0x4, %esp /* Discard return address */
+ popl %ecx
+ popl %edx
+ popl %esi
+ jmp efi32_entry
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+SYM_FUNC_START_LOCAL(efi_enter32)
+ /* Load firmware selector into data and stack segment registers */
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+ movl %edx, %ss
+
+ /* Reload pgtables */
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode via EFER */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btrl $_EFER_LME, %eax
+ wrmsr
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ /*
+ * Some firmware will return with interrupts enabled. Be sure to
+ * disable them before we switch GDTs and IDTs.
+ */
+ cli
+
+ lidtl 16(%ebx)
+ lgdtl (%ebx)
+
+ movl %cr4, %eax
+ btsl $(X86_CR4_PAE_BIT), %eax
+ movl %eax, %cr4
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ xorl %eax, %eax
+ lldt %ax
+
+ pushl $__KERNEL_CS
+ pushl %ebp
+
+ /* Enable paging */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+ lret
+SYM_FUNC_END(efi_enter32)
+
+/*
+ * This is the common EFI stub entry point for mixed mode.
+ *
+ * Arguments: %ecx image handle
+ * %edx EFI system table pointer
+ * %esi struct bootparams pointer (or NULL when not using
+ * the EFI handover protocol)
+ *
+ * Since this is the point of no return for ordinary execution, no registers
+ * are considered live except for the function parameters. [Note that the EFI
+ * stub may still exit and return to the firmware using the Exit() EFI boot
+ * service.]
+ */
+SYM_FUNC_START_LOCAL(efi32_entry)
+ call 1f
+1: pop %ebx
+
+ /* Save firmware GDTR and code/data selectors */
+ sgdtl (efi32_boot_gdt - 1b)(%ebx)
+ movw %cs, (efi32_boot_cs - 1b)(%ebx)
+ movw %ds, (efi32_boot_ds - 1b)(%ebx)
+
+ /* Store firmware IDT descriptor */
+ sidtl (efi32_boot_idt - 1b)(%ebx)
+
+ /* Store boot arguments */
+ leal (efi32_boot_args - 1b)(%ebx), %ebx
+ movl %ecx, 0(%ebx)
+ movl %edx, 4(%ebx)
+ movl %esi, 8(%ebx)
+ movb $0x0, 12(%ebx) // efi_is64
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ jmp startup_32
+SYM_FUNC_END(efi32_entry)
+
+/*
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+ * efi_system_table_32_t *sys_table)
+ */
+SYM_FUNC_START(efi32_pe_entry)
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %ebx // save callee-save registers
+ pushl %edi
+
+ call verify_cpu // check for long mode support
+ testl %eax, %eax
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
+ jnz 2f
+
+ movl 8(%ebp), %ecx // image_handle
+ movl 12(%ebp), %edx // sys_table
+ xorl %esi, %esi
+ jmp efi32_entry // pass %ecx, %edx, %esi
+ // no other registers remain live
+
+2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+ RET
+SYM_FUNC_END(efi32_pe_entry)
+
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ .org efi32_stub_entry + 0x200
+ .code64
+SYM_FUNC_START_NOALIGN(efi64_stub_entry)
+ jmp efi_handover_entry
+SYM_FUNC_END(efi64_stub_entry)
+#endif
+
+ .data
+ .balign 8
+SYM_DATA_START_LOCAL(efi32_boot_gdt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_gdt)
+
+SYM_DATA_START_LOCAL(efi32_boot_idt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_idt)
+
+SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
+SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
+SYM_DATA(efi_is64, .byte 1)
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
deleted file mode 100644
index 67e7edcdfea8..000000000000
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
- *
- * Early support for invoking 32-bit EFI services from a 64-bit kernel.
- *
- * Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT and IDT before we make EFI service
- * calls.
- *
- * On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identity
- * mapped pagetable and haven't transitioned to 64-bit virtual addresses
- * yet.
- */
-
-#include <linux/linkage.h>
-#include <asm/msr.h>
-#include <asm/page_types.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
- .code64
- .text
-SYM_FUNC_START(__efi64_thunk)
- push %rbp
- push %rbx
-
- movl %ds, %eax
- push %rax
- movl %es, %eax
- push %rax
- movl %ss, %eax
- push %rax
-
- /* Copy args passed on stack */
- movq 0x30(%rsp), %rbp
- movq 0x38(%rsp), %rbx
- movq 0x40(%rsp), %rax
-
- /*
- * Convert x86-64 ABI params to i386 ABI
- */
- subq $64, %rsp
- movl %esi, 0x0(%rsp)
- movl %edx, 0x4(%rsp)
- movl %ecx, 0x8(%rsp)
- movl %r8d, 0xc(%rsp)
- movl %r9d, 0x10(%rsp)
- movl %ebp, 0x14(%rsp)
- movl %ebx, 0x18(%rsp)
- movl %eax, 0x1c(%rsp)
-
- leaq 0x20(%rsp), %rbx
- sgdt (%rbx)
-
- addq $16, %rbx
- sidt (%rbx)
-
- leaq 1f(%rip), %rbp
-
- /*
- * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
- * and IDT that was installed when the kernel started executing. The
- * pointers were saved at the EFI stub entry point in head_64.S.
- *
- * Pass the saved DS selector to the 32-bit code, and use far return to
- * restore the saved CS selector.
- */
- leaq efi32_boot_idt(%rip), %rax
- lidt (%rax)
- leaq efi32_boot_gdt(%rip), %rax
- lgdt (%rax)
-
- movzwl efi32_boot_ds(%rip), %edx
- movzwq efi32_boot_cs(%rip), %rax
- pushq %rax
- leaq efi_enter32(%rip), %rax
- pushq %rax
- lretq
-
-1: addq $64, %rsp
- movq %rdi, %rax
-
- pop %rbx
- movl %ebx, %ss
- pop %rbx
- movl %ebx, %es
- pop %rbx
- movl %ebx, %ds
- /* Clear out 32-bit selector from FS and GS */
- xorl %ebx, %ebx
- movl %ebx, %fs
- movl %ebx, %gs
-
- /*
- * Convert 32-bit status code into 64-bit.
- */
- roll $1, %eax
- rorq $1, %rax
-
- pop %rbx
- pop %rbp
- RET
-SYM_FUNC_END(__efi64_thunk)
-
- .code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-SYM_FUNC_START_LOCAL(efi_enter32)
- /* Load firmware selector into data and stack segment registers */
- movl %edx, %ds
- movl %edx, %es
- movl %edx, %fs
- movl %edx, %gs
- movl %edx, %ss
-
- /* Reload pgtables */
- movl %cr3, %eax
- movl %eax, %cr3
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Disable long mode via EFER */
- movl $MSR_EFER, %ecx
- rdmsr
- btrl $_EFER_LME, %eax
- wrmsr
-
- call *%edi
-
- /* We must preserve return value */
- movl %eax, %edi
-
- /*
- * Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs and IDTs.
- */
- cli
-
- lidtl (%ebx)
- subl $16, %ebx
-
- lgdtl (%ebx)
-
- movl %cr4, %eax
- btsl $(X86_CR4_PAE_BIT), %eax
- movl %eax, %cr4
-
- movl %cr3, %eax
- movl %eax, %cr3
-
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- xorl %eax, %eax
- lldt %ax
-
- pushl $__KERNEL_CS
- pushl %ebp
-
- /* Enable paging */
- movl %cr0, %eax
- btsl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
- lret
-SYM_FUNC_END(efi_enter32)
-
- .data
- .balign 8
-SYM_DATA_START(efi32_boot_gdt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_gdt)
-
-SYM_DATA_START(efi32_boot_idt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_idt)
-
-SYM_DATA_START(efi32_boot_cs)
- .word 0
-SYM_DATA_END(efi32_boot_cs)
-
-SYM_DATA_START(efi32_boot_ds)
- .word 0
-SYM_DATA_END(efi32_boot_ds)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3b354eb9516d..1cfe9802a42f 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)

#ifdef CONFIG_RELOCATABLE
leal startup_32@GOTOFF(%edx), %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry() will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- subl image_offset@GOTOFF(%edx), %ebx
-#endif
-
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
jmp *%eax
SYM_FUNC_END(startup_32)

-#ifdef CONFIG_EFI_STUB
-SYM_FUNC_START(efi32_stub_entry)
- add $0x4, %esp
- movl 8(%esp), %esi /* save boot_params pointer */
- call efi_main
- /* efi_main returns the possibly relocated address of startup_32 */
- jmp *%eax
-SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
-#endif
-
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

@@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
*/
/* push arguments for extract_kernel: */

- pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
pushl %ebp /* output address */
- pushl input_len@GOTOFF(%ebx) /* input_len */
- leal input_data@GOTOFF(%ebx), %eax
- pushl %eax /* input_data */
- leal boot_heap@GOTOFF(%ebx), %eax
- pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call extract_kernel /* returns kernel location in %eax */
+ call extract_kernel /* returns kernel entry point in %eax */
addl $24, %esp

/*
@@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x00cf92000000ffff /* __KERNEL_DS */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

-#ifdef CONFIG_EFI_STUB
-SYM_DATA(image_offset, .long 0)
-#endif
-
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-boot_heap:
- .fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b4bd6df29116..0d7aef10b19a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32)
1:

/* Setup Exception handling for SEV-ES */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
call startup32_load_idt
+#endif

/* Make sure cpu supports long mode. */
call verify_cpu
@@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32)

#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- subl rva(image_offset)(%ebp), %ebx
-#endif
-
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32)
*/
/*
* If SEV is active then set the encryption mask in the page tables.
- * This will insure that when the kernel is copied and decompressed
+ * This will ensure that when the kernel is copied and decompressed
* it will be done so encrypted.
*/
- call get_sev_encryption_bit
xorl %edx, %edx
#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call get_sev_encryption_bit
+ xorl %edx, %edx
testl %eax, %eax
jz 1f
subl $32, %eax /* Encryption bit is always above bit 31 */
@@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32)
movl $__BOOT_TSS, %eax
ltr %ax

+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+#endif
+
/*
* Setup for the jump to 64bit mode
*
@@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32)
*/
leal rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
- movl rva(efi32_boot_args)(%ebp), %edi
- testl %edi, %edi
- jz 1f
- leal rva(efi64_stub_entry)(%ebp), %eax
- movl rva(efi32_boot_args+4)(%ebp), %esi
- movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
- testl %edx, %edx
- jnz 1f
- /*
- * efi_pe_entry uses MS calling convention, which requires 32 bytes of
- * shadow space on the stack even if all arguments are passed in
- * registers. We also need an additional 8 bytes for the space that
- * would be occupied by the return address, and this also results in
- * the correct stack alignment for entry.
- */
- subl $40, %esp
- leal rva(efi_pe_entry)(%ebp), %eax
- movl %edi, %ecx // MS calling convention
- movl %esi, %edx
+ cmpb $1, rva(efi_is64)(%ebp)
+ je 1f
+ leal rva(startup_64_mixed_mode)(%ebp), %eax
1:
#endif
- /* Check if the C-bit position is correct when SEV is active */
- call startup32_check_sev_cbit

pushl $__KERNEL_CS
pushl %eax
@@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32)
lret
SYM_FUNC_END(startup_32)

-#ifdef CONFIG_EFI_MIXED
- .org 0x190
-SYM_FUNC_START(efi32_stub_entry)
- add $0x4, %esp /* Discard return address */
- popl %ecx
- popl %edx
- popl %esi
-
- call 1f
-1: pop %ebp
- subl $ rva(1b), %ebp
-
- movl %esi, rva(efi32_boot_args+8)(%ebp)
-SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
- movl %ecx, rva(efi32_boot_args)(%ebp)
- movl %edx, rva(efi32_boot_args+4)(%ebp)
- movb $0, rva(efi_is64)(%ebp)
-
- /* Save firmware GDTR and code/data selectors */
- sgdtl rva(efi32_boot_gdt)(%ebp)
- movw %cs, rva(efi32_boot_cs)(%ebp)
- movw %ds, rva(efi32_boot_ds)(%ebp)
-
- /* Store firmware IDT descriptor */
- sidtl rva(efi32_boot_idt)(%ebp)
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- jmp startup_32
-SYM_FUNC_END(efi32_stub_entry)
-#endif
-
.code64
.org 0x200
SYM_CODE_START(startup_64)
@@ -372,20 +314,6 @@ SYM_CODE_START(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- movl image_offset(%rip), %eax
- subq %rax, %rbp
-#endif
-
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
@@ -424,10 +352,6 @@ SYM_CODE_START(startup_64)
* For the trampoline, we need the top page table to reside in lower
* memory as we don't have a way to load 64-bit values into CR3 in
* 32-bit mode.
- *
- * We go though the trampoline even if we don't have to: if we're
- * already in a desired paging mode. This way the trampoline code gets
- * tested on every boot.
*/

/* Make sure we have GDT with 32-bit code segment */
@@ -442,10 +366,14 @@ SYM_CODE_START(startup_64)
lretq

.Lon_kernel_cs:
+ /*
+ * RSI holds a pointer to a boot_params structure provided by the
+ * loader, and this needs to be preserved across C function calls. So
+ * move it into a callee saved register.
+ */
+ movq %rsi, %r15

- pushq %rsi
call load_stage1_idt
- popq %rsi

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
@@ -456,82 +384,24 @@ SYM_CODE_START(startup_64)
* CPUID instructions being issued, so go ahead and do that now via
* sev_enable(), which will also handle the rest of the SEV-related
* detection/setup to ensure that has been done in advance of any dependent
- * code.
+ * code. Pass the boot_params pointer as the first argument.
*/
- pushq %rsi
- movq %rsi, %rdi /* real mode address */
+ movq %r15, %rdi
call sev_enable
- popq %rsi
#endif

/*
- * paging_prepare() sets up the trampoline and checks if we need to
- * enable 5-level paging.
- *
- * paging_prepare() returns a two-quadword structure which lands
- * into RDX:RAX:
- * - Address of the trampoline is returned in RAX.
- * - Non zero RDX means trampoline needs to enable 5-level
- * paging.
- *
- * RSI holds real mode data and needs to be preserved across
- * this function call.
- */
- pushq %rsi
- movq %rsi, %rdi /* real mode address */
- call paging_prepare
- popq %rsi
-
- /* Save the trampoline address in RCX */
- movq %rax, %rcx
-
- /* Set up 32-bit addressable stack */
- leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
-
- /*
- * Preserve live 64-bit registers on the stack: this is necessary
- * because the architecture does not guarantee that GPRs will retain
- * their full 64-bit values across a 32-bit mode switch.
- */
- pushq %rbp
- pushq %rbx
- pushq %rsi
-
- /*
- * Push the 64-bit address of trampoline_return() onto the new stack.
- * It will be used by the trampoline to return to the main code. Due to
- * the 32-bit mode switch, it cannot be kept it in a register either.
- */
- leaq trampoline_return(%rip), %rdi
- pushq %rdi
-
- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
- pushq $__KERNEL32_CS
- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
- pushq %rax
- lretq
-trampoline_return:
- /* Restore live 64-bit registers */
- popq %rsi
- popq %rbx
- popq %rbp
-
- /* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq rva(boot_stack_end)(%rbx), %rsp
-
- /*
- * cleanup_trampoline() would restore trampoline memory.
- *
- * RDI is address of the page table to use instead of page table
- * in trampoline memory (if required).
+ * configure_5level_paging() updates the number of paging levels using
+ * a trampoline in 32-bit addressable memory if the current number does
+ * not match the desired number.
*
- * RSI holds real mode data and needs to be preserved across
- * this function call.
+ * Pass the boot_params pointer as the first argument. The second
+ * argument is the relocated address of the page table to use instead
+ * of the page table in trampoline memory (if required).
*/
- pushq %rsi
- leaq rva(top_pgtable)(%rbx), %rdi
- call cleanup_trampoline
- popq %rsi
+ movq %r15, %rdi
+ leaq rva(top_pgtable)(%rbx), %rsi
+ call configure_5level_paging

/* Zero EFLAGS */
pushq $0
@@ -541,7 +411,6 @@ trampoline_return:
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
- pushq %rsi
leaq (_bss-8)(%rip), %rsi
leaq rva(_bss-8)(%rbx), %rdi
movl $(_bss - startup_32), %ecx
@@ -549,7 +418,6 @@ trampoline_return:
std
rep movsq
cld
- popq %rsi

/*
* The GDT may get overwritten either during the copy we just did or
@@ -568,19 +436,6 @@ trampoline_return:
jmp *%rax
SYM_CODE_END(startup_64)

-#ifdef CONFIG_EFI_STUB
- .org 0x390
-SYM_FUNC_START(efi64_stub_entry)
- and $~0xf, %rsp /* realign the stack */
- movq %rdx, %rbx /* save boot_params pointer */
- call efi_main
- movq %rbx,%rsi
- leaq rva(startup_64)(%rax), %rax
- jmp *%rax
-SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
-#endif
-
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

@@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
shrq $3, %rcx
rep stosq

- pushq %rsi
call load_stage2_idt

/* Pass boot_params to initialize_identity_maps() */
- movq (%rsp), %rdi
+ movq %r15, %rdi
call initialize_identity_maps
- popq %rsi

/*
* Do the extraction, and jump to the new kernel..
*/
- pushq %rsi /* Save the real mode argument */
- movq %rsi, %rdi /* real mode address */
- leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
- leaq input_data(%rip), %rdx /* input_data */
- movl input_len(%rip), %ecx /* input_len */
- movq %rbp, %r8 /* output target address */
- movl output_len(%rip), %r9d /* decompressed length, end of relocs */
- call extract_kernel /* returns kernel location in %rax */
- popq %rsi
+ /* pass struct boot_params pointer and output target address */
+ movq %r15, %rdi
+ movq %rbp, %rsi
+ call extract_kernel /* returns kernel entry point in %rax */

/*
* Jump to the decompressed kernel.
*/
+ movq %r15, %rsi
jmp *%rax
SYM_FUNC_END(.Lrelocated)

- .code32
/*
- * This is the 32-bit trampoline that will be copied over to low memory.
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
*
* Return address is at the top of the stack (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX means trampoline needs to enable 5-level paging.
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
*/
+ .section ".rodata", "a", @progbits
SYM_CODE_START(trampoline_32bit_src)
- /* Set up data and stack segments */
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %ss
+ /*
+ * Preserve callee save 64-bit registers on the stack: this is
+ * necessary because the architecture does not guarantee that GPRs will
+ * retain their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbp
+ pushq %rbx
+
+ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+ movq %rsp, %rbx
+ shrq $32, %rbx

+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq 0f(%rip), %rax
+ pushq %rax
+ lretq
+
+ /*
+ * The 32-bit code below will do a far jump back to long mode and end
+ * up here after reconfiguring the number of paging levels. First, the
+ * stack pointer needs to be restored to its full 64-bit value before
+ * the callee save register contents can be popped from the stack.
+ */
+.Lret:
+ shlq $32, %rbx
+ orq %rbx, %rsp
+
+ /* Restore the preserved 64-bit registers */
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ retq
+
+ .code32
+0:
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0

- /* Check what paging mode we want to be in after the trampoline */
- testl %edx, %edx
- jz 1f
-
- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jnz 3f
- jmp 2f
-1:
- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jz 3f
-2:
/* Point CR3 to the trampoline's new top level page table */
- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
- movl %eax, %cr3
-3:
+ movl %edi, %cr3
+
/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
- pushl %ecx
- pushl %edx
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
/* Avoid writing EFER if no change was made (for TDX guest) */
jc 1f
wrmsr
-1: popl %edx
- popl %ecx
-
-#ifdef CONFIG_X86_MCE
- /*
- * Preserve CR4.MCE if the kernel will enable #MC support.
- * Clearing MCE may fault in some environments (that also force #MC
- * support). Any machine check that occurs before #MC support is fully
- * configured will crash the system regardless of the CR4.MCE value set
- * here.
- */
- movl %cr4, %eax
- andl $X86_CR4_MCE, %eax
-#else
- movl $0, %eax
-#endif
-
- /* Enable PAE and LA57 (if required) paging modes */
- orl $X86_CR4_PAE, %eax
- testl %edx, %edx
- jz 1f
- orl $X86_CR4_LA57, %eax
1:
+ /* Toggle CR4.LA57 */
+ movl %cr4, %eax
+ btcl $X86_CR4_LA57_BIT, %eax
movl %eax, %cr4

- /* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
-
- /* Prepare the stack for far return to Long Mode */
- pushl $__KERNEL_CS
- pushl %eax
-
/* Enable paging again. */
movl %cr0, %eax
btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0

- lret
+ /*
+ * Return to the 64-bit calling code using LJMP rather than LRET, to
+ * avoid the need for a 32-bit addressable stack. The destination
+ * address will be adjusted after the template code is copied into a
+ * 32-bit addressable buffer.
+ */
+.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
SYM_CODE_END(trampoline_32bit_src)

- .code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
- /* Return from the trampoline */
- retq
-SYM_FUNC_END(.Lpaging_enabled)
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)

/*
* The trampoline code has a size limit.
@@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled)
*/
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

- .code32
+ .text
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
@@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
jmp 1b
SYM_FUNC_END(.Lno_longmode)

+ .globl verify_cpu
#include "../../kernel/verify_cpu.S"

.data
@@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt)
.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

-#ifdef CONFIG_AMD_MEM_ENCRYPT
-SYM_DATA_START(boot32_idt_desc)
- .word boot32_idt_end - boot32_idt - 1
- .long 0
-SYM_DATA_END(boot32_idt_desc)
- .balign 8
-SYM_DATA_START(boot32_idt)
- .rept 32
- .quad 0
- .endr
-SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
-#endif
-
-#ifdef CONFIG_EFI_STUB
-SYM_DATA(image_offset, .long 0)
-#endif
-#ifdef CONFIG_EFI_MIXED
-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
-SYM_DATA(efi_is64, .byte 1)
-
-#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
-#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
-#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
-
- __HEAD
- .code32
-SYM_FUNC_START(efi32_pe_entry)
-/*
- * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
- * efi_system_table_32_t *sys_table)
- */
-
- pushl %ebp
- movl %esp, %ebp
- pushl %eax // dummy push to allocate loaded_image
-
- pushl %ebx // save callee-save registers
- pushl %edi
-
- call verify_cpu // check for long mode support
- testl %eax, %eax
- movl $0x80000003, %eax // EFI_UNSUPPORTED
- jnz 2f
-
- call 1f
-1: pop %ebx
- subl $ rva(1b), %ebx
-
- /* Get the loaded image protocol pointer from the image handle */
- leal -4(%ebp), %eax
- pushl %eax // &loaded_image
- leal rva(loaded_image_proto)(%ebx), %eax
- pushl %eax // pass the GUID address
- pushl 8(%ebp) // pass the image handle
-
- /*
- * Note the alignment of the stack frame.
- * sys_table
- * handle <-- 16-byte aligned on entry by ABI
- * return address
- * frame pointer
- * loaded_image <-- local variable
- * saved %ebx <-- 16-byte aligned here
- * saved %edi
- * &loaded_image
- * &loaded_image_proto
- * handle <-- 16-byte aligned for call to handle_protocol
- */
-
- movl 12(%ebp), %eax // sys_table
- movl ST32_boottime(%eax), %eax // sys_table->boottime
- call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
- addl $12, %esp // restore argument space
- testl %eax, %eax
- jnz 2f
-
- movl 8(%ebp), %ecx // image_handle
- movl 12(%ebp), %edx // sys_table
- movl -4(%ebp), %esi // loaded_image
- movl LI32_image_base(%esi), %esi // loaded_image->image_base
- movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
- /*
- * We need to set the image_offset variable here since startup_32() will
- * use it before we get to the 64-bit efi_pe_entry() in C code.
- */
- subl %esi, %ebx
- movl %ebx, rva(image_offset)(%ebp) // save image_offset
- jmp efi32_pe_stub_entry
-
-2: popl %edi // restore callee-save registers
- popl %ebx
- leave
- RET
-SYM_FUNC_END(efi32_pe_entry)
-
- .section ".rodata"
- /* EFI loaded image protocol GUID */
- .balign 4
-SYM_DATA_START_LOCAL(loaded_image_proto)
- .long 0x5b1b31a1
- .word 0x9562, 0x11d2
- .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
-SYM_DATA_END(loaded_image_proto)
-#endif
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- __HEAD
- .code32
-/*
- * Write an IDT entry into boot32_idt
- *
- * Parameters:
- *
- * %eax: Handler address
- * %edx: Vector number
- *
- * Physical offset is expected in %ebp
- */
-SYM_FUNC_START(startup32_set_idt_entry)
- push %ebx
- push %ecx
-
- /* IDT entry address to %ebx */
- leal rva(boot32_idt)(%ebp), %ebx
- shl $3, %edx
- addl %edx, %ebx
-
- /* Build IDT entry, lower 4 bytes */
- movl %eax, %edx
- andl $0x0000ffff, %edx # Target code segment offset [15:0]
- movl $__KERNEL32_CS, %ecx # Target code segment selector
- shl $16, %ecx
- orl %ecx, %edx
-
- /* Store lower 4 bytes to IDT */
- movl %edx, (%ebx)
-
- /* Build IDT entry, upper 4 bytes */
- movl %eax, %edx
- andl $0xffff0000, %edx # Target code segment offset [31:16]
- orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
-
- /* Store upper 4 bytes to IDT */
- movl %edx, 4(%ebx)
-
- pop %ecx
- pop %ebx
- RET
-SYM_FUNC_END(startup32_set_idt_entry)
-#endif
-
-SYM_FUNC_START(startup32_load_idt)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- /* #VC handler */
- leal rva(startup32_vc_handler)(%ebp), %eax
- movl $X86_TRAP_VC, %edx
- call startup32_set_idt_entry
-
- /* Load IDT */
- leal rva(boot32_idt)(%ebp), %eax
- movl %eax, rva(boot32_idt_desc+2)(%ebp)
- lidt rva(boot32_idt_desc)(%ebp)
-#endif
- RET
-SYM_FUNC_END(startup32_load_idt)
-
-/*
- * Check for the correct C-bit position when the startup_32 boot-path is used.
- *
- * The check makes use of the fact that all memory is encrypted when paging is
- * disabled. The function creates 64 bits of random data using the RDRAND
- * instruction. RDRAND is mandatory for SEV guests, so always available. If the
- * hypervisor violates that the kernel will crash right here.
- *
- * The 64 bits of random data are stored to a memory location and at the same
- * time kept in the %eax and %ebx registers. Since encryption is always active
- * when paging is off the random data will be stored encrypted in main memory.
- *
- * Then paging is enabled. When the C-bit position is correct all memory is
- * still mapped encrypted and comparing the register values with memory will
- * succeed. An incorrect C-bit position will map all memory unencrypted, so that
- * the compare will use the encrypted random data and fail.
- */
-SYM_FUNC_START(startup32_check_sev_cbit)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
-
- /* Check for non-zero sev_status */
- movl rva(sev_status)(%ebp), %eax
- testl %eax, %eax
- jz 4f
-
- /*
- * Get two 32-bit random values - Don't bail out if RDRAND fails
- * because it is better to prevent forward progress if no random value
- * can be gathered.
- */
-1: rdrand %eax
- jnc 1b
-2: rdrand %ebx
- jnc 2b
-
- /* Store to memory and keep it in the registers */
- movl %eax, rva(sev_check_data)(%ebp)
- movl %ebx, rva(sev_check_data+4)(%ebp)
-
- /* Enable paging to see if encryption is active */
- movl %cr0, %edx /* Backup %cr0 in %edx */
- movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
- movl %ecx, %cr0
-
- cmpl %eax, rva(sev_check_data)(%ebp)
- jne 3f
- cmpl %ebx, rva(sev_check_data+4)(%ebp)
- jne 3f
-
- movl %edx, %cr0 /* Restore previous %cr0 */
-
- jmp 4f
-
-3: /* Check failed - hlt the machine */
- hlt
- jmp 3b
-
-4:
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-#endif
- RET
-SYM_FUNC_END(startup32_check_sev_cbit)
-
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
-
SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
.balign 16
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index d34222816c9f..b8c42339bc35 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode)
* or does not touch all the pages covering them.
*/
kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
- boot_params = rmode;
- kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ boot_params_ptr = rmode;
+ kernel_add_identity_map((unsigned long)boot_params_ptr,
+ (unsigned long)(boot_params_ptr + 1));
cmdline = get_cmd_line_ptr();
kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);

@@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode)
* Also map the setup_data entries passed via boot_params in case they
* need to be accessed by uncompressed kernel via the identity mapping.
*/
- sd = (struct setup_data *)boot_params->hdr.setup_data;
+ sd = (struct setup_data *)boot_params_ptr->hdr.setup_data;
while (sd) {
unsigned long sd_addr = (unsigned long)sd;

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e476bcbd9b42..9794d9174795 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void)
unsigned long hash = 0;

hash = rotate_xor(hash, build_str, sizeof(build_str));
- hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+ hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr));

return hash;
}
@@ -383,7 +383,7 @@ static void handle_mem_options(void)
static void mem_avoid_init(unsigned long input, unsigned long input_size,
unsigned long output)
{
- unsigned long init_size = boot_params->hdr.init_size;
+ unsigned long init_size = boot_params_ptr->hdr.init_size;
u64 initrd_start, initrd_size;
unsigned long cmd_line, cmd_line_size;

@@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;

/* Avoid initrd. */
- initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
- initrd_start |= boot_params->hdr.ramdisk_image;
- initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
- initrd_size |= boot_params->hdr.ramdisk_size;
+ initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32;
+ initrd_start |= boot_params_ptr->hdr.ramdisk_image;
+ initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32;
+ initrd_size |= boot_params_ptr->hdr.ramdisk_size;
mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
/* No need to set mapping for initrd, it will be handled in VO. */
@@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
}

/* Avoid boot parameters. */
- mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
- mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr;
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr);

/* We don't need to set a mapping for setup_data. */

@@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
}

/* Avoid all entries in the setup_data linked list. */
- ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data;
while (ptr) {
struct mem_vector avoid;

@@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region,
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
- struct efi_info *e = &boot_params->efi_info;
+ struct efi_info *e = &boot_params_ptr->efi_info;
bool efi_mirror_found = false;
struct mem_vector region;
efi_memory_desc_t *md;
@@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum,
struct boot_e820_entry *entry;

/* Verify potential e820 positions, appending to slots list. */
- for (i = 0; i < boot_params->e820_entries; i++) {
- entry = &boot_params->e820_table[i];
+ for (i = 0; i < boot_params_ptr->e820_entries; i++) {
+ entry = &boot_params_ptr->e820_table[i];
/* Skip non-RAM entries. */
if (entry->type != E820_TYPE_RAM)
continue;
@@ -836,7 +836,7 @@ void choose_random_location(unsigned long input,
return;
}

- boot_params->hdr.loadflags |= KASLR_FLAG;
+ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;

if (IS_ENABLED(CONFIG_X86_32))
mem_limit = KERNEL_IMAGE_SIZE;
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index a73e4d783cae..32f7cc8a8625 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -12,16 +12,13 @@
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
+#include <asm/segment.h>
+#include <asm/trapnr.h>

.text
.code32
SYM_FUNC_START(get_sev_encryption_bit)
- xor %eax, %eax
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
push %ebx
- push %ecx
- push %edx

movl $0x80000000, %eax /* CPUID to check the highest leaf */
cpuid
@@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
xor %eax, %eax

.Lsev_exit:
- pop %edx
- pop %ecx
pop %ebx
-
-#endif /* CONFIG_AMD_MEM_ENCRYPT */
-
RET
SYM_FUNC_END(get_sev_encryption_bit)

@@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid)
jmp 1b
SYM_CODE_END(sev_es_req_cpuid)

-SYM_CODE_START(startup32_vc_handler)
+SYM_CODE_START_LOCAL(startup32_vc_handler)
pushl %eax
pushl %ebx
pushl %ecx
@@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler)
jmp .Lfail
SYM_CODE_END(startup32_vc_handler)

+/*
+ * Write an IDT entry into boot32_idt
+ *
+ * Parameters:
+ *
+ * %eax: Handler address
+ * %edx: Vector number
+ * %ecx: IDT address
+ */
+SYM_FUNC_START_LOCAL(startup32_set_idt_entry)
+ /* IDT entry address to %ecx */
+ leal (%ecx, %edx, 8), %ecx
+
+ /* Build IDT entry, lower 4 bytes */
+ movl %eax, %edx
+ andl $0x0000ffff, %edx # Target code segment offset [15:0]
+ orl $(__KERNEL32_CS << 16), %edx # Target code segment selector
+
+ /* Store lower 4 bytes to IDT */
+ movl %edx, (%ecx)
+
+ /* Build IDT entry, upper 4 bytes */
+ movl %eax, %edx
+ andl $0xffff0000, %edx # Target code segment offset [31:16]
+ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
+
+ /* Store upper 4 bytes to IDT */
+ movl %edx, 4(%ecx)
+
+ RET
+SYM_FUNC_END(startup32_set_idt_entry)
+
+SYM_FUNC_START(startup32_load_idt)
+ push %ebp
+ push %ebx
+
+ call 1f
+1: pop %ebp
+
+ leal (boot32_idt - 1b)(%ebp), %ebx
+
+ /* #VC handler */
+ leal (startup32_vc_handler - 1b)(%ebp), %eax
+ movl $X86_TRAP_VC, %edx
+ movl %ebx, %ecx
+ call startup32_set_idt_entry
+
+ /* Load IDT */
+ leal (boot32_idt_desc - 1b)(%ebp), %ecx
+ movl %ebx, 2(%ecx)
+ lidt (%ecx)
+
+ pop %ebx
+ pop %ebp
+ RET
+SYM_FUNC_END(startup32_load_idt)
+
+/*
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
+ *
+ * The check makes use of the fact that all memory is encrypted when paging is
+ * disabled. The function creates 64 bits of random data using the RDRAND
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+ * hypervisor violates that the kernel will crash right here.
+ *
+ * The 64 bits of random data are stored to a memory location and at the same
+ * time kept in the %eax and %ebx registers. Since encryption is always active
+ * when paging is off the random data will be stored encrypted in main memory.
+ *
+ * Then paging is enabled. When the C-bit position is correct all memory is
+ * still mapped encrypted and comparing the register values with memory will
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+ * the compare will use the encrypted random data and fail.
+ */
+SYM_FUNC_START(startup32_check_sev_cbit)
+ pushl %ebx
+ pushl %ebp
+
+ call 0f
+0: popl %ebp
+
+ /* Check for non-zero sev_status */
+ movl (sev_status - 0b)(%ebp), %eax
+ testl %eax, %eax
+ jz 4f
+
+ /*
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
+ * because it is better to prevent forward progress if no random value
+ * can be gathered.
+ */
+1: rdrand %eax
+ jnc 1b
+2: rdrand %ebx
+ jnc 2b
+
+ /* Store to memory and keep it in the registers */
+ leal (sev_check_data - 0b)(%ebp), %ebp
+ movl %eax, 0(%ebp)
+ movl %ebx, 4(%ebp)
+
+ /* Enable paging to see if encryption is active */
+ movl %cr0, %edx /* Backup %cr0 in %edx */
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+ movl %ecx, %cr0
+
+ cmpl %eax, 0(%ebp)
+ jne 3f
+ cmpl %ebx, 4(%ebp)
+ jne 3f
+
+ movl %edx, %cr0 /* Restore previous %cr0 */
+
+ jmp 4f
+
+3: /* Check failed - hlt the machine */
+ hlt
+ jmp 3b
+
+4:
+ popl %ebp
+ popl %ebx
+ RET
+SYM_FUNC_END(startup32_check_sev_cbit)
+
.code64

#include "../../kernel/sev_verify_cbit.S"

.data

-#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
SYM_DATA(sme_me_mask, .quad 0)
SYM_DATA(sev_status, .quad 0)
SYM_DATA(sev_check_data, .quad 0)
-#endif
+
+SYM_DATA_START_LOCAL(boot32_idt)
+ .rept 32
+ .quad 0
+ .endr
+SYM_DATA_END(boot32_idt)
+
+SYM_DATA_START_LOCAL(boot32_idt_desc)
+ .word . - boot32_idt - 1
+ .long 0
+SYM_DATA_END(boot32_idt_desc)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index cf690d8712f4..8ae7893d712f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n);
/*
* This is set up by the setup-routine at boot-time
*/
-struct boot_params *boot_params;
+struct boot_params *boot_params_ptr;

struct port_io_ops pio_ops;

@@ -132,8 +132,8 @@ void __putstr(const char *s)
if (lines == 0 || cols == 0)
return;

- x = boot_params->screen_info.orig_x;
- y = boot_params->screen_info.orig_y;
+ x = boot_params_ptr->screen_info.orig_x;
+ y = boot_params_ptr->screen_info.orig_y;

while ((c = *s++) != '\0') {
if (c == '\n') {
@@ -154,8 +154,8 @@ void __putstr(const char *s)
}
}

- boot_params->screen_info.orig_x = x;
- boot_params->screen_info.orig_y = y;
+ boot_params_ptr->screen_info.orig_x = x;
+ boot_params_ptr->screen_info.orig_y = y;

pos = (x + cols * y) * 2; /* Update cursor position */
outb(14, vidport);
@@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
{ }
#endif

-static void parse_elf(void *output)
+static size_t parse_elf(void *output)
{
#ifdef CONFIG_X86_64
Elf64_Ehdr ehdr;
@@ -293,10 +293,8 @@ static void parse_elf(void *output)
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
- ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ ehdr.e_ident[EI_MAG3] != ELFMAG3)
error("Kernel is not a valid ELF file");
- return;
- }

debug_putstr("Parsing ELF... ");

@@ -328,6 +326,35 @@ static void parse_elf(void *output)
}

free(phdrs);
+
+ return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
+}
+
+const unsigned long kernel_total_size = VO__end - VO__text;
+
+static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+
+extern unsigned char input_data[];
+extern unsigned int input_len, output_len;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ void (*error)(char *x))
+{
+ unsigned long entry;
+
+ if (!free_mem_ptr) {
+ free_mem_ptr = (unsigned long)boot_heap;
+ free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
+ }
+
+ if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
+ NULL, error) < 0)
+ return ULONG_MAX;
+
+ entry = parse_elf(outbuf);
+ handle_relocations(outbuf, output_len, virt_addr);
+
+ return entry;
}

/*
@@ -347,25 +374,22 @@ static void parse_elf(void *output)
* |-------uncompressed kernel image---------|
*
*/
-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
- unsigned char *input_data,
- unsigned long input_len,
- unsigned char *output,
- unsigned long output_len)
+asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
{
- const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ memptr heap = (memptr)boot_heap;
unsigned long needed_size;
+ size_t entry_offset;

/* Retain x86 boot parameters pointer passed from startup_32/64. */
- boot_params = rmode;
+ boot_params_ptr = rmode;

/* Clear flags intended for solely in-kernel use. */
- boot_params->hdr.loadflags &= ~KASLR_FLAG;
+ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG;

- sanitize_boot_params(boot_params);
+ sanitize_boot_params(boot_params_ptr);

- if (boot_params->screen_info.orig_video_mode == 7) {
+ if (boot_params_ptr->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
@@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
vidport = 0x3d4;
}

- lines = boot_params->screen_info.orig_video_lines;
- cols = boot_params->screen_info.orig_video_cols;
+ lines = boot_params_ptr->screen_info.orig_video_lines;
+ cols = boot_params_ptr->screen_info.orig_video_cols;

init_default_io_ops();

@@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* so that early debugging output from the RSDP parsing code can be
* collected.
*/
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
+ boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr();

debug_putstr("early console in extract_kernel\n");

@@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* entries. This ensures the full mapped area is usable RAM
* and doesn't include any reserved areas.
*/
- needed_size = max(output_len, kernel_total_size);
+ needed_size = max_t(unsigned long, output_len, kernel_total_size);
#ifdef CONFIG_X86_64
needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
#endif
@@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
- if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
@@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif

debug_putstr("\nDecompressing Linux... ");
- __decompress(input_data, input_len, NULL, NULL, output, output_len,
- NULL, error);
- parse_elf(output);
- handle_relocations(output, output_len, virt_addr);
- debug_putstr("done.\nBooting the kernel.\n");
+
+ entry_offset = decompress_kernel(output, virt_addr, error);
+
+ debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
+ debug_puthex(entry_offset);
+ debug_putstr(").\n");

/* Disable exception handling before booting the kernel */
cleanup_exception_handling();

- return output;
+ return output + entry_offset;
}

void fortify_panic(const char *name)
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index a49d9219c06e..254acd76efde 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -52,7 +52,6 @@ extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
void *malloc(int size);
void free(void *where);
-extern struct boot_params *boot_params;
void __putstr(const char *s);
void __puthex(unsigned long value);
#define error_putstr(__x) __putstr(__x)
@@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif

/* ident_map_64.c */
-#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
-#endif
extern void kernel_add_identity_map(unsigned long start, unsigned long end);

/* Used by PAGE_KERN* macros: */
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index cc9b2529a086..6d595abe06b3 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -3,18 +3,16 @@

#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)

-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
-
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
-
-#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0

#ifndef __ASSEMBLER__

extern unsigned long *trampoline_32bit;

-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
+extern const u16 trampoline_ljmp_imm_offset;

#endif /* __ASSEMBLER__ */
#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 2ac12ff4111b..51f957b24ba7 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
unsigned int __section(".data") ptrs_per_p4d = 1;
#endif

-struct paging_config {
- unsigned long trampoline_start;
- unsigned long l5_required;
-};
-
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];

@@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
* purposes.
*
* Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
+ * configure_5level_paging() and extract_kernel().
*/
unsigned long *trampoline_32bit __section(".data");

-extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);

static unsigned long find_trampoline_placement(void)
@@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void)
*
* Only look for values in the legacy ROM for non-EFI system.
*/
- signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature;
if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
ebda_start = *(unsigned short *)0x40e << 4;
@@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void)
bios_start = round_down(bios_start, PAGE_SIZE);

/* Find the first usable memory region under bios_start. */
- for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) {
unsigned long new = bios_start;

- entry = &boot_params->e820_table[i];
+ entry = &boot_params_ptr->e820_table[i];

/* Skip all entries above bios_start. */
if (bios_start <= entry->addr)
@@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void)
return bios_start - TRAMPOLINE_32BIT_SIZE;
}

-struct paging_config paging_prepare(void *rmode)
+asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
{
- struct paging_config paging_config = {};
+ void (*toggle_la57)(void *cr3);
+ bool l5_required = false;

/* Initialize boot_params. Required for cmdline_find_option_bool(). */
- boot_params = rmode;
+ boot_params_ptr = bp;

/*
* Check if LA57 is desired and supported.
@@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode)
!cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
- paging_config.l5_required = 1;
+ l5_required = true;
+
+ /* Initialize variables for 5-level paging */
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
}

- paging_config.trampoline_start = find_trampoline_placement();
+ /*
+ * The trampoline will not be used if the paging mode is already set to
+ * the desired one.
+ */
+ if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ return;

- trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+ trampoline_32bit = (unsigned long *)find_trampoline_placement();

/* Preserve trampoline memory */
memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
@@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode)
memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);

/* Copy trampoline code in place */
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ toggle_la57 = memcpy(trampoline_32bit +
+ TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);

+ /*
+ * Avoid the need for a stack in the 32-bit trampoline code, by using
+ * LJMP rather than LRET to return back to long mode. LJMP takes an
+ * immediate absolute address, which needs to be adjusted based on the
+ * placement of the trampoline.
+ */
+ *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
+ (unsigned long)toggle_la57;
+
/*
* The code below prepares page table in trampoline memory.
*
* The new page table will be used by trampoline code for switching
* from 4- to 5-level paging or vice versa.
- *
- * If switching is not required, the page table is unused: trampoline
- * code wouldn't touch CR3.
- */
-
- /*
- * We are not going to use the page table in trampoline memory if we
- * are already in the desired paging mode.
*/
- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
- goto out;

- if (paging_config.l5_required) {
+ if (l5_required) {
/*
* For 4- to 5-level paging transition, set up current CR3 as
* the first and the only entry in a new top-level page table.
*/
- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
} else {
unsigned long src;

@@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode)
* may be above 4G.
*/
src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
- (void *)src, PAGE_SIZE);
+ memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
}

-out:
- return paging_config;
-}
-
-void cleanup_trampoline(void *pgtable)
-{
- void *trampoline_pgtable;
-
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+ toggle_la57(trampoline_32bit);

/*
- * Move the top level page table out of trampoline memory,
- * if it's there.
+ * Move the top level page table out of trampoline memory.
*/
- if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)pgtable);
- }
+ memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);

/* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-
- /* Initialize variables for 5-level paging */
-#ifdef CONFIG_X86_5LEVEL
- if (__read_cr4() & X86_CR4_LA57) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
- }
-#endif
}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 9c91cc40f456..d07e665bb265 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -327,20 +327,25 @@ static void enforce_vmpl0(void)
*/
#define SNP_FEATURES_PRESENT (0)

+u64 snp_get_unsupported_features(u64 status)
+{
+ if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
+ return 0;
+
+ return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+}
+
void snp_check_features(void)
{
u64 unsupported;

- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
- return;
-
/*
* Terminate the boot if hypervisor has enabled any feature lacking
* guest side implementation. Pass on the unsupported features mask through
* EXIT_INFO_2 of the GHCB protocol so that those features can be reported
* as part of the guest boot failure.
*/
- unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ unsupported = snp_get_unsupported_features(sev_status);
if (unsupported) {
if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
@@ -350,35 +355,22 @@ void snp_check_features(void)
}
}

-void sev_enable(struct boot_params *bp)
+/*
+ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
+ *
+ * Returns < 0 if SEV is not supported, otherwise the position of the
+ * encryption bit in the page table descriptors.
+ */
+static int sev_check_cpu_support(void)
{
unsigned int eax, ebx, ecx, edx;
- struct msr m;
- bool snp;
-
- /*
- * bp->cc_blob_address should only be set by boot/compressed kernel.
- * Initialize it to 0 to ensure that uninitialized values from
- * buggy bootloaders aren't propagated.
- */
- if (bp)
- bp->cc_blob_address = 0;
-
- /*
- * Do an initial SEV capability check before snp_init() which
- * loads the CPUID page and the same checks afterwards are done
- * without the hypervisor and are trustworthy.
- *
- * If the HV fakes SEV support, the guest will crash'n'burn
- * which is good enough.
- */

/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
- return;
+ return -ENODEV;

/*
* Check for the SME/SEV feature:
@@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp)
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & BIT(1)))
+ return -ENODEV;
+
+ return ebx & 0x3f;
+}
+
+void sev_enable(struct boot_params *bp)
+{
+ struct msr m;
+ int bitpos;
+ bool snp;
+
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 to ensure that uninitialized values from
+ * buggy bootloaders aren't propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+
+ /*
+ * Do an initial SEV capability check before snp_init() which
+ * loads the CPUID page and the same checks afterwards are done
+ * without the hypervisor and are trustworthy.
+ *
+ * If the HV fakes SEV support, the guest will crash'n'burn
+ * which is good enough.
+ */
+
+ if (sev_check_cpu_support() < 0)
return;

/*
@@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp)

/* Now repeat the checks with the SNP CPUID table. */

- /* Recheck the SME/SEV support leaf */
- eax = 0x80000000;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (eax < 0x8000001f)
- return;
-
- /*
- * Recheck for the SME/SEV feature:
- * CPUID Fn8000_001F[EAX]
- * - Bit 0 - Secure Memory Encryption support
- * - Bit 1 - Secure Encrypted Virtualization support
- * CPUID Fn8000_001F[EBX]
- * - Bits 5:0 - Pagetable bit position used to indicate encryption
- */
- eax = 0x8000001f;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- /* Check whether SEV is supported */
- if (!(eax & BIT(1))) {
+ bitpos = sev_check_cpu_support();
+ if (bitpos < 0) {
if (snp)
error("SEV-SNP support indicated by CC blob, but not CPUID.");
return;
@@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp)
if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");

- sme_me_mask = BIT_ULL(ebx & 0x3f);
+ sme_me_mask = BIT_ULL(bitpos);
+}
+
+/*
+ * sev_get_status - Retrieve the SEV status mask
+ *
+ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
+ * AMD64_SEV MSR.
+ */
+u64 sev_get_status(void)
+{
+ struct msr m;
+
+ if (sev_check_cpu_support() < 0)
+ return 0;
+
+ boot_rdmsr(MSR_AMD64_SEV, &m);
+ return m.q;
}

/* Search for Confidential Computing blob in the EFI config table. */
@@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
* accessed after switchover.
*/
if (sev_snp_enabled()) {
- unsigned long cc_info_pa = boot_params->cc_blob_address;
+ unsigned long cc_info_pa = boot_params_ptr->cc_blob_address;
struct cc_blob_sev_info *cc_info;

kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f912d7770130..d31982509654 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -406,7 +406,7 @@ xloadflags:
# define XLF1 0
#endif

-#ifdef CONFIG_EFI_STUB
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
# ifdef CONFIG_EFI_MIXED
# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
# else
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a3725ad46c5a..bd247692b701 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -290,6 +290,7 @@ static void efi_stub_entry_update(void)
{
unsigned long addr = efi32_stub_entry;

+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
#ifdef CONFIG_X86_64
/* Yes, this is really how we defined it :( */
addr = efi64_stub_entry - 0x200;
@@ -298,6 +299,7 @@ static void efi_stub_entry_update(void)
#ifdef CONFIG_EFI_MIXED
if (efi32_stub_entry != addr)
die("32-bit and 64-bit EFI entry points do not match\n");
+#endif
#endif
put_unaligned_le32(addr, &buf[0x264]);
}
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb362bc..09e99d13fc0b 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/linkage.h>
#include <asm/export.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>

.pushsection .noinstr.text, "ax"

@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);

.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index e309e7156038..ee5def1060c8 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS

/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -981,6 +982,7 @@ restore_all_switch_stack:

/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi)

/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return

.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9953d966d124..c2383c2880ec 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -223,6 +223,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
/* Restore RDI. */
popq %rdi
swapgs
+ CLEAR_CPU_BUFFERS
jmp .Lnative_iret


@@ -767,6 +769,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */

+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -1493,6 +1497,12 @@ nmi_restore:
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */

+ /*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
/*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
@@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(ignore_sysret)
#endif
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d6c08d8986b1..4bcd009a232b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 215d37f7dde8..a38cc0afc90a 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -79,4 +79,14 @@
# define BOOT_STACK_SIZE 0x1000
#endif

+#ifndef __ASSEMBLY__
+extern unsigned int output_len;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ void (*error)(char *x));
+
+extern struct boot_params *boot_params_ptr;
+#endif
+
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b122708792c4..b60f24b30cb9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -304,7 +304,7 @@
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-
+#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */

#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 233ae6986d6f..e601264b1a24 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -88,6 +88,8 @@ static inline void efi_fpu_end(void)
}

#ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
+
#define arch_efi_call_virt_setup() \
({ \
efi_fpu_begin(); \
@@ -101,8 +103,7 @@ static inline void efi_fpu_end(void)
})

#else /* !CONFIG_X86_32 */
-
-#define EFI_LOADER_SIGNATURE "EL64"
+#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT

extern asmlinkage u64 __efi_call(void *fp, ...);

@@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,

#ifdef CONFIG_EFI_MIXED

+#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
#define ARCH_HAS_EFISTUB_WRAPPERS

static inline bool efi_is_64bit(void)
@@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
(__efi64_split(phys), __efi64_split(size), __efi64_split(flags))

+/* Memory Attribute Protocol */
+#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
+#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 11203a9fe0a8..ffe72790ceaf 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,

static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d3706de91a93..8f6f17a8617b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,19 @@
#endif
.endm

+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
+ verw _ASM_RIP(mds_verw_sel)
+.Lskip_verw_\@:
+.endm
+
#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE \
@@ -368,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

+extern u16 mds_verw_sel;
+
#include <asm/segment.h>

/**
@@ -400,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

-/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7ca5c9ec8b52..cf98fc28601f 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);

static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
@@ -202,12 +203,15 @@ void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __init __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
static inline void setup_ghcb(void) { }
@@ -225,6 +229,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
{
return -ENOTTY;
}
+
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
#endif

#endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 13dffc43ded0..d1895930e6eb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;

- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);

if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);

if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
static_branch_enable(&mmio_stale_data_clear);

@@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;

- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;

/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 427899650483..32bd64017047 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci)
}
EXPORT_SYMBOL_GPL(intel_cpu_collect_info);

+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);
+
+ /*
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}

static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}

-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c)

init_ia32_feat_ctl(c);

- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);

split_lock_init();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9dac24680ff8..993734e96615 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);

/*
- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
- * to be reserved.
+ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+ * kexec and do not need to be reserved.
*/
- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+ if (data->type != SETUP_EFI &&
+ data->type != SETUP_IMA &&
+ data->type != SETUP_RNG_SEED)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3bc04..ed6cce6c3950 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -522,9 +522,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc189..6a9bfdfbb6e5 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H

-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)

#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 0b5db4de4d09..0b2cad66dee1 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX

/* Check if vmlaunch or vmresume is needed */
- testb $VMX_RUN_VMRESUME, %bl
+ bt $VMX_RUN_VMRESUME_SHIFT, %bx

/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX

- /* Check EFLAGS.ZF from 'testb' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch

/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 57c1374fdfd4..5c1590855ffc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)

static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ vmx_fb_clear_ctrl_available;

/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
{
guest_state_enter_irqoff();

- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index c9064d34d830..0211f704a358 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev)
bt_dev_dbg(hdev, "QCA Patch config");

skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd),
- cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ cmd, 0, HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err);
@@ -594,27 +594,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
/* Firmware files to download are based on ROM version.
* ROM version is derived from last two bytes of soc_ver.
*/
- rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
+ if (soc_type == QCA_WCN3988)
+ rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f);
+ else
+ rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);

if (soc_type == QCA_WCN6750)
qca_send_patch_config_cmd(hdev);

/* Download rampatch file */
config.type = TLV_TYPE_PATCH;
- if (qca_is_wcn399x(soc_type)) {
+ switch (soc_type) {
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
snprintf(config.fwname, sizeof(config.fwname),
"qca/crbtfw%02x.tlv", rom_ver);
- } else if (soc_type == QCA_QCA6390) {
+ break;
+ case QCA_WCN3988:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/apbtfw%02x.tlv", rom_ver);
+ break;
+ case QCA_QCA6390:
snprintf(config.fwname, sizeof(config.fwname),
"qca/htbtfw%02x.tlv", rom_ver);
- } else if (soc_type == QCA_WCN6750) {
+ break;
+ case QCA_WCN6750:
/* Choose mbn file by default.If mbn file is not found
* then choose tlv file
*/
config.type = ELF_TYPE_PATCH;
snprintf(config.fwname, sizeof(config.fwname),
"qca/msbtfw%02x.mbn", rom_ver);
- } else {
+ break;
+ case QCA_WCN6855:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/hpbtfw%02x.tlv", rom_ver);
+ break;
+ case QCA_WCN7850:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/hmtbtfw%02x.tlv", rom_ver);
+ break;
+ default:
snprintf(config.fwname, sizeof(config.fwname),
"qca/rampatch_%08x.bin", soc_ver);
}
@@ -630,27 +651,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,

/* Download NVM configuration */
config.type = TLV_TYPE_NVM;
- if (firmware_name)
+ if (firmware_name) {
snprintf(config.fwname, sizeof(config.fwname),
"qca/%s", firmware_name);
- else if (qca_is_wcn399x(soc_type)) {
- if (ver.soc_id == QCA_WCN3991_SOC_ID) {
+ } else {
+ switch (soc_type) {
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) {
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/crnv%02xu.bin", rom_ver);
+ } else {
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/crnv%02x.bin", rom_ver);
+ }
+ break;
+ case QCA_WCN3988:
snprintf(config.fwname, sizeof(config.fwname),
- "qca/crnv%02xu.bin", rom_ver);
- } else {
+ "qca/apnv%02x.bin", rom_ver);
+ break;
+ case QCA_QCA6390:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/htnv%02x.bin", rom_ver);
+ break;
+ case QCA_WCN6750:
snprintf(config.fwname, sizeof(config.fwname),
- "qca/crnv%02x.bin", rom_ver);
+ "qca/msnv%02x.bin", rom_ver);
+ break;
+ case QCA_WCN6855:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/hpnv%02x.bin", rom_ver);
+ break;
+ case QCA_WCN7850:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/hmtnv%02x.bin", rom_ver);
+ break;
+
+ default:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/nvm_%08x.bin", soc_ver);
}
}
- else if (soc_type == QCA_QCA6390)
- snprintf(config.fwname, sizeof(config.fwname),
- "qca/htnv%02x.bin", rom_ver);
- else if (soc_type == QCA_WCN6750)
- snprintf(config.fwname, sizeof(config.fwname),
- "qca/msnv%02x.bin", rom_ver);
- else
- snprintf(config.fwname, sizeof(config.fwname),
- "qca/nvm_%08x.bin", soc_ver);

err = qca_download_firmware(hdev, &config, soc_type, rom_ver);
if (err < 0) {
@@ -658,16 +700,25 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
return err;
}

- if (soc_type >= QCA_WCN3991) {
+ switch (soc_type) {
+ case QCA_WCN3991:
+ case QCA_QCA6390:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
err = qca_disable_soc_logging(hdev);
if (err < 0)
return err;
+ break;
+ default:
+ break;
}

/* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the
* VsMsftOpCode.
*/
switch (soc_type) {
+ case QCA_WCN3988:
case QCA_WCN3990:
case QCA_WCN3991:
case QCA_WCN3998:
@@ -685,11 +736,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
return err;
}

- if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) {
+ switch (soc_type) {
+ case QCA_WCN3991:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
/* get fw build info */
err = qca_read_fw_build_info(hdev);
if (err < 0)
return err;
+ break;
+ default:
+ break;
}

bt_dev_info(hdev, "QCA setup on UART is completed");
diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
index 61e9a50e66ae..03bff5c0059d 100644
--- a/drivers/bluetooth/btqca.h
+++ b/drivers/bluetooth/btqca.h
@@ -142,11 +142,14 @@ enum qca_btsoc_type {
QCA_INVALID = -1,
QCA_AR3002,
QCA_ROME,
+ QCA_WCN3988,
QCA_WCN3990,
QCA_WCN3998,
QCA_WCN3991,
QCA_QCA6390,
QCA_WCN6750,
+ QCA_WCN6855,
+ QCA_WCN7850,
};

#if IS_ENABLED(CONFIG_BT_QCA)
@@ -159,16 +162,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver,
enum qca_btsoc_type);
int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
int qca_send_pre_shutdown_cmd(struct hci_dev *hdev);
-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
-{
- return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 ||
- soc_type == QCA_WCN3998;
-}
-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
-{
- return soc_type == QCA_WCN6750;
-}
-
#else

static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
@@ -196,16 +189,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
return -EOPNOTSUPP;
}

-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
-{
- return false;
-}
-
-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
-{
- return false;
-}
-
static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
{
return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 76ceb8a0183d..8bfef7f81b41 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,6 +7,7 @@
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
@@ -606,9 +607,18 @@ static int qca_open(struct hci_uart *hu)
if (hu->serdev) {
qcadev = serdev_device_get_drvdata(hu->serdev);

- if (qca_is_wcn399x(qcadev->btsoc_type) ||
- qca_is_wcn6750(qcadev->btsoc_type))
+ switch (qcadev->btsoc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
hu->init_speed = qcadev->init_speed;
+ break;
+
+ default:
+ break;
+ }

if (qcadev->oper_speed)
hu->oper_speed = qcadev->oper_speed;
@@ -1314,11 +1324,20 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));

/* Give the controller time to process the request */
- if (qca_is_wcn399x(qca_soc_type(hu)) ||
- qca_is_wcn6750(qca_soc_type(hu)))
+ switch (qca_soc_type(hu)) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
usleep_range(1000, 10000);
- else
+ break;
+
+ default:
msleep(300);
+ }

return 0;
}
@@ -1391,12 +1410,20 @@ static unsigned int qca_get_speed(struct hci_uart *hu,

static int qca_check_speeds(struct hci_uart *hu)
{
- if (qca_is_wcn399x(qca_soc_type(hu)) ||
- qca_is_wcn6750(qca_soc_type(hu))) {
+ switch (qca_soc_type(hu)) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
if (!qca_get_speed(hu, QCA_INIT_SPEED) &&
!qca_get_speed(hu, QCA_OPER_SPEED))
return -EINVAL;
- } else {
+ break;
+
+ default:
if (!qca_get_speed(hu, QCA_INIT_SPEED) ||
!qca_get_speed(hu, QCA_OPER_SPEED))
return -EINVAL;
@@ -1425,13 +1452,29 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
/* Disable flow control for wcn3990 to deassert RTS while
* changing the baudrate of chip and host.
*/
- if (qca_is_wcn399x(soc_type) ||
- qca_is_wcn6750(soc_type))
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
hci_uart_set_flow_control(hu, true);
+ break;

- if (soc_type == QCA_WCN3990) {
+ default:
+ break;
+ }
+
+ switch (soc_type) {
+ case QCA_WCN3990:
reinit_completion(&qca->drop_ev_comp);
set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
+ break;
+
+ default:
+ break;
}

qca_baudrate = qca_get_baudrate_value(speed);
@@ -1443,11 +1486,23 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
host_set_baudrate(hu, speed);

error:
- if (qca_is_wcn399x(soc_type) ||
- qca_is_wcn6750(soc_type))
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
hci_uart_set_flow_control(hu, false);
+ break;

- if (soc_type == QCA_WCN3990) {
+ default:
+ break;
+ }
+
+ switch (soc_type) {
+ case QCA_WCN3990:
/* Wait for the controller to send the vendor event
* for the baudrate change command.
*/
@@ -1459,6 +1514,10 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
}

clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
+ break;
+
+ default:
+ break;
}
}

@@ -1620,12 +1679,20 @@ static int qca_regulator_init(struct hci_uart *hu)
}
}

- if (qca_is_wcn399x(soc_type)) {
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
/* Forcefully enable wcn399x to enter in to boot mode. */
host_set_baudrate(hu, 2400);
ret = qca_send_power_pulse(hu, false);
if (ret)
return ret;
+ break;
+
+ default:
+ break;
}

/* For wcn6750 need to enable gpio bt_en */
@@ -1642,10 +1709,18 @@ static int qca_regulator_init(struct hci_uart *hu)

qca_set_speed(hu, QCA_INIT_SPEED);

- if (qca_is_wcn399x(soc_type)) {
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
ret = qca_send_power_pulse(hu, true);
if (ret)
return ret;
+ break;
+
+ default:
+ break;
}

/* Now the device is in ready state to communicate with host.
@@ -1679,10 +1754,18 @@ static int qca_power_on(struct hci_dev *hdev)
if (!hu->serdev)
return 0;

- if (qca_is_wcn399x(soc_type) ||
- qca_is_wcn6750(soc_type)) {
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
ret = qca_regulator_init(hu);
- } else {
+ break;
+
+ default:
qcadev = serdev_device_get_drvdata(hu->serdev);
if (qcadev->bt_en) {
gpiod_set_value_cansleep(qcadev->bt_en, 1);
@@ -1705,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu)
const char *firmware_name = qca_get_firmware_name(hu);
int ret;
struct qca_btsoc_version ver;
+ const char *soc_name;

ret = qca_check_speeds(hu);
if (ret)
@@ -1719,9 +1803,30 @@ static int qca_setup(struct hci_uart *hu)
*/
set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);

- bt_dev_info(hdev, "setting up %s",
- qca_is_wcn399x(soc_type) ? "wcn399x" :
- (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390");
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ soc_name = "wcn399x";
+ break;
+
+ case QCA_WCN6750:
+ soc_name = "wcn6750";
+ break;
+
+ case QCA_WCN6855:
+ soc_name = "wcn6855";
+ break;
+
+ case QCA_WCN7850:
+ soc_name = "wcn7850";
+ break;
+
+ default:
+ soc_name = "ROME/QCA6390";
+ }
+ bt_dev_info(hdev, "setting up %s", soc_name);

qca->memdump_state = QCA_MEMDUMP_IDLE;

@@ -1732,15 +1837,33 @@ static int qca_setup(struct hci_uart *hu)

clear_bit(QCA_SSR_TRIGGERED, &qca->flags);

- if (qca_is_wcn399x(soc_type) ||
- qca_is_wcn6750(soc_type)) {
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
+
+ /* Set BDA quirk bit for reading BDA value from fwnode property
+ * only if that property exist in DT.
+ */
+ if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
+ } else {
+ bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
+ }
+
hci_set_aosp_capable(hdev);

ret = qca_read_soc_version(hdev, &ver, soc_type);
if (ret)
goto out;
- } else {
+ break;
+
+ default:
qca_set_speed(hu, QCA_INIT_SPEED);
}

@@ -1754,8 +1877,17 @@ static int qca_setup(struct hci_uart *hu)
qca_baudrate = qca_get_baudrate_value(speed);
}

- if (!(qca_is_wcn399x(soc_type) ||
- qca_is_wcn6750(soc_type))) {
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
+ break;
+
+ default:
/* Get QCA version information */
ret = qca_read_soc_version(hdev, &ver, soc_type);
if (ret)
@@ -1824,7 +1956,18 @@ static const struct hci_uart_proto qca_proto = {
.dequeue = qca_dequeue,
};

-static const struct qca_device_data qca_soc_data_wcn3990 = {
+static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = {
+ .soc_type = QCA_WCN3988,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 15000 },
+ { "vddxo", 80000 },
+ { "vddrf", 300000 },
+ { "vddch0", 450000 },
+ },
+ .num_vregs = 4,
+};
+
+static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = {
.soc_type = QCA_WCN3990,
.vregs = (struct qca_vreg []) {
{ "vddio", 15000 },
@@ -1835,7 +1978,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = {
.num_vregs = 4,
};

-static const struct qca_device_data qca_soc_data_wcn3991 = {
+static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = {
.soc_type = QCA_WCN3991,
.vregs = (struct qca_vreg []) {
{ "vddio", 15000 },
@@ -1847,7 +1990,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = {
.capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
};

-static const struct qca_device_data qca_soc_data_wcn3998 = {
+static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = {
.soc_type = QCA_WCN3998,
.vregs = (struct qca_vreg []) {
{ "vddio", 10000 },
@@ -1858,13 +2001,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = {
.num_vregs = 4,
};

-static const struct qca_device_data qca_soc_data_qca6390 = {
+static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = {
.soc_type = QCA_QCA6390,
.num_vregs = 0,
.capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
};

-static const struct qca_device_data qca_soc_data_wcn6750 = {
+static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = {
.soc_type = QCA_WCN6750,
.vregs = (struct qca_vreg []) {
{ "vddio", 5000 },
@@ -1881,6 +2024,34 @@ static const struct qca_device_data qca_soc_data_wcn6750 = {
.capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
};

+static const struct qca_device_data qca_soc_data_wcn6855 = {
+ .soc_type = QCA_WCN6855,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 5000 },
+ { "vddbtcxmx", 126000 },
+ { "vddrfacmn", 12500 },
+ { "vddrfa0p8", 102000 },
+ { "vddrfa1p7", 302000 },
+ { "vddrfa1p2", 257000 },
+ },
+ .num_vregs = 6,
+ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+};
+
+static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = {
+ .soc_type = QCA_WCN7850,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 5000 },
+ { "vddaon", 26000 },
+ { "vdddig", 126000 },
+ { "vddrfa0p8", 102000 },
+ { "vddrfa1p2", 257000 },
+ { "vddrfa1p9", 302000 },
+ },
+ .num_vregs = 6,
+ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+};
+
static void qca_power_shutdown(struct hci_uart *hu)
{
struct qca_serdev *qcadev;
@@ -1906,11 +2077,18 @@ static void qca_power_shutdown(struct hci_uart *hu)

qcadev = serdev_device_get_drvdata(hu->serdev);

- if (qca_is_wcn399x(soc_type)) {
+ switch (soc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
host_set_baudrate(hu, 2400);
qca_send_power_pulse(hu, false);
qca_regulator_disable(qcadev);
- } else if (soc_type == QCA_WCN6750) {
+ break;
+
+ case QCA_WCN6750:
+ case QCA_WCN6855:
gpiod_set_value_cansleep(qcadev->bt_en, 0);
msleep(100);
qca_regulator_disable(qcadev);
@@ -1918,7 +2096,9 @@ static void qca_power_shutdown(struct hci_uart *hu)
sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl);
bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state);
}
- } else if (qcadev->bt_en) {
+ break;
+
+ default:
gpiod_set_value_cansleep(qcadev->bt_en, 0);
}

@@ -2043,10 +2223,19 @@ static int qca_serdev_probe(struct serdev_device *serdev)
if (!qcadev->oper_speed)
BT_DBG("UART will pick default operating speed");

- if (data &&
- (qca_is_wcn399x(data->soc_type) ||
- qca_is_wcn6750(data->soc_type))) {
+ if (data)
qcadev->btsoc_type = data->soc_type;
+ else
+ qcadev->btsoc_type = QCA_ROME;
+
+ switch (qcadev->btsoc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
qcadev->bt_power = devm_kzalloc(&serdev->dev,
sizeof(struct qca_power),
GFP_KERNEL);
@@ -2065,14 +2254,19 @@ static int qca_serdev_probe(struct serdev_device *serdev)

qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) {
+ if (IS_ERR_OR_NULL(qcadev->bt_en) &&
+ (data->soc_type == QCA_WCN6750 ||
+ data->soc_type == QCA_WCN6855)) {
dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
power_ctrl_enabled = false;
}

qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
GPIOD_IN);
- if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750)
+ if (IS_ERR_OR_NULL(qcadev->sw_ctrl) &&
+ (data->soc_type == QCA_WCN6750 ||
+ data->soc_type == QCA_WCN6855 ||
+ data->soc_type == QCA_WCN7850))
dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");

qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
@@ -2086,12 +2280,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
BT_ERR("wcn3990 serdev registration failed");
return err;
}
- } else {
- if (data)
- qcadev->btsoc_type = data->soc_type;
- else
- qcadev->btsoc_type = QCA_ROME;
+ break;

+ default:
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
if (IS_ERR_OR_NULL(qcadev->bt_en)) {
@@ -2147,12 +2338,24 @@ static void qca_serdev_remove(struct serdev_device *serdev)
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
struct qca_power *power = qcadev->bt_power;

- if ((qca_is_wcn399x(qcadev->btsoc_type) ||
- qca_is_wcn6750(qcadev->btsoc_type)) &&
- power->vregs_on)
- qca_power_shutdown(&qcadev->serdev_hu);
- else if (qcadev->susclk)
- clk_disable_unprepare(qcadev->susclk);
+ switch (qcadev->btsoc_type) {
+ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
+ if (power->vregs_on) {
+ qca_power_shutdown(&qcadev->serdev_hu);
+ break;
+ }
+ fallthrough;
+
+ default:
+ if (qcadev->susclk)
+ clk_disable_unprepare(qcadev->susclk);
+ }

hci_uart_unregister_device(&qcadev->serdev_hu);
}
@@ -2329,10 +2532,13 @@ static const struct of_device_id qca_bluetooth_of_match[] = {
{ .compatible = "qcom,qca6174-bt" },
{ .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390},
{ .compatible = "qcom,qca9377-bt" },
+ { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988},
{ .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
{ .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
{ .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
{ .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750},
+ { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855},
+ { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index 422d78247553..dcacc5064d33 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -21,24 +21,24 @@
#define MISC_CLK_ENB 0x48

#define OSC_CTRL 0x50
-#define OSC_CTRL_OSC_FREQ_MASK (3<<30)
-#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30)
-#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30)
-#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30)
-#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30)
-#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK)
-
-#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28)
-#define OSC_CTRL_PLL_REF_DIV_1 (0<<28)
-#define OSC_CTRL_PLL_REF_DIV_2 (1<<28)
-#define OSC_CTRL_PLL_REF_DIV_4 (2<<28)
+#define OSC_CTRL_OSC_FREQ_MASK (3u<<30)
+#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30)
+#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30)
+#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30)
+#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30)
+#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK)
+
+#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28)
+#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28)
+#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28)
+#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28)

#define OSC_FREQ_DET 0x58
-#define OSC_FREQ_DET_TRIG (1<<31)
+#define OSC_FREQ_DET_TRIG (1u<<31)

#define OSC_FREQ_DET_STATUS 0x5c
-#define OSC_FREQ_DET_BUSY (1<<31)
-#define OSC_FREQ_DET_CNT_MASK 0xFFFF
+#define OSC_FREQ_DET_BUSYu (1<<31)
+#define OSC_FREQ_DET_CNT_MASK 0xFFFFu

#define TEGRA20_CLK_PERIPH_BANKS 3

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index abdd26f7d04c..5771f3fc6115 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
if (min_pstate < cpu->min_perf_ratio)
min_pstate = cpu->min_perf_ratio;

+ if (min_pstate > cpu->max_perf_ratio)
+ min_pstate = cpu->max_perf_ratio;
+
max_pstate = min(cap_pstate, cpu->max_perf_ratio);
if (max_pstate < min_pstate)
max_pstate = min_pstate;
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index f383f219ed00..7082a5a6814a 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -109,6 +109,7 @@
#define FSL_QDMA_CMD_WTHROTL_OFFSET 20
#define FSL_QDMA_CMD_DSEN_OFFSET 19
#define FSL_QDMA_CMD_LWC_OFFSET 16
+#define FSL_QDMA_CMD_PF BIT(17)

/* Field definition for Descriptor status */
#define QDMA_CCDF_STATUS_RTE BIT(5)
@@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
qdma_csgf_set_f(csgf_dest, len);
/* Descriptor Buffer */
cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
- FSL_QDMA_CMD_RWTTYPE_OFFSET);
+ FSL_QDMA_CMD_RWTTYPE_OFFSET) |
+ FSL_QDMA_CMD_PF;
sdf->data = QDMA_SDDF_CMD(cmd);

cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
@@ -1201,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev)
if (!fsl_qdma->queue)
return -ENOMEM;

- ret = fsl_qdma_irq_init(pdev, fsl_qdma);
- if (ret)
- return ret;
-
fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
if (fsl_qdma->irq_base < 0)
return fsl_qdma->irq_base;
@@ -1243,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev)

platform_set_drvdata(pdev, fsl_qdma);

- ret = dma_async_device_register(&fsl_qdma->dma_dev);
+ ret = fsl_qdma_reg_init(fsl_qdma);
if (ret) {
- dev_err(&pdev->dev,
- "Can't register NXP Layerscape qDMA engine.\n");
+ dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
return ret;
}

- ret = fsl_qdma_reg_init(fsl_qdma);
+ ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+ if (ret)
+ return ret;
+
+ ret = dma_async_device_register(&fsl_qdma->dma_dev);
if (ret) {
- dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+ dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n");
return ret;
}

diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
index 1aa65e5de0f3..f79240734807 100644
--- a/drivers/dma/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt)
chan->vc.desc_free = pt_do_cleanup;
vchan_init(&chan->vc, dma_dev);

- dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
-
ret = dma_async_device_register(dma_dev);
if (ret)
goto err_reg;
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 3e8d4b51a814..97bafb5f7038 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
return -ENOMEM;
}

- cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+ cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL);
if (!cap_info->phys) {
kfree(cap_info->pages);
kfree(cap_info);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index b7c0e8cc0764..9077353d1c98 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = {
static struct efivars generic_efivars;
static struct efivar_operations generic_ops;

+static bool generic_ops_supported(void)
+{
+ unsigned long name_size;
+ efi_status_t status;
+ efi_char16_t name;
+ efi_guid_t guid;
+
+ name_size = sizeof(name);
+
+ status = efi.get_next_variable(&name_size, &name, &guid);
+ if (status == EFI_UNSUPPORTED)
+ return false;
+
+ return true;
+}
+
static int generic_ops_register(void)
{
+ if (!generic_ops_supported())
+ return 0;
+
generic_ops.get_variable = efi.get_variable;
generic_ops.get_next_variable = efi.get_next_variable;
generic_ops.query_variable_store = efi_query_variable_store;
@@ -200,6 +219,9 @@ static int generic_ops_register(void)

static void generic_ops_unregister(void)
{
+ if (!generic_ops.get_variable)
+ return;
+
efivars_unregister(&generic_efivars);
}

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index b6e1dcb98a64..473ef18421db 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o
lib-$(CONFIG_X86) += x86-stub.o
+lib-$(CONFIG_X86_64) += x86-5lvl.o
lib-$(CONFIG_RISCV) += riscv-stub.o
lib-$(CONFIG_LOONGARCH) += loongarch-stub.o

diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c
index 1de9878ddd3a..6b83c492c3b8 100644
--- a/drivers/firmware/efi/libstub/alignedmem.c
+++ b/drivers/firmware/efi/libstub/alignedmem.c
@@ -22,12 +22,15 @@
* Return: status code
*/
efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
- unsigned long max, unsigned long align)
+ unsigned long max, unsigned long align,
+ int memory_type)
{
efi_physical_addr_t alloc_addr;
efi_status_t status;
int slack;

+ max = min(max, EFI_ALLOC_LIMIT);
+
if (align < EFI_ALLOC_ALIGN)
align = EFI_ALLOC_ALIGN;

@@ -36,7 +39,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
slack = align / EFI_PAGE_SIZE - 1;

status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
- EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack,
+ memory_type, size / EFI_PAGE_SIZE + slack,
&alloc_addr);
if (status != EFI_SUCCESS)
return status;
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e2f90566b291..16f15e36f9a7 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
* locate the kernel at a randomized offset in physical memory.
*/
status = efi_random_alloc(*reserve_size, min_kimg_align,
- reserve_addr, phys_seed);
+ reserve_addr, phys_seed,
+ EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
if (status != EFI_SUCCESS)
efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
} else {
@@ -190,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
if (status != EFI_SUCCESS) {
if (!check_image_region((u64)_text, kernel_memsize)) {
efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n");
- } else if (IS_ALIGNED((u64)_text, min_kimg_align)) {
+ } else if (IS_ALIGNED((u64)_text, min_kimg_align) &&
+ (u64)_end < EFI_ALLOC_LIMIT) {
/*
* Just execute from wherever we were loaded by the
- * UEFI PE/COFF loader if the alignment is suitable.
+ * UEFI PE/COFF loader if the placement is suitable.
*/
*image_addr = (u64)_text;
*reserve_size = 0;
@@ -201,7 +203,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
}

status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
- ULONG_MAX, min_kimg_align);
+ ULONG_MAX, min_kimg_align,
+ EFI_LOADER_CODE);

if (status != EFI_SUCCESS) {
efi_err("Failed to relocate kernel\n");
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 3d9b2469a0df..97744822dd95 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
} else if (!strcmp(param, "noinitrd")) {
efi_noinitrd = true;
+ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+ efi_no5lvl = true;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 970e86e3aab0..6741f3d900c5 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -29,6 +29,11 @@
#define EFI_ALLOC_ALIGN EFI_PAGE_SIZE
#endif

+#ifndef EFI_ALLOC_LIMIT
+#define EFI_ALLOC_LIMIT ULONG_MAX
+#endif
+
+extern bool efi_no5lvl;
extern bool efi_nochunk;
extern bool efi_nokaslr;
extern int efi_loglevel;
@@ -415,6 +420,26 @@ union efi_dxe_services_table {
} mixed_mode;
};

+typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t;
+
+union efi_memory_attribute_protocol {
+ struct {
+ efi_status_t (__efiapi *get_memory_attributes)(
+ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *);
+
+ efi_status_t (__efiapi *set_memory_attributes)(
+ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64);
+
+ efi_status_t (__efiapi *clear_memory_attributes)(
+ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64);
+ };
+ struct {
+ u32 get_memory_attributes;
+ u32 set_memory_attributes;
+ u32 clear_memory_attributes;
+ } mixed_mode;
+};
+
typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;

union efi_uga_draw_protocol {
@@ -880,7 +905,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);

efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
- unsigned long *addr, unsigned long random_seed);
+ unsigned long *addr, unsigned long random_seed,
+ int memory_type, unsigned long alloc_min,
+ unsigned long alloc_max);

efi_status_t efi_random_get_seed(void);

@@ -907,7 +934,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
unsigned long max);

efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
- unsigned long max, unsigned long align);
+ unsigned long max, unsigned long align,
+ int memory_type);

efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
unsigned long *addr, unsigned long min);
diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c
index 45841ef55a9f..4f1fa302234d 100644
--- a/drivers/firmware/efi/libstub/mem.c
+++ b/drivers/firmware/efi/libstub/mem.c
@@ -89,9 +89,12 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
efi_physical_addr_t alloc_addr;
efi_status_t status;

+ max = min(max, EFI_ALLOC_LIMIT);
+
if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE)
return efi_allocate_pages_aligned(size, addr, max,
- EFI_ALLOC_ALIGN);
+ EFI_ALLOC_ALIGN,
+ EFI_LOADER_DATA);

alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 9fb5869896be..7ba05719a53b 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -16,7 +16,8 @@
*/
static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
unsigned long size,
- unsigned long align_shift)
+ unsigned long align_shift,
+ u64 alloc_min, u64 alloc_max)
{
unsigned long align = 1UL << align_shift;
u64 first_slot, last_slot, region_end;
@@ -29,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
return 0;

region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
- (u64)ULONG_MAX);
+ alloc_max);
if (region_end < size)
return 0;

- first_slot = round_up(md->phys_addr, align);
+ first_slot = round_up(max(md->phys_addr, alloc_min), align);
last_slot = round_down(region_end - size + 1, align);

if (first_slot > last_slot)
@@ -53,7 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
efi_status_t efi_random_alloc(unsigned long size,
unsigned long align,
unsigned long *addr,
- unsigned long random_seed)
+ unsigned long random_seed,
+ int memory_type,
+ unsigned long alloc_min,
+ unsigned long alloc_max)
{
unsigned long total_slots = 0, target_slot;
unsigned long total_mirrored_slots = 0;
@@ -75,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size,
efi_memory_desc_t *md = (void *)map->map + map_offset;
unsigned long slots;

- slots = get_entry_num_slots(md, size, ilog2(align));
+ slots = get_entry_num_slots(md, size, ilog2(align), alloc_min,
+ alloc_max);
MD_NUM_SLOTS(md) = slots;
total_slots += slots;
if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
@@ -118,7 +123,7 @@ efi_status_t efi_random_alloc(unsigned long size,
pages = size / EFI_PAGE_SIZE;

status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
- EFI_LOADER_DATA, pages, &target);
+ memory_type, pages, &target);
if (status == EFI_SUCCESS)
*addr = target;
break;
diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
new file mode 100644
index 000000000000..479dd445acdc
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-5lvl.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/efi.h>
+
+#include <asm/boot.h>
+#include <asm/desc.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+#include "x86-stub.h"
+
+bool efi_no5lvl;
+
+static void (*la57_toggle)(void *cr3);
+
+static const struct desc_struct gdt[] = {
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+};
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as programming
+ * a 64-bit value into CR3 when running in 32-bit mode is not supported.
+ */
+efi_status_t efi_setup_5level_paging(void)
+{
+ u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
+ efi_status_t status;
+ u8 *la57_code;
+
+ if (!efi_is_64bit())
+ return EFI_SUCCESS;
+
+ /* check for 5 level paging support */
+ if (native_cpuid_eax(0) < 7 ||
+ !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return EFI_SUCCESS;
+
+ /* allocate some 32-bit addressable memory for code and a page table */
+ status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+ U32_MAX);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
+ memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
+
+ /*
+ * To avoid the need to allocate a 32-bit addressable stack, the
+ * trampoline uses a LJMP instruction to switch back to long mode.
+ * LJMP takes an absolute destination address, which needs to be
+ * fixed up at runtime.
+ */
+ *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
+
+ efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
+
+ return EFI_SUCCESS;
+}
+
+void efi_5level_switch(void)
+{
+ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+ bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+ bool need_toggle = want_la57 ^ have_la57;
+ u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+ u64 *cr3 = (u64 *)__native_read_cr3();
+ u64 *new_cr3;
+
+ if (!la57_toggle || !need_toggle)
+ return;
+
+ if (!have_la57) {
+ /*
+ * 5 level paging will be enabled, so a root level page needs
+ * to be allocated from the 32-bit addressable physical region,
+ * with its first entry referring to the existing hierarchy.
+ */
+ new_cr3 = memset(pgt, 0, PAGE_SIZE);
+ new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+ } else {
+ /* take the new root table pointer from the current entry #0 */
+ new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+ /* copy the new root table if it is not 32-bit addressable */
+ if ((u64)new_cr3 > U32_MAX)
+ new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+ }
+
+ native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
+
+ la57_toggle(new_cr3);
+}
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 4f0152b11a89..784e1b2ae5cc 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -15,16 +15,16 @@
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/boot.h>
+#include <asm/kaslr.h>
+#include <asm/sev.h>

#include "efistub.h"
-
-/* Maximum physical address for 64-bit kernel with 4-level paging */
-#define MAXMEM_X86_64_4LEVEL (1ull << 46)
+#include "x86-stub.h"

const efi_system_table_t *efi_system_table;
const efi_dxe_services_table_t *efi_dxe_table;
-extern u32 image_offset;
static efi_loaded_image_t *image = NULL;
+static efi_memory_attribute_protocol_t *memattr;

static efi_status_t
preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
@@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
}
}

-static void
-adjust_memory_range_protection(unsigned long start, unsigned long size)
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+ unsigned long size)
{
efi_status_t status;
efi_gcd_memory_space_desc_t desc;
@@ -221,12 +221,22 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
unsigned long rounded_start, rounded_end;
unsigned long unprotect_start, unprotect_size;

- if (efi_dxe_table == NULL)
- return;
-
rounded_start = rounddown(start, EFI_PAGE_SIZE);
rounded_end = roundup(start + size, EFI_PAGE_SIZE);

+ if (memattr != NULL) {
+ status = efi_call_proto(memattr, clear_memory_attributes,
+ rounded_start,
+ rounded_end - rounded_start,
+ EFI_MEMORY_XP);
+ if (status != EFI_SUCCESS)
+ efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
+ return status;
+ }
+
+ if (efi_dxe_table == NULL)
+ return EFI_SUCCESS;
+
/*
* Don't modify memory region attributes, they are
* already suitable, to lower the possibility to
@@ -238,7 +248,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
status = efi_dxe_call(get_memory_space_descriptor, start, &desc);

if (status != EFI_SUCCESS)
- return;
+ break;

next = desc.base_address + desc.length;

@@ -263,69 +273,26 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
unprotect_start,
unprotect_start + unprotect_size,
status);
+ break;
}
}
+ return EFI_SUCCESS;
}

-/*
- * Trampoline takes 2 pages and can be loaded in first megabyte of memory
- * with its end placed between 128k and 640k where BIOS might start.
- * (see arch/x86/boot/compressed/pgtable_64.c)
- *
- * We cannot find exact trampoline placement since memory map
- * can be modified by UEFI, and it can alter the computed address.
- */
-
-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
-
-void startup_32(struct boot_params *boot_params);
-
-static void
-setup_memory_protection(unsigned long image_base, unsigned long image_size)
+static efi_char16_t *efistub_fw_vendor(void)
{
- /*
- * Allow execution of possible trampoline used
- * for switching between 4- and 5-level page tables
- * and relocated kernel image.
- */
-
- adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
- TRAMPOLINE_PLACEMENT_SIZE);
+ unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor);

-#ifdef CONFIG_64BIT
- if (image_base != (unsigned long)startup_32)
- adjust_memory_range_protection(image_base, image_size);
-#else
- /*
- * Clear protection flags on a whole range of possible
- * addresses used for KASLR. We don't need to do that
- * on x86_64, since KASLR/extraction is performed after
- * dedicated identity page tables are built and we only
- * need to remove possible protection on relocated image
- * itself disregarding further relocations.
- */
- adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
- KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
-#endif
+ return (efi_char16_t *)vendor;
}

static const efi_char16_t apple[] = L"Apple";

-static void setup_quirks(struct boot_params *boot_params,
- unsigned long image_base,
- unsigned long image_size)
+static void setup_quirks(struct boot_params *boot_params)
{
- efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
- efi_table_attr(efi_system_table, fw_vendor);
-
- if (!memcmp(fw_vendor, apple, sizeof(apple))) {
- if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
- retrieve_apple_device_properties(boot_params);
- }
-
- if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
- setup_memory_protection(image_base, image_size);
+ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) &&
+ !memcmp(efistub_fw_vendor(), apple, sizeof(apple)))
+ retrieve_apple_device_properties(boot_params);
}

/*
@@ -478,7 +445,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
}

image_base = efi_table_attr(image, image_base);
- image_offset = (void *)startup_32 - image_base;

status = efi_allocate_pages(sizeof(struct boot_params),
(unsigned long *)&boot_params, ULONG_MAX);
@@ -760,85 +726,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
return EFI_SUCCESS;
}

+static bool have_unsupported_snp_features(void)
+{
+ u64 unsupported;
+
+ unsupported = snp_get_unsupported_features(sev_get_status());
+ if (unsupported) {
+ efi_err("Unsupported SEV-SNP features detected: 0x%llx\n",
+ unsupported);
+ return true;
+ }
+ return false;
+}
+
+static void efi_get_seed(void *seed, int size)
+{
+ efi_get_random_bytes(size, seed);
+
+ /*
+ * This only updates seed[0] when running on 32-bit, but in that case,
+ * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit.
+ */
+ *(unsigned long *)seed ^= kaslr_get_random_long("EFI");
+}
+
+static void error(char *str)
+{
+ efi_warn("Decompression failed: %s\n", str);
+}
+
+static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+{
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ unsigned long addr, alloc_size, entry;
+ efi_status_t status;
+ u32 seed[2] = {};
+
+ /* determine the required size of the allocation */
+ alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size),
+ MIN_KERNEL_ALIGN);
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
+ u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size;
+ static const efi_char16_t ami[] = L"American Megatrends";
+
+ efi_get_seed(seed, sizeof(seed));
+
+ virt_addr += (range * seed[1]) >> 32;
+ virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1);
+
+ /*
+ * Older Dell systems with AMI UEFI firmware v2.0 may hang
+ * while decompressing the kernel if physical address
+ * randomization is enabled.
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218173
+ */
+ if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION &&
+ !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
+ efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
+ seed[0] = 0;
+ }
+
+ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
+ }
+
+ status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
+ seed[0], EFI_LOADER_CODE,
+ LOAD_PHYSICAL_ADDR,
+ EFI_X86_KERNEL_ALLOC_LIMIT);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ entry = decompress_kernel((void *)addr, virt_addr, error);
+ if (entry == ULONG_MAX) {
+ efi_free(alloc_size, addr);
+ return EFI_LOAD_ERROR;
+ }
+
+ *kernel_entry = addr + entry;
+
+ return efi_adjust_memory_range_protection(addr, kernel_total_size);
+}
+
+static void __noreturn enter_kernel(unsigned long kernel_addr,
+ struct boot_params *boot_params)
+{
+ /* enter decompressed kernel with boot_params pointer in RSI/ESI */
+ asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params));
+
+ unreachable();
+}
+
/*
- * On success, we return the address of startup_32, which has potentially been
- * relocated by efi_relocate_kernel.
- * On failure, we exit to the firmware via efi_exit instead of returning.
+ * On success, this routine will jump to the relocated image directly and never
+ * return. On failure, it will exit to the firmware via efi_exit() instead of
+ * returning.
*/
-asmlinkage unsigned long efi_main(efi_handle_t handle,
- efi_system_table_t *sys_table_arg,
- struct boot_params *boot_params)
+void __noreturn efi_stub_entry(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params)
{
- unsigned long bzimage_addr = (unsigned long)startup_32;
- unsigned long buffer_start, buffer_end;
+ efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID;
struct setup_header *hdr = &boot_params->hdr;
const struct linux_efi_initrd *initrd = NULL;
+ unsigned long kernel_entry;
efi_status_t status;

+ boot_params_ptr = boot_params;
+
efi_system_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
efi_exit(handle, EFI_INVALID_PARAMETER);

- efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
- if (efi_dxe_table &&
- efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
- efi_warn("Ignoring DXE services table: invalid signature\n");
- efi_dxe_table = NULL;
+ if (have_unsupported_snp_features())
+ efi_exit(handle, EFI_UNSUPPORTED);
+
+ if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) {
+ efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+ if (efi_dxe_table &&
+ efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+ efi_warn("Ignoring DXE services table: invalid signature\n");
+ efi_dxe_table = NULL;
+ }
}

- /*
- * If the kernel isn't already loaded at a suitable address,
- * relocate it.
- *
- * It must be loaded above LOAD_PHYSICAL_ADDR.
- *
- * The maximum address for 64-bit is 1 << 46 for 4-level paging. This
- * is defined as the macro MAXMEM, but unfortunately that is not a
- * compile-time constant if 5-level paging is configured, so we instead
- * define our own macro for use here.
- *
- * For 32-bit, the maximum address is complicated to figure out, for
- * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what
- * KASLR uses.
- *
- * Also relocate it if image_offset is zero, i.e. the kernel wasn't
- * loaded by LoadImage, but rather by a bootloader that called the
- * handover entry. The reason we must always relocate in this case is
- * to handle the case of systemd-boot booting a unified kernel image,
- * which is a PE executable that contains the bzImage and an initrd as
- * COFF sections. The initrd section is placed after the bzImage
- * without ensuring that there are at least init_size bytes available
- * for the bzImage, and thus the compressed kernel's startup code may
- * overwrite the initrd unless it is moved out of the way.
- */
+ /* grab the memory attributes protocol if it exists */
+ efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr);

- buffer_start = ALIGN(bzimage_addr - image_offset,
- hdr->kernel_alignment);
- buffer_end = buffer_start + hdr->init_size;
-
- if ((buffer_start < LOAD_PHYSICAL_ADDR) ||
- (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) ||
- (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) ||
- (image_offset == 0)) {
- extern char _bss[];
-
- status = efi_relocate_kernel(&bzimage_addr,
- (unsigned long)_bss - bzimage_addr,
- hdr->init_size,
- hdr->pref_address,
- hdr->kernel_alignment,
- LOAD_PHYSICAL_ADDR);
- if (status != EFI_SUCCESS) {
- efi_err("efi_relocate_kernel() failed!\n");
- goto fail;
- }
- /*
- * Now that we've copied the kernel elsewhere, we no longer
- * have a set up block before startup_32(), so reset image_offset
- * to zero in case it was set earlier.
- */
- image_offset = 0;
+ status = efi_setup_5level_paging();
+ if (status != EFI_SUCCESS) {
+ efi_err("efi_setup_5level_paging() failed!\n");
+ goto fail;
}

#ifdef CONFIG_CMDLINE_BOOL
@@ -858,6 +878,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
}
}

+ status = efi_decompress_kernel(&kernel_entry);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to decompress kernel\n");
+ goto fail;
+ }
+
/*
* At this point, an initrd may already have been loaded by the
* bootloader and passed via bootparams. We permit an initrd loaded
@@ -897,7 +923,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,

setup_efi_pci(boot_params);

- setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
+ setup_quirks(boot_params);

status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
@@ -905,9 +931,38 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
goto fail;
}

- return bzimage_addr;
+ /*
+ * Call the SEV init code while still running with the firmware's
+ * GDT/IDT, so #VC exceptions will be handled by EFI.
+ */
+ sev_enable(boot_params);
+
+ efi_5level_switch();
+
+ enter_kernel(kernel_entry, boot_params);
fail:
- efi_err("efi_main() failed!\n");
+ efi_err("efi_stub_entry() failed!\n");

efi_exit(handle, status);
}
+
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params)
+{
+ extern char _bss[], _ebss[];
+
+ memset(_bss, 0, _ebss - _bss);
+ efi_stub_entry(handle, sys_table_arg, boot_params);
+}
+
+#ifndef CONFIG_EFI_MIXED
+extern __alias(efi_handover_entry)
+void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params);
+
+extern __alias(efi_handover_entry)
+void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params);
+#endif
+#endif
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
new file mode 100644
index 000000000000..1c20e99a6494
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-stub.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/efi.h>
+
+extern void trampoline_32bit_src(void *, bool);
+extern const u16 trampoline_ljmp_imm_offset;
+
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+ unsigned long size);
+
+#ifdef CONFIG_X86_64
+efi_status_t efi_setup_5level_paging(void);
+void efi_5level_switch(void);
+#else
+static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
+static inline void efi_5level_switch(void) {}
+#endif
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 0ba9f18312f5..4ca256bcd697 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars,
const struct efivar_operations *ops,
struct kobject *kobject)
{
+ int rv;
+
if (down_interruptible(&efivars_lock))
return -EINTR;

+ if (__efivars) {
+ pr_warn("efivars already registered\n");
+ rv = -EBUSY;
+ goto out;
+ }
+
efivars->ops = ops;
efivars->kobject = kobject;

__efivars = efivars;

pr_info("Registered efivars operations\n");
-
+ rv = 0;
+out:
up(&efivars_lock);

- return 0;
+ return rv;
}
EXPORT_SYMBOL_GPL(efivars_register);

diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e00c33310517..753e7be039e4 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi)
if (IS_ERR(chip->gpiod_oe))
return PTR_ERR(chip->gpiod_oe);

- gpiod_set_value_cansleep(chip->gpiod_oe, 1);
-
spi_set_drvdata(spi, chip);

chip->gpio_chip.label = spi->modalias;
@@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi)
goto exit_destroy;
}

+ gpiod_set_value_cansleep(chip->gpiod_oe, 1);
+
ret = gpiochip_add_data(&chip->gpio_chip, chip);
if (!ret)
return 0;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 6d3e3454a6ed..9d8c78312403 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,

ret = gpiochip_irqchip_init_valid_mask(gc);
if (ret)
- goto err_remove_acpi_chip;
+ goto err_free_hogs;

ret = gpiochip_irqchip_init_hw(gc);
if (ret)
- goto err_remove_acpi_chip;
+ goto err_remove_irqchip_mask;

ret = gpiochip_add_irqchip(gc, lock_key, request_key);
if (ret)
@@ -813,13 +813,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
gpiochip_irqchip_remove(gc);
err_remove_irqchip_mask:
gpiochip_irqchip_free_valid_mask(gc);
-err_remove_acpi_chip:
+err_free_hogs:
+ gpiochip_free_hogs(gc);
acpi_gpiochip_remove(gc);
+ gpiochip_remove_pin_ranges(gc);
err_remove_of_chip:
- gpiochip_free_hogs(gc);
of_gpiochip_remove(gc);
err_free_gpiochip_mask:
- gpiochip_remove_pin_ranges(gc);
gpiochip_free_valid_mask(gc);
if (gdev->dev.release) {
/* release() has been registered by gpiochip_setup_dev() */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6fdf87a6e240..6c7b286e1123 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -51,8 +51,12 @@ endif
endif

ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+frame_warn_flag := -Wframe-larger-than=3072
+else
frame_warn_flag := -Wframe-larger-than=2048
endif
+endif

CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index dc0a6fba7050..ff1032de4f76 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
return 0;
}

+static int si_set_temperature_range(struct amdgpu_device *adev)
+{
+ int ret;
+
+ ret = si_thermal_enable_alert(adev, false);
+ if (ret)
+ return ret;
+ ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = si_thermal_enable_alert(adev, true);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
static void si_dpm_disable(struct amdgpu_device *adev)
{
struct rv7xx_power_info *pi = rv770_get_pi(adev);
@@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,

static int si_dpm_late_init(void *handle)
{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->pm.dpm_enabled)
+ return 0;
+
+ ret = si_set_temperature_range(adev);
+ if (ret)
+ return ret;
+#if 0 //TODO ?
+ si_dpm_powergate_uvd(adev, true);
+#endif
return 0;
}

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..fd32041f8226 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
u64 start, u64 end,
unsigned int order)
{
+ u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;

+ if (block_start < start || block_end > end) {
+ u64 adjusted_start = max(block_start, start);
+ u64 adjusted_end = min(block_end, end);
+
+ if (round_down(adjusted_end + 1, req_size) <=
+ round_up(adjusted_start, req_size))
+ continue;
+ }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 119544d88b58..fbac39aa38cc 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
goto exit_afbcd;

if (has_components) {
- ret = component_bind_all(drm->dev, drm);
+ ret = component_bind_all(dev, drm);
if (ret) {
dev_err(drm->dev, "Couldn't bind all components\n");
+ /* Do not try to unbind */
+ has_components = false;
goto exit_afbcd;
}
}

ret = meson_encoder_hdmi_init(priv);
if (ret)
- goto unbind_all;
+ goto exit_afbcd;

ret = meson_plane_create(priv);
if (ret)
- goto unbind_all;
+ goto exit_afbcd;

ret = meson_overlay_create(priv);
if (ret)
- goto unbind_all;
+ goto exit_afbcd;

ret = meson_crtc_create(priv);
if (ret)
- goto unbind_all;
+ goto exit_afbcd;

ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm);
if (ret)
- goto unbind_all;
+ goto exit_afbcd;

drm_mode_config_reset(drm);

@@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)

uninstall_irq:
free_irq(priv->vsync_irq, drm);
-unbind_all:
- if (has_components)
- component_unbind_all(drm->dev, drm);
exit_afbcd:
if (priv->afbcd.ops)
priv->afbcd.ops->exit(priv);
free_drm:
drm_dev_put(drm);

+ meson_encoder_hdmi_remove(priv);
+ meson_encoder_cvbs_remove(priv);
+
+ if (has_components)
+ component_unbind_all(dev, drm);
+
return ret;
}

diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
index 3f73b211fa8e..3407450435e2 100644
--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv)
if (priv->encoders[MESON_ENC_CVBS]) {
meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
drm_bridge_remove(&meson_encoder_cvbs->bridge);
- drm_bridge_remove(meson_encoder_cvbs->next_bridge);
}
}
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index b14e6e507c61..03062e7a02b6 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv)
if (priv->encoders[MESON_ENC_HDMI]) {
meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
drm_bridge_remove(&meson_encoder_hdmi->bridge);
- drm_bridge_remove(meson_encoder_hdmi->next_bridge);
}
}
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 5fc55b9777cb..6806779f8ecc 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev)

drm_mode_config_reset(drm);

- err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
- if (err < 0)
- goto hub;
+ /*
+ * Only take over from a potential firmware framebuffer if any CRTCs
+ * have been registered. This must not be a fatal error because there
+ * are other accelerators that are exposed via this driver.
+ *
+ * Another case where this happens is on Tegra234 where the display
+ * hardware is no longer part of the host1x complex, so this driver
+ * will not expose any modesetting features.
+ */
+ if (drm->mode_config.num_crtc > 0) {
+ err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
+ if (err < 0)
+ goto hub;
+ } else {
+ /*
+ * Indicate to userspace that this doesn't expose any display
+ * capabilities.
+ */
+ drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+ }

err = tegra_drm_fb_init(drm);
if (err < 0)
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
index e9d282679ef1..944d9071245d 100644
--- a/drivers/infiniband/core/cm_trace.h
+++ b/drivers/infiniband/core/cm_trace.h
@@ -16,7 +16,7 @@

#include <linux/tracepoint.h>
#include <rdma/ib_cm.h>
-#include <trace/events/rdma.h>
+#include <trace/misc/rdma.h>

/*
* enum ib_cm_state, from include/rdma/ib_cm.h
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0773ca7ace24..067d7f42871f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3547,121 +3547,6 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
return ret;
}

-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr)
-{
- struct sockaddr_storage zero_sock = {};
-
- if (src_addr && src_addr->sa_family)
- return rdma_bind_addr(id, src_addr);
-
- /*
- * When the src_addr is not specified, automatically supply an any addr
- */
- zero_sock.ss_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 =
- (struct sockaddr_in6 *)&zero_sock;
- struct sockaddr_in6 *dst_addr6 =
- (struct sockaddr_in6 *)dst_addr;
-
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if =
- dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
- ((struct sockaddr_ib *)dst_addr)->sib_pkey;
- }
- return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
-}
-
-/*
- * If required, resolve the source address for bind and leave the id_priv in
- * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
- * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
- * ignored.
- */
-static int resolve_prepare_src(struct rdma_id_private *id_priv,
- struct sockaddr *src_addr,
- const struct sockaddr *dst_addr)
-{
- int ret;
-
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- /* For a well behaved ULP state will be RDMA_CM_IDLE */
- ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
- if (ret)
- goto err_dst;
- if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
- RDMA_CM_ADDR_QUERY))) {
- ret = -EINVAL;
- goto err_dst;
- }
- }
-
- if (cma_family(id_priv) != dst_addr->sa_family) {
- ret = -EINVAL;
- goto err_state;
- }
- return 0;
-
-err_state:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
-err_dst:
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return ret;
-}
-
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
-{
- struct rdma_id_private *id_priv =
- container_of(id, struct rdma_id_private, id);
- int ret;
-
- ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
- if (ret)
- return ret;
-
- if (cma_any_addr(dst_addr)) {
- ret = cma_resolve_loopback(id_priv);
- } else {
- if (dst_addr->sa_family == AF_IB) {
- ret = cma_resolve_ib_addr(id_priv);
- } else {
- /*
- * The FSM can return back to RDMA_CM_ADDR_BOUND after
- * rdma_resolve_ip() is called, eg through the error
- * path in addr_handler(). If this happens the existing
- * request must be canceled before issuing a new one.
- * Since canceling a request is a bit slow and this
- * oddball path is rare, keep track once a request has
- * been issued. The track turns out to be a permanent
- * state since this is the only cancel as it is
- * immediately before rdma_resolve_ip().
- */
- if (id_priv->used_resolve_ip)
- rdma_addr_cancel(&id->route.addr.dev_addr);
- else
- id_priv->used_resolve_ip = 1;
- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
- &id->route.addr.dev_addr,
- timeout_ms, addr_handler,
- false, id_priv);
- }
- }
- if (ret)
- goto err;
-
- return 0;
-err:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
- return ret;
-}
-EXPORT_SYMBOL(rdma_resolve_addr);
-
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
{
struct rdma_id_private *id_priv;
@@ -4064,27 +3949,26 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
}
EXPORT_SYMBOL(rdma_listen);

-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+static int rdma_bind_addr_dst(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, const struct sockaddr *daddr)
{
- struct rdma_id_private *id_priv;
+ struct sockaddr *id_daddr;
int ret;
- struct sockaddr *daddr;

if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
return -EAFNOSUPPORT;

- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;

- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr);
if (ret)
goto err1;

memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
+ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr);
if (ret)
goto err1;

@@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
#endif
}
- daddr = cma_dst_addr(id_priv);
- daddr->sa_family = addr->sa_family;
+ id_daddr = cma_dst_addr(id_priv);
+ if (daddr != id_daddr)
+ memcpy(id_daddr, daddr, rdma_addr_size(addr));
+ id_daddr->sa_family = addr->sa_family;

ret = cma_get_port(id_priv);
if (ret)
@@ -4121,6 +4007,129 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
+
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply an any addr
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr);
+}
+
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ int ret;
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY)))
+ return -EINVAL;
+
+ } else {
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ }
+
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err_state;
+ }
+ return 0;
+
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
+
+ if (cma_any_addr(dst_addr)) {
+ ret = cma_resolve_loopback(id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+ &id->route.addr.dev_addr,
+ timeout_ms, addr_handler,
+ false, id_priv);
+ }
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv));
+}
EXPORT_SYMBOL(rdma_bind_addr);

static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index e45264267bcc..47f3c6e4be89 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -15,7 +15,7 @@
#define _TRACE_RDMA_CMA_H

#include <linux/tracepoint.h>
-#include <trace/events/rdma.h>
+#include <trace/misc/rdma.h>


DECLARE_EVENT_CLASS(cma_fsm_class,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index d96c78e436f9..5c284dfbe692 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -131,6 +131,11 @@ struct ib_umad_packet {
struct ib_user_mad mad;
};

+struct ib_rmpp_mad_hdr {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+} __packed;
+
#define CREATE_TRACE_POINTS
#include <trace/events/ib_umad.h>

@@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_umad_file *file = filp->private_data;
+ struct ib_rmpp_mad_hdr *rmpp_mad_hdr;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
- struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
int ret, data_len, hdr_len, copy_offset, rmpp_active;
u8 base_version;
@@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
return -EINVAL;

- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;

@@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err_up;
}

- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
+ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data;
+ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class);

- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
&& ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE;
} else {
copy_offset = IB_MGMT_MAD_HDR;
@@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
(be64_to_cpup(tid) & 0xffffffff));
- rmpp_mad->mad_hdr.tid = *tid;
+ rmpp_mad_hdr->mad_hdr.tid = *tid;
}

if (!ib_mad_kernel_rmpp_agent(agent)
- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
spin_unlock_irq(&file->send_lock);
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 02f3bc4e4895..13c36f51b935 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -564,6 +564,9 @@ struct xboxone_init_packet {
#define GIP_MOTOR_LT BIT(3)
#define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT)

+#define GIP_WIRED_INTF_DATA 0
+#define GIP_WIRED_INTF_AUDIO 1
+
/*
* This packet is required for all Xbox One pads with 2015
* or later firmware installed (or present from the factory).
@@ -2008,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
}

if (xpad->xtype == XTYPE_XBOXONE &&
- intf->cur_altsetting->desc.bInterfaceNumber != 0) {
+ intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) {
/*
* The Xbox One controller lists three interfaces all with the
* same interface class, subclass and protocol. Differentiate by
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 1d9494f64a21..4526ff2e1bd5 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers);
static int providers_count;
static bool synced_state;
static DEFINE_MUTEX(icc_lock);
-static DEFINE_MUTEX(icc_bw_lock);
static struct dentry *icc_debugfs_dir;

static void icc_summary_show_one(struct seq_file *s, struct icc_node *n)
@@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
if (WARN_ON(IS_ERR(path) || !path->num_nodes))
return -EINVAL;

- mutex_lock(&icc_bw_lock);
+ mutex_lock(&icc_lock);

old_avg = path->reqs[0].avg_bw;
old_peak = path->reqs[0].peak_bw;
@@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
apply_constraints(path);
}

- mutex_unlock(&icc_bw_lock);
+ mutex_unlock(&icc_lock);

trace_icc_set_bw_end(path, ret);

@@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
return;

mutex_lock(&icc_lock);
- mutex_lock(&icc_bw_lock);

node->provider = provider;
list_add_tail(&node->node_list, &provider->nodes);
@@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
node->avg_bw = 0;
node->peak_bw = 0;

- mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
}
EXPORT_SYMBOL_GPL(icc_node_add);
@@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev)
return;

mutex_lock(&icc_lock);
- mutex_lock(&icc_bw_lock);
synced_state = true;
list_for_each_entry(p, &icc_providers, provider_list) {
dev_dbg(p->dev, "interconnect provider is in synced state\n");
@@ -1150,21 +1146,13 @@ void icc_sync_state(struct device *dev)
}
}
}
- mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
}
EXPORT_SYMBOL_GPL(icc_sync_state);

static int __init icc_init(void)
{
- struct device_node *root;
-
- /* Teach lockdep about lock ordering wrt. shrinker: */
- fs_reclaim_acquire(GFP_KERNEL);
- might_lock(&icc_bw_lock);
- fs_reclaim_release(GFP_KERNEL);
-
- root = of_find_node_by_path("/");
+ struct device_node *root = of_find_node_by_path("/");

providers_count = of_count_icc_providers(root);
of_node_put(root);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8966f7d5aab6..82f100e591b5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q)
q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

+static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
+{
+ struct arm_smmu_ll_queue *llq = &q->llq;
+
+ if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
+ return;
+
+ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+ Q_IDX(llq, llq->cons);
+ queue_sync_cons_out(q);
+}
+
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
u32 prod;
@@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
} while (!queue_empty(llq));

/* Sync our overflow flag, as we believe we're up to speed */
- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
- Q_IDX(llq, llq->cons);
+ queue_sync_cons_ovf(q);
return IRQ_HANDLED;
}

@@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
} while (!queue_empty(llq));

/* Sync our overflow flag, as we believe we're up to speed */
- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
- Q_IDX(llq, llq->cons);
- queue_sync_cons_out(q);
+ queue_sync_cons_ovf(q);
return IRQ_HANDLED;
}

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index d80065c8105a..f15dcb9e4175 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,

static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
{
- unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ unsigned int last_s2cr;
u32 reg;
u32 smr;
int i;

+ /*
+ * Some platforms support more than the Arm SMMU architected maximum of
+ * 128 stream matching groups. For unknown reasons, the additional
+ * groups don't exhibit the same behavior as the architected registers,
+ * so limit the groups to 128 until the behavior is fixed for the other
+ * groups.
+ */
+ if (smmu->num_mapping_groups > 128) {
+ dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
+ smmu->num_mapping_groups = 128;
+ }
+
+ last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
+
/*
* With some firmware versions writes to S2CR of type FAULT are
* ignored, and writing BYPASS will end up written as FAULT in the
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 8261066de07d..e4358393fe37 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
return &dom->domain;
}

-static void sprd_iommu_domain_free(struct iommu_domain *domain)
-{
- struct sprd_iommu_domain *dom = to_sprd_domain(domain);
-
- kfree(dom);
-}
-
static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom)
{
struct sprd_iommu_device *sdev = dom->sdev;
@@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en)
sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val);
}

+static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom)
+{
+ size_t pgt_size;
+
+ /* Nothing need to do if the domain hasn't been attached */
+ if (!dom->sdev)
+ return;
+
+ pgt_size = sprd_iommu_pgt_size(&dom->domain);
+ dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa);
+ dom->sdev = NULL;
+ sprd_iommu_hw_en(dom->sdev, false);
+}
+
+static void sprd_iommu_domain_free(struct iommu_domain *domain)
+{
+ struct sprd_iommu_domain *dom = to_sprd_domain(domain);
+
+ sprd_iommu_cleanup(dom);
+ kfree(dom);
+}
+
static int sprd_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index a46ce0868fe1..3a927452a650 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card *card)
static unsigned ext_csd_bits[] = {
EXT_CSD_BUS_WIDTH_8,
EXT_CSD_BUS_WIDTH_4,
+ EXT_CSD_BUS_WIDTH_1,
};
static unsigned bus_widths[] = {
MMC_BUS_WIDTH_8,
MMC_BUS_WIDTH_4,
+ MMC_BUS_WIDTH_1,
};
struct mmc_host *host = card->host;
unsigned idx, bus_width = 0;
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 60bca78a72b1..0511583ffa76 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
struct scatterlist *sg;
int i;

+ host->dma_in_progress = true;
+
if (!host->variant->dma_lli || data->sg_len == 1 ||
idma->use_bounce_buffer) {
u32 dma_addr;
@@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
return 0;
}

+static void sdmmc_idma_error(struct mmci_host *host)
+{
+ struct mmc_data *data = host->data;
+ struct sdmmc_idma *idma = host->dma_priv;
+
+ if (!dma_inprogress(host))
+ return;
+
+ writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+ host->dma_in_progress = false;
+ data->host_cookie = 0;
+
+ if (!idma->use_bounce_buffer)
+ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ mmc_get_dma_dir(data));
+}
+
static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data)
{
+ if (!dma_inprogress(host))
+ return;
+
writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+ host->dma_in_progress = false;

if (!data->host_cookie)
sdmmc_idma_unprep_data(host, data, 0);
@@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = {
.dma_setup = sdmmc_idma_setup,
.dma_start = sdmmc_idma_start,
.dma_finalize = sdmmc_idma_finalize,
+ .dma_error = sdmmc_idma_error,
.set_clkreg = mmci_sdmmc_set_clkreg,
.set_pwrreg = mmci_sdmmc_set_pwrreg,
.busy_complete = sdmmc_busy_complete,
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>

#include "sdhci-pltfm.h"
@@ -109,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977

+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}

+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;

+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
@@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);

- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);

- return 0;
+ return ret;
}

#define ARMADA_3700_SOC_PAD_1_8V 0x1
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
index 6b043e24855f..9116ee7f023e 100644
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
{
u8 status2;
struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
- &status2);
+ spinand->scratchbuf);
int ret;

switch (status & STATUS_ECC_MASK) {
@@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
* report the maximum of 4 in this case
*/
/* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */
+ status2 = *(spinand->scratchbuf);
return ((status & STATUS_ECC_MASK) >> 2) |
((status2 & STATUS_ECC_MASK) >> 4);

@@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
{
u8 status2;
struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
- &status2);
+ spinand->scratchbuf);
int ret;

switch (status & STATUS_ECC_MASK) {
@@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
* 1 ... 4 bits are flipped (and corrected)
*/
/* bits sorted this way (1...0): ECCSE1, ECCSE0 */
+ status2 = *(spinand->scratchbuf);
return ((status2 & STATUS_ECC_MASK) >> 4) + 1;

case STATUS_ECC_UNCOR_ERROR:
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1917da784191..5a274b99f299 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/wangxun/Kconfig"
source "drivers/net/ethernet/xscale/Kconfig"

config JME
@@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig"
source "drivers/net/ethernet/tundra/Kconfig"
source "drivers/net/ethernet/vertexcom/Kconfig"
source "drivers/net/ethernet/via/Kconfig"
+source "drivers/net/ethernet/wangxun/Kconfig"
source "drivers/net/ethernet/wiznet/Kconfig"
source "drivers/net/ethernet/xilinx/Kconfig"
source "drivers/net/ethernet/xircom/Kconfig"
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 07171e574e7d..36e62197fba0 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)

igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
/* adjust timestamp for the TX latency based on link speed */
- if (adapter->hw.mac.type == e1000_i210) {
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
switch (adapter->link_speed) {
case SPEED_10:
adjust = IGB_I210_TX_LATENCY_10;
@@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
ktime_t *timestamp)
{
struct igb_adapter *adapter = q_vector->adapter;
+ struct e1000_hw *hw = &adapter->hw;
struct skb_shared_hwtstamps ts;
__le64 *regval = (__le64 *)va;
int adjust = 0;
@@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));

/* adjust timestamp for the RX latency based on link speed */
- if (adapter->hw.mac.type == e1000_i210) {
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
switch (adapter->link_speed) {
case SPEED_10:
adjust = IGB_I210_RX_LATENCY_10;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index dc2e204bcd72..41eac7dfb67e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
max_regions = max_tcam_regions;

tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL);
- if (!tcam->used_regions)
- return -ENOMEM;
+ if (!tcam->used_regions) {
+ err = -ENOMEM;
+ goto err_alloc_used_regions;
+ }
tcam->max_regions = max_regions;

max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS);
@@ -78,6 +80,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
bitmap_free(tcam->used_groups);
err_alloc_used_groups:
bitmap_free(tcam->used_regions);
+err_alloc_used_regions:
+ mutex_destroy(&tcam->lock);
return err;
}

@@ -86,10 +90,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
{
const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;

- mutex_destroy(&tcam->lock);
ops->fini(mlxsw_sp, tcam->priv);
bitmap_free(tcam->used_groups);
bitmap_free(tcam->used_regions);
+ mutex_destroy(&tcam->lock);
}

int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 91b2aa81914b..e2d51014ab4b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3900,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
{
set_bit(__FPE_REMOVING, &priv->fpe_task_state);

- if (priv->fpe_wq)
+ if (priv->fpe_wq) {
destroy_workqueue(priv->fpe_wq);
+ priv->fpe_wq = NULL;
+ }

netdev_info(priv->dev, "FPE workqueue stop");
}
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 937dd9cf4fba..7086acfed5b9 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1902,26 +1902,26 @@ static int __init gtp_init(void)

get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval));

- err = rtnl_link_register(&gtp_link_ops);
+ err = register_pernet_subsys(&gtp_net_ops);
if (err < 0)
goto error_out;

- err = register_pernet_subsys(&gtp_net_ops);
+ err = rtnl_link_register(&gtp_link_ops);
if (err < 0)
- goto unreg_rtnl_link;
+ goto unreg_pernet_subsys;

err = genl_register_family(&gtp_genl_family);
if (err < 0)
- goto unreg_pernet_subsys;
+ goto unreg_rtnl_link;

pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
sizeof(struct pdp_ctx));
return 0;

-unreg_pernet_subsys:
- unregister_pernet_subsys(&gtp_net_ops);
unreg_rtnl_link:
rtnl_link_unregister(&gtp_link_ops);
+unreg_pernet_subsys:
+ unregister_pernet_subsys(&gtp_net_ops);
error_out:
pr_err("error loading GTP module loaded\n");
return err;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 367255bb44cd..922d6f16d99d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->tfiles[tun->numqueues - 1]);
ntfile = rtnl_dereference(tun->tfiles[index]);
ntfile->queue_index = index;
+ ntfile->xdp_rxq.queue_index = index;
rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
NULL);

diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 99ec1d4a972d..8b6d6a1b3c2e 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
err = dm_read_shared_word(dev, 1, loc, &res);
if (err < 0) {
netdev_err(dev->net, "MDIO read error: %d\n", err);
- return err;
+ return 0;
}

netdev_dbg(dev->net,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index c458c030fadf..4fd456381129 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)

lan78xx_rx_urb_submit_all(dev);

+ local_bh_disable();
napi_schedule(&dev->napi);
+ local_bh_enable();
}

return 0;
@@ -3035,7 +3037,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
if (dev->chipid == ID_REV_CHIP_ID_7801_)
buf &= ~MAC_CR_GMII_EN_;

- if (dev->chipid == ID_REV_CHIP_ID_7800_) {
+ if (dev->chipid == ID_REV_CHIP_ID_7800_ ||
+ dev->chipid == ID_REV_CHIP_ID_7850_) {
ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
if (!ret && sig != EEPROM_INDICATOR) {
/* Implies there is no external eeprom. Set mac speed */
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 36c5a41f84e4..dd9f5f146192 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev)
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
return err;
}
-
- if (!veth_gro_requested(dev)) {
- /* user-space did not require GRO, but adding XDP
- * is supposed to get GRO working
- */
- dev->features |= NETIF_F_GRO;
- netdev_features_change(dev);
- }
}
}

@@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++)
rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);

- if (!netif_running(dev) || !veth_gro_requested(dev)) {
+ if (!netif_running(dev) || !veth_gro_requested(dev))
veth_napi_del(dev);

- /* if user-space did not require GRO, since adding XDP
- * enabled it, clear it now
- */
- if (!veth_gro_requested(dev) && netif_running(dev)) {
- dev->features &= ~NETIF_F_GRO;
- netdev_features_change(dev);
- }
- }
-
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}

@@ -1376,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
int i;

- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!priv->rq)
return -ENOMEM;

@@ -1392,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);

- kfree(priv->rq);
+ kvfree(priv->rq);
}

static int veth_dev_init(struct net_device *dev)
@@ -1558,6 +1542,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}

if (!old_prog) {
+ if (!veth_gro_requested(dev)) {
+ /* user-space did not require GRO, but adding
+ * XDP is supposed to get GRO working
+ */
+ dev->features |= NETIF_F_GRO;
+ netdev_features_change(dev);
+ }
+
peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
peer->max_mtu = max_mtu;
}
@@ -1568,6 +1560,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);

+ /* if user-space did not require GRO, since adding XDP
+ * enabled it, clear it now
+ */
+ if (!veth_gro_requested(dev)) {
+ dev->features &= ~NETIF_F_GRO;
+ netdev_features_change(dev);
+ }
+
if (peer) {
peer->hw_features |= NETIF_F_GSO_SOFTWARE;
peer->max_mtu = ETH_MAX_MTU;
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 4402871b5c0c..e663d5585a05 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -45,8 +45,8 @@ struct target {

/**
* struct fragment - info about fragment nodes in overlay expanded device tree
- * @target: target of the overlay operation
* @overlay: pointer to the __overlay__ node
+ * @target: target of the overlay operation
*/
struct fragment {
struct device_node *overlay;
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 33d5f16c8120..da5d71219770 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1332,7 +1332,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np,
int index)
{
/* Return NULL for index > 0 to signify end of remote-endpoints. */
- if (!index || strcmp(prop_name, "remote-endpoint"))
+ if (index > 0 || strcmp(prop_name, "remote-endpoint"))
return NULL;

return of_graph_get_remote_port_parent(np);
diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index ad99707b3b99..dd7d74fecc48 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -18,6 +18,20 @@

#include "pcie-designware.h"

+#define PEX_PF0_CONFIG 0xC0014
+#define PEX_PF0_CFG_READY BIT(0)
+
+/* PEX PFa PCIE PME and message interrupt registers*/
+#define PEX_PF0_PME_MES_DR 0xC0020
+#define PEX_PF0_PME_MES_DR_LUD BIT(7)
+#define PEX_PF0_PME_MES_DR_LDD BIT(9)
+#define PEX_PF0_PME_MES_DR_HRD BIT(10)
+
+#define PEX_PF0_PME_MES_IER 0xC0028
+#define PEX_PF0_PME_MES_IER_LUDIE BIT(7)
+#define PEX_PF0_PME_MES_IER_LDDIE BIT(9)
+#define PEX_PF0_PME_MES_IER_HRDIE BIT(10)
+
#define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev)

struct ls_pcie_ep_drvdata {
@@ -30,8 +44,99 @@ struct ls_pcie_ep {
struct dw_pcie *pci;
struct pci_epc_features *ls_epc;
const struct ls_pcie_ep_drvdata *drvdata;
+ int irq;
+ u32 lnkcap;
+ bool big_endian;
};

+static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset)
+{
+ struct dw_pcie *pci = pcie->pci;
+
+ if (pcie->big_endian)
+ return ioread32be(pci->dbi_base + offset);
+ else
+ return ioread32(pci->dbi_base + offset);
+}
+
+static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value)
+{
+ struct dw_pcie *pci = pcie->pci;
+
+ if (pcie->big_endian)
+ iowrite32be(value, pci->dbi_base + offset);
+ else
+ iowrite32(value, pci->dbi_base + offset);
+}
+
+static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id)
+{
+ struct ls_pcie_ep *pcie = dev_id;
+ struct dw_pcie *pci = pcie->pci;
+ u32 val, cfg;
+ u8 offset;
+
+ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR);
+ ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val);
+
+ if (!val)
+ return IRQ_NONE;
+
+ if (val & PEX_PF0_PME_MES_DR_LUD) {
+
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+ /*
+ * The values of the Maximum Link Width and Supported Link
+ * Speed from the Link Capabilities Register will be lost
+ * during link down or hot reset. Restore initial value
+ * that configured by the Reset Configuration Word (RCW).
+ */
+ dw_pcie_dbi_ro_wr_en(pci);
+ dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap);
+ dw_pcie_dbi_ro_wr_dis(pci);
+
+ cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG);
+ cfg |= PEX_PF0_CFG_READY;
+ ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg);
+ dw_pcie_ep_linkup(&pci->ep);
+
+ dev_dbg(pci->dev, "Link up\n");
+ } else if (val & PEX_PF0_PME_MES_DR_LDD) {
+ dev_dbg(pci->dev, "Link down\n");
+ } else if (val & PEX_PF0_PME_MES_DR_HRD) {
+ dev_dbg(pci->dev, "Hot reset\n");
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie,
+ struct platform_device *pdev)
+{
+ u32 val;
+ int ret;
+
+ pcie->irq = platform_get_irq_byname(pdev, "pme");
+ if (pcie->irq < 0)
+ return pcie->irq;
+
+ ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler,
+ IRQF_SHARED, pdev->name, pcie);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't register PCIe IRQ\n");
+ return ret;
+ }
+
+ /* Enable interrupts */
+ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER);
+ val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE |
+ PEX_PF0_PME_MES_IER_LUDIE;
+ ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val);
+
+ return 0;
+}
+
static const struct pci_epc_features*
ls_pcie_ep_get_features(struct dw_pcie_ep *ep)
{
@@ -124,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
struct ls_pcie_ep *pcie;
struct pci_epc_features *ls_epc;
struct resource *dbi_base;
+ u8 offset;
+ int ret;

pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
if (!pcie)
@@ -143,6 +250,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
pci->ops = pcie->drvdata->dw_pcie_ops;

ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
+ ls_epc->linkup_notifier = true;

pcie->pci = pci;
pcie->ls_epc = ls_epc;
@@ -154,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)

pci->ep.ops = &ls_pcie_ep_ops;

+ pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
+
platform_set_drvdata(pdev, pcie);

- return dw_pcie_ep_init(&pci->ep);
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+ pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+
+ ret = dw_pcie_ep_init(&pci->ep);
+ if (ret)
+ return ret;
+
+ return ls_pcie_ep_interrupt_init(pcie, pdev);
}

static struct platform_driver ls_pcie_ep_driver = {
diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
index e625b32889bf..0928a526e2ab 100644
--- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
+++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
@@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev)
return ret;
}

- priv->id = of_alias_get_id(np, "mipi_dphy");
+ priv->id = of_alias_get_id(np, "mipi-dphy");
if (priv->id < 0) {
dev_err(dev, "Failed to get phy node alias id: %d\n",
priv->id);
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 0713a52a2510..17b37354e32c 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client)
{
struct bq27xxx_device_info *di = i2c_get_clientdata(client);

- free_irq(client->irq, di);
+ if (client->irq)
+ free_irq(client->irq, di);
+
bq27xxx_battery_teardown(di);

mutex_lock(&battery_mutex);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5c5954b78585..edd296f950a3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
__scsi_queue_insert(cmd, reason, true);
}

-
/**
- * __scsi_execute - insert request and wait for the result
- * @sdev: scsi device
+ * scsi_execute_cmd - insert request and wait for the result
+ * @sdev: scsi_device
* @cmd: scsi command
- * @data_direction: data direction
+ * @opf: block layer request cmd_flags
* @buffer: data buffer
* @bufflen: len of buffer
- * @sense: optional sense buffer
- * @sshdr: optional decoded sense header
* @timeout: request timeout in HZ
* @retries: number of times to retry request
- * @flags: flags for ->cmd_flags
- * @rq_flags: flags for ->rq_flags
- * @resid: optional residual length
+ * @args: Optional args. See struct definition for field descriptions
*
* Returns the scsi_cmnd result field if a command was executed, or a negative
* Linux error code if we didn't get that far.
*/
-int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
- int data_direction, void *buffer, unsigned bufflen,
- unsigned char *sense, struct scsi_sense_hdr *sshdr,
- int timeout, int retries, blk_opf_t flags,
- req_flags_t rq_flags, int *resid)
+int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
+ blk_opf_t opf, void *buffer, unsigned int bufflen,
+ int timeout, int retries,
+ const struct scsi_exec_args *args)
{
+ static const struct scsi_exec_args default_args;
struct request *req;
struct scsi_cmnd *scmd;
int ret;

- req = scsi_alloc_request(sdev->request_queue,
- data_direction == DMA_TO_DEVICE ?
- REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
- rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0);
+ if (!args)
+ args = &default_args;
+ else if (WARN_ON_ONCE(args->sense &&
+ args->sense_len != SCSI_SENSE_BUFFERSIZE))
+ return -EINVAL;
+
+ req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags);
if (IS_ERR(req))
return PTR_ERR(req);

@@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
memcpy(scmd->cmnd, cmd, scmd->cmd_len);
scmd->allowed = retries;
req->timeout = timeout;
- req->cmd_flags |= flags;
- req->rq_flags |= rq_flags | RQF_QUIET;
+ req->rq_flags |= RQF_QUIET;

/*
* head injection *required* here otherwise quiesce won't work
@@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen))
memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len);

- if (resid)
- *resid = scmd->resid_len;
- if (sense && scmd->sense_len)
- memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
- if (sshdr)
+ if (args->resid)
+ *args->resid = scmd->resid_len;
+ if (args->sense)
+ memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
+ if (args->sshdr)
scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len,
- sshdr);
+ args->sshdr);
+
ret = scmd->result;
out:
blk_mq_free_request(req);

return ret;
}
-EXPORT_SYMBOL(__scsi_execute);
+EXPORT_SYMBOL(scsi_execute_cmd);

/*
* Wake up the error handler if necessary. Avoid as follows that the error
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 31b5273f43a7..4433b02c8935 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
return true;
}

+static void sd_read_block_zero(struct scsi_disk *sdkp)
+{
+ unsigned int buf_len = sdkp->device->sector_size;
+ char *buffer, cmd[10] = { };
+
+ buffer = kmalloc(buf_len, GFP_KERNEL);
+ if (!buffer)
+ return;
+
+ cmd[0] = READ_10;
+ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
+ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
+
+ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
+ SD_TIMEOUT, sdkp->max_retries, NULL);
+ kfree(buffer);
+}
+
/**
* sd_revalidate_disk - called the first time a new disk is seen,
* performs disk spin up, read_capacity, etc.
@@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
*/
if (sdkp->media_present) {
sd_read_capacity(sdkp, buffer);
-
+ /*
+ * Some USB/UAS devices return generic values for mode pages
+ * until the media has been accessed. Trigger a READ operation
+ * to force the device to populate mode pages.
+ */
+ if (sdp->read_before_ms)
+ sd_read_block_zero(sdkp);
/*
* set the default to rotational. All non-rotational devices
* support the block characteristics VPD page, which will
diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c
index 092f6ab09acf..9a90f241bb97 100644
--- a/drivers/soc/qcom/rpmhpd.c
+++ b/drivers/soc/qcom/rpmhpd.c
@@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
unsigned int active_corner, sleep_corner;
unsigned int this_active_corner = 0, this_sleep_corner = 0;
unsigned int peer_active_corner = 0, peer_sleep_corner = 0;
+ unsigned int peer_enabled_corner;

to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner);

- if (peer && peer->enabled)
- to_active_sleep(peer, peer->corner, &peer_active_corner,
+ if (peer && peer->enabled) {
+ peer_enabled_corner = max(peer->corner, peer->enable_corner);
+ to_active_sleep(peer, peer_enabled_corner, &peer_active_corner,
&peer_sleep_corner);
+ }

active_corner = max(this_active_corner, peer_active_corner);

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index cb0a4e2cdbb7..247cca46cdfa 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
return min(val, 900U) / 8;
}

+void check_remote_wakeup_config(struct usb_gadget *g,
+ struct usb_configuration *c)
+{
+ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) {
+ /* Reset the rw bit if gadget is not capable of it */
+ if (!g->wakeup_capable && g->ops->set_remote_wakeup) {
+ WARN(c->cdev, "Clearing wakeup bit for config c.%d\n",
+ c->bConfigurationValue);
+ c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP;
+ }
+ }
+}
+
static int config_buf(struct usb_configuration *config,
enum usb_device_speed speed, void *buf, u8 type)
{
@@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev,
power = min(power, 500U);
else
power = min(power, 900U);
+
+ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes)
+ usb_gadget_set_remote_wakeup(gadget, 1);
+ else
+ usb_gadget_set_remote_wakeup(gadget, 0);
done:
if (power <= USB_SELF_POWER_VBUS_MAX_DRAW)
usb_gadget_set_selfpowered(gadget);
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 4dcf29577f8f..b94aec6227c5 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
if (gadget_is_otg(gadget))
c->descriptors = otg_desc;

+ /* Properly configure the bmAttributes wakeup bit */
+ check_remote_wakeup_config(gadget, c);
+
cfg = container_of(c, struct config_usb_cfg, c);
if (!list_empty(&cfg->string_list)) {
i = 0;
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index c40f2ecbe1b8..0edd9e53fc5a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -525,6 +525,33 @@ int usb_gadget_wakeup(struct usb_gadget *gadget)
}
EXPORT_SYMBOL_GPL(usb_gadget_wakeup);

+/**
+ * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature.
+ * @gadget:the device being configured for remote wakeup
+ * @set:value to be configured.
+ *
+ * set to one to enable remote wakeup feature and zero to disable it.
+ *
+ * returns zero on success, else negative errno.
+ */
+int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
+{
+ int ret = 0;
+
+ if (!gadget->ops->set_remote_wakeup) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = gadget->ops->set_remote_wakeup(gadget, set);
+
+out:
+ trace_usb_gadget_set_remote_wakeup(gadget, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup);
+
/**
* usb_gadget_set_selfpowered - sets the device selfpowered feature.
* @gadget:the device being declared as self-powered
diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h
index abdbcb1bacb0..a5ed26fbc2da 100644
--- a/drivers/usb/gadget/udc/trace.h
+++ b/drivers/usb/gadget/udc/trace.h
@@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup,
TP_ARGS(g, ret)
);

+DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup,
+ TP_PROTO(struct usb_gadget *g, int ret),
+ TP_ARGS(g, ret)
+);
+
DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered,
TP_PROTO(struct usb_gadget *g, int ret),
TP_ARGS(g, ret)
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index c54e9805da53..12cf9940e5b6 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev)
*/
sdev->use_192_bytes_for_3f = 1;

+ /*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
/*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index de3836412bf3..ed22053b3252 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev)
if (devinfo->flags & US_FL_CAPACITY_HEURISTICS)
sdev->guess_capacity = 1;

+ /*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
/*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index fa205be94a4b..14498a0d13e0 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
struct fbcon_ops *ops = info->fbcon_par;
struct fbcon_display *p = &fb_display[vc->vc_num];
int resize, ret, old_userfont, old_width, old_height, old_charcount;
- char *old_data = NULL;
+ u8 *old_data = vc->vc_font.data;

resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
- if (p->userfont)
- old_data = vc->vc_font.data;
vc->vc_font.data = (void *)(p->fontdata = data);
old_userfont = p->userfont;
if ((p->userfont = userfont))
@@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
update_screen(vc);
}

- if (old_data && (--REFCOUNT(old_data) == 0))
+ if (old_userfont && (--REFCOUNT(old_data) == 0))
kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
return 0;

err_out:
p->fontdata = old_data;
- vc->vc_font.data = (void *)old_data;
+ vc->vc_font.data = old_data;

if (userfont) {
p->userfont = old_userfont;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 00f8e349921d..96b96516c980 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data)
return;

do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}

static void enable_pirq(struct irq_data *data)
@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;

- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq)
}

xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}

xen_free_irq(irq);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index cf811b77ee67..6e2c967fae6f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
dire->u.name[0] == '.' &&
ctx->actor != afs_lookup_filldir &&
ctx->actor != afs_lookup_one_filldir &&
- memcmp(dire->u.name, ".__afs", 6) == 0)
+ memcmp(dire->u.name, ".__afs", 6) == 0) {
+ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
continue;
+ }

/* found the next entry */
if (!dir_emit(ctx, dire->u.name, nlen,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 61e58066b5fd..9c856a73d533 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -740,6 +740,23 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return ret;
}

+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+ if (args->start.srcdevid == 0) {
+ if (memchr(args->start.srcdev_name, 0,
+ sizeof(args->start.srcdev_name)) == NULL)
+ return -ENAMETOOLONG;
+ } else {
+ args->start.srcdev_name[0] = 0;
+ }
+
+ if (memchr(args->start.tgtdev_name, 0,
+ sizeof(args->start.tgtdev_name)) == NULL)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
default:
return -EINVAL;
}
-
- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
- args->start.tgtdev_name[0] == '\0')
- return -EINVAL;
+ ret = btrfs_check_replace_dev_names(args);
+ if (ret < 0)
+ return ret;

ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
args->start.srcdevid,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d1b05ded1e3..5756edb37c61 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1668,9 +1668,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* that common but still possible. In that case, we just need
* to free the anon_dev.
*/
- if (unlikely(anon_dev)) {
- free_anon_bdev(anon_dev);
- anon_dev = 0;
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
}

if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1692,7 +1692,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
goto fail;
}

- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;

@@ -1728,7 +1728,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1744,7 +1744,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}

/*
@@ -1752,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 7322af63c0cc..24bddca86e9c 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 196e222749cc..64b37afb7c87 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
free_extent_buffer(leaf);
leaf = NULL;

- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a75669972dc7..9f7ffd9ef6fd 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret)
goto out;
}
- if (sctx->cur_inode_last_extent <
- sctx->cur_inode_size) {
- ret = send_hole(sctx, sctx->cur_inode_size);
- if (ret)
+ if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
+ ret = range_is_hole_in_parent(sctx,
+ sctx->cur_inode_last_extent,
+ sctx->cur_inode_size);
+ if (ret < 0) {
goto out;
+ } else if (ret == 0) {
+ ret = send_hole(sctx, sctx->cur_inode_size);
+ if (ret < 0)
+ goto out;
+ } else {
+ /* Range is already a hole, skip. */
+ ret = 0;
+ }
}
}
if (need_truncate) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 60db4c3b82fa..b172091f4261 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}

key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c
index 9e4f47808bd5..13bc60698955 100644
--- a/fs/efivarfs/vars.c
+++ b/fs/efivarfs/vars.c
@@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
void *data, bool duplicates, struct list_head *head)
{
- unsigned long variable_name_size = 1024;
+ unsigned long variable_name_size = 512;
efi_char16_t *variable_name;
efi_status_t status;
efi_guid_t vendor_guid;
@@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
goto free;

/*
- * Per EFI spec, the maximum storage allocated for both
- * the variable name and variable data is 1024 bytes.
+ * A small set of old UEFI implementations reject sizes
+ * above a certain threshold, the lowest seen in the wild
+ * is 512.
*/

do {
- variable_name_size = 1024;
+ variable_name_size = 512;

status = efivar_get_next_variable(&variable_name_size,
variable_name,
@@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
break;
case EFI_NOT_FOUND:
break;
+ case EFI_BUFFER_TOO_SMALL:
+ pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n",
+ variable_name_size);
+ status = EFI_NOT_FOUND;
+ break;
default:
- printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n",
- status);
+ pr_warn("efivars: get_next_variable: status=%lx\n", status);
status = EFI_NOT_FOUND;
break;
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c648a493faf2..3204bd33e4e8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -18,7 +18,7 @@
#include <linux/sched.h>
#include <linux/cred.h>

-#define dprintk(fmt, args...) do{}while(0)
+#define dprintk(fmt, args...) pr_debug(fmt, ##args)


static int get_name(const struct path *path, char *name, struct dentry *child);
@@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
inode_unlock(dentry->d_inode);

if (IS_ERR(parent)) {
- dprintk("%s: get_parent of %ld failed, err %d\n",
- __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+ dprintk("get_parent of %lu failed, err %ld\n",
+ dentry->d_inode->i_ino, PTR_ERR(parent));
return parent;
}

@@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
dprintk("%s: found name: %s\n", __func__, nbuf);
tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
if (IS_ERR(tmp)) {
- dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+ dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
err = PTR_ERR(tmp);
goto out_err;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 284b019cb652..b72023a6b4c1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
*filp = file;

/* Set up the missing parts of the file_lock structure */
+ lock->fl.fl_flags = FL_POSIX;
lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_start = (loff_t)lock->lock_start;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9c1aa75441e1..4e30f3c50970 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
nlmsvc_cancel_blocked(net, file, lock);

lock->fl.fl_type = F_UNLCK;
- if (file->f_file[O_RDONLY])
- error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
+ lock->fl.fl_file = file->f_file[O_RDONLY];
+ if (lock->fl.fl_file)
+ error = vfs_lock_file(lock->fl.fl_file, F_SETLK,
&lock->fl, NULL);
- if (file->f_file[O_WRONLY])
- error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
+ lock->fl.fl_file = file->f_file[O_WRONLY];
+ if (lock->fl.fl_file)
+ error |= vfs_lock_file(lock->fl.fl_file, F_SETLK,
&lock->fl, NULL);

return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
@@ -697,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
block = nlmsvc_lookup_block(file, lock);
mutex_unlock(&file->f_mutex);
if (block != NULL) {
- mode = lock_to_openmode(&lock->fl);
- vfs_cancel_lock(block->b_file->f_file[mode],
- &block->b_call->a_args.lock.fl);
+ struct file_lock *fl = &block->b_call->a_args.lock.fl;
+
+ mode = lock_to_openmode(fl);
+ vfs_cancel_lock(block->b_file->f_file[mode], fl);
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e35c05e27806..32784f508c81 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,

/* Set up the missing parts of the file_lock structure */
mode = lock_to_openmode(&lock->fl);
+ lock->fl.fl_flags = FL_POSIX;
lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 3515f17eaf3f..e3b6229e7ae5 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct nlm_host *lockhost;

if (!flctx || list_empty_careful(&flctx->flc_posix))
@@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file)
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);

if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
diff --git a/fs/locks.c b/fs/locks.c
index 1047ab2b15e9..7d0918b8fe5d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type)
struct file_lock_context *ctx;

/* paired with cmpxchg() below */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (likely(ctx) || type == F_UNLCK)
goto out;

@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type)
*/
if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
kmem_cache_free(flctx_cache, ctx);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
void
locks_free_lock_context(struct inode *inode)
{
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);

if (unlikely(ctx)) {
locks_check_ctx_lists(inode);
@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
void *owner;
void (*func)(void);

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl->fl_flags = type;

/* typically we will check that ctx is non-NULL before calling */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
WARN_ON_ONCE(1);
goto free_lock;
@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
struct file_lock_context *ctx;
struct file_lock *fl;

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
fl = list_first_entry_or_null(&ctx->flc_lease,
@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp)
int type = F_UNLCK;
LIST_HEAD(dispose);

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
struct file_lock_context *ctx;
LIST_HEAD(dispose);

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
trace_generic_delete_lease(inode, NULL);
return error;
@@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty(&ctx->flc_posix))
return;

@@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;

- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
+ ctx = locks_inode_context(locks_inode(filp));
if (!ctx)
return;

@@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode)
struct file_lock_context *ctx;
bool ret;

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx)
return false;

@@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f,
struct file_lock_context *ctx;
int id = 0;

- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx)
return;

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 3fa77ad7258f..c8a57cfde64b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -9,10 +9,10 @@
#define _TRACE_NFS4_H

#include <linux/tracepoint.h>
-#include <trace/events/sunrpc_base.h>
+#include <trace/misc/sunrpc.h>

-#include <trace/events/fs.h>
-#include <trace/events/nfs.h>
+#include <trace/misc/fs.h>
+#include <trace/misc/nfs.h>

#define show_nfs_fattr_flags(valid) \
__print_flags((unsigned long)valid, "|", \
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 8c6cc58679ff..642f6921852f 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -11,9 +11,9 @@
#include <linux/tracepoint.h>
#include <linux/iversion.h>

-#include <trace/events/fs.h>
-#include <trace/events/nfs.h>
-#include <trace/events/sunrpc_base.h>
+#include <trace/misc/fs.h>
+#include <trace/misc/nfs.h>
+#include <trace/misc/sunrpc.h>

#define nfs_show_cache_validity(v) \
__print_flags(v, "|", \
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..6a0606668417 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page,
int err;

if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
+ NFS_SERVER(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }

nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f6a2fd3015e7..7c441f2bd444 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -8,6 +8,7 @@ config NFSD
select SUNRPC
select EXPORTFS
select NFS_ACL_SUPPORT if NFSD_V2_ACL
+ select NFS_ACL_SUPPORT if NFSD_V3_ACL
depends on MULTIUSER
help
Choose Y here if you want to allow other computers to access
@@ -26,19 +27,29 @@ config NFSD

Below you can choose which versions of the NFS protocol are
available to clients mounting the NFS server on this system.
- Support for NFS version 2 (RFC 1094) is always available when
+ Support for NFS version 3 (RFC 1813) is always available when
CONFIG_NFSD is selected.

If unsure, say N.

-config NFSD_V2_ACL
- bool
+config NFSD_V2
+ bool "NFS server support for NFS version 2 (DEPRECATED)"
depends on NFSD
+ default n
+ help
+ NFSv2 (RFC 1094) was the first publicly-released version of NFS.
+ Unless you are hosting ancient (1990's era) NFS clients, you don't
+ need this.
+
+ If unsure, say N.
+
+config NFSD_V2_ACL
+ bool "NFS server support for the NFSv2 ACL protocol extension"
+ depends on NFSD_V2

config NFSD_V3_ACL
bool "NFS server support for the NFSv3 ACL protocol extension"
depends on NFSD
- select NFSD_V2_ACL
help
Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
never became an official part of the NFS version 3 protocol.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 805c06d5f1b4..6fffc8f03f74 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o
# this one should be compiled first, as the tracing macros can easily blow up
nfsd-y += trace.o

-nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
- export.o auth.o lockd.o nfscache.o nfsxdr.o \
+nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o \
stats.o filecache.o nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e7e6e78d965d..01d7fd108cf3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -12,6 +12,7 @@
#include "blocklayoutxdr.h"
#include "pnfs.h"
#include "filecache.h"
+#include "vfs.h"

#define NFSDDBG_FACILITY NFSDDBG_PNFS

diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 2455dc8be18a..1ed2f691ebb9 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -9,6 +9,7 @@

#include "nfsd.h"
#include "blocklayoutxdr.h"
+#include "vfs.h"

#define NFSDDBG_FACILITY NFSDDBG_PNFS

diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index ee0e3aba4a6e..d03f7f6a8642 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);
int exp_rootfh(struct net *, struct auth_domain *,
char *path, struct knfsd_fh *, int maxsize);
__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-__be32 nfserrno(int errno);

static inline void exp_put(struct svc_export *exp)
{
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 5cb8cce153a5..697acf5c3c68 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -1,7 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * Open file cache.
+ * The NFSD open file cache.
*
* (c) 2015 - Jeff Layton <jeff.layton@xxxxxxxxxxxxxxx>
+ *
+ * An nfsd_file object is a per-file collection of open state that binds
+ * together:
+ * - a struct file *
+ * - a user credential
+ * - a network namespace
+ * - a read-ahead context
+ * - monitoring for writeback errors
+ *
+ * nfsd_file objects are reference-counted. Consumers acquire a new
+ * object via the nfsd_file_acquire API. They manage their interest in
+ * the acquired object, and hence the object's reference count, via
+ * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
+ * object:
+ *
+ * * non-garbage-collected: When a consumer wants to precisely control
+ * the lifetime of a file's open state, it acquires a non-garbage-
+ * collected nfsd_file. The final nfsd_file_put releases the open
+ * state immediately.
+ *
+ * * garbage-collected: When a consumer does not control the lifetime
+ * of open state, it acquires a garbage-collected nfsd_file. The
+ * final nfsd_file_put allows the open state to linger for a period
+ * during which it may be re-used.
*/

#include <linux/hash.h>
@@ -186,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = {
static void
nfsd_file_schedule_laundrette(void)
{
- if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
- test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
- return;
-
- queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
- NFSD_LAUNDRETTE_DELAY);
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
+ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+ NFSD_LAUNDRETTE_DELAY);
}

static void
@@ -589,7 +611,8 @@ static void
nfsd_file_gc_worker(struct work_struct *work)
{
nfsd_file_gc();
- nfsd_file_schedule_laundrette();
+ if (list_lru_count(&nfsd_file_lru))
+ nfsd_file_schedule_laundrette();
}

static unsigned long
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index 070f90ed09b6..3ca5304440ff 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -15,6 +15,7 @@

#include "flexfilelayoutxdr.h"
#include "pnfs.h"
+#include "vfs.h"

#define NFSDDBG_FACILITY NFSDDBG_PNFS

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..51a4b7885cae 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -195,7 +195,7 @@ struct nfsd_net {

atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 39989c14c8a1..4eae2c5af2ed 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p)
* 1 Protocol"
*/

+static void encode_uint32(struct xdr_stream *xdr, u32 n)
+{
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
+}
+
+static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
+ size_t len)
+{
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
+}
+
/*
* nfs_cb_opnum4
*
@@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
hdr->nops++;
}

+/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ encode_uint32(xdr, ra->ra_keep);
+ encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
+ hdr->nops++;
+}
+
/*
* CB_SEQUENCE4args
*
@@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}

/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};

static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index e70a1a2999b7..5e9809aff37e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -41,6 +41,7 @@
#include "idmap.h"
#include "nfsd.h"
#include "netns.h"
+#include "vfs.h"

/*
* Turn off idmapping when using AUTH_SYS.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9105e95b59c..ba53cd89ec62 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
&read->rd_nf, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
- goto out;
- }
- status = nfs_ok;
-out:
+
read->rd_rqstp = rqstp;
read->rd_fhp = &cstate->current_fh;
return status;
@@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
WR_STATE, NULL, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+ if (status)
return status;
- }
}
err = fh_want_write(&cstate->current_fh);
if (err)
@@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
stateid, WR_STATE, &nf, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+ if (status)
return status;
- }

write->wr_how_written = write->wr_stable_how;

@@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,

status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
src_stateid, RD_STATE, src, NULL);
- if (status) {
- dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
+ if (status)
goto out;
- }

status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
dst_stateid, WR_STATE, dst, NULL);
- if (status) {
- dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
+ if (status)
goto out_put_src;
- }

/* fix up for NFS-specific error code */
if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
@@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &nf, NULL);
- if (status != nfs_ok) {
- dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
+ if (status != nfs_ok)
return status;
- }

status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
@@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &nf, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
+ if (status)
return status;
- }

switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b3f6dda930d8..b9d694ec25d1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,7 +44,9 @@
#include <linux/jhash.h>
#include <linux/string_helpers.h>
#include <linux/fsnotify.h>
+#include <linux/rhashtable.h>
#include <linux/nfs_ssc.h>
+
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
+static void nfsd4_file_hash_remove(struct nfs4_file *fi);

/* Locking: */

@@ -588,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
void
put_nfs4_file(struct nfs4_file *fi)
{
- might_lock(&state_lock);
-
- if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
- hlist_del_rcu(&fi->fi_hash);
- spin_unlock(&state_lock);
+ if (refcount_dec_and_test(&fi->fi_ref)) {
+ nfsd4_file_hash_remove(fi);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
@@ -717,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
return ret & OWNER_HASH_MASK;
}

-/* hash table for nfs4_file */
-#define FILE_HASH_BITS 8
-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
-
-static unsigned int file_hashval(struct svc_fh *fh)
-{
- struct inode *inode = d_inode(fh->fh_dentry);
+static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;

- /* XXX: why not (here & in file cache) use inode? */
- return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
-}
+static const struct rhashtable_params nfs4_file_rhash_params = {
+ .key_len = sizeof_field(struct nfs4_file, fi_inode),
+ .key_offset = offsetof(struct nfs4_file, fi_inode),
+ .head_offset = offsetof(struct nfs4_file, fi_rlist),

-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+ /*
+ * Start with a single page hash table to reduce resizing churn
+ * on light workloads.
+ */
+ .min_size = 256,
+ .automatic_shrinking = true,
+};

/*
* Check if courtesy clients have conflicting access and resolve it if possible
@@ -1367,6 +1368,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)

WARN_ON(!list_empty(&dp->dl_recall_lru));

+ trace_nfsd_stid_revoke(&dp->dl_stid);
+
if (clp->cl_minorversion) {
spin_lock(&clp->cl_lock);
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
@@ -1831,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
int numslots = fattrs->maxreqs;
int slotsize = slot_bytes(fattrs);
struct nfsd4_session *new;
- int mem, i;
+ int i;

- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
- + sizeof(struct nfsd4_session) > PAGE_SIZE);
- mem = numslots * sizeof(struct nfsd4_slot *);
+ BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
+ > PAGE_SIZE);

- new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
+ new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
if (!new)
return NULL;
/* allocate each struct nfsd4_slot and data cache in one piece */
@@ -2143,6 +2145,7 @@ static void __free_client(struct kref *k)
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
kmem_cache_free(client_slab, clp);
}

@@ -2870,6 +2873,37 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};

+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ trace_nfsd_cb_recall_any_done(cb, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ put_client_renew_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2907,6 +2941,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}

@@ -4276,11 +4318,9 @@ static struct nfs4_file *nfsd4_alloc_file(void)
}

/* OPEN Share state helper functions */
-static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
- struct nfs4_file *fp)
-{
- lockdep_assert_held(&state_lock);

+static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
+{
refcount_set(&fp->fi_ref, 1);
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
@@ -4298,7 +4338,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
INIT_LIST_HEAD(&fp->fi_lo_states);
atomic_set(&fp->fi_lo_recalls, 0);
#endif
- hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
}

void
@@ -4363,25 +4402,27 @@ nfsd4_init_slabs(void)
}

static unsigned long
-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
+nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
- int cnt;
+ int count;
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);

- cnt = atomic_read(&nn->nfsd_courtesy_clients);
- if (cnt > 0)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
- return (unsigned long)cnt;
+ count = atomic_read(&nn->nfsd_courtesy_clients);
+ if (!count)
+ count = atomic_long_read(&num_delegations);
+ if (count)
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ return (unsigned long)count;
}

static unsigned long
-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
+nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
return SHRINK_STOP;
}

-int
+void
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
@@ -4403,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);

atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
-}
-
-void
-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
-{
- unregister_shrinker(&nn->nfsd_client_shrinker);
}

static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -4683,71 +4714,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
nfs4_put_stid(&last->st_stid);
}

-/* search file_hashtbl[] for file */
-static struct nfs4_file *
-find_file_locked(struct svc_fh *fh, unsigned int hashval)
+static noinline_for_stack struct nfs4_file *
+nfsd4_file_hash_lookup(const struct svc_fh *fhp)
{
- struct nfs4_file *fp;
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ struct rhlist_head *tmp, *list;
+ struct nfs4_file *fi;

- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
- lockdep_is_held(&state_lock)) {
- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
- if (refcount_inc_not_zero(&fp->fi_ref))
- return fp;
+ rcu_read_lock();
+ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+ nfs4_file_rhash_params);
+ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref)) {
+ rcu_read_unlock();
+ return fi;
+ }
}
}
+ rcu_read_unlock();
return NULL;
}

-static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
- unsigned int hashval)
+/*
+ * On hash insertion, identify entries with the same inode but
+ * distinct filehandles. They will all be on the list returned
+ * by rhltable_lookup().
+ *
+ * inode->i_lock prevents racing insertions from adding an entry
+ * for the same inode/fhp pair twice.
+ */
+static noinline_for_stack struct nfs4_file *
+nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
{
- struct nfs4_file *fp;
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ struct rhlist_head *tmp, *list;
struct nfs4_file *ret = NULL;
bool alias_found = false;
+ struct nfs4_file *fi;
+ int err;

- spin_lock(&state_lock);
- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
- lockdep_is_held(&state_lock)) {
- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
- if (refcount_inc_not_zero(&fp->fi_ref))
- ret = fp;
- } else if (d_inode(fh->fh_dentry) == fp->fi_inode)
- fp->fi_aliased = alias_found = true;
- }
- if (likely(ret == NULL)) {
- nfsd4_init_file(fh, hashval, new);
- new->fi_aliased = alias_found;
- ret = new;
+ rcu_read_lock();
+ spin_lock(&inode->i_lock);
+
+ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+ nfs4_file_rhash_params);
+ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref))
+ ret = fi;
+ } else
+ fi->fi_aliased = alias_found = true;
}
- spin_unlock(&state_lock);
- return ret;
-}
+ if (ret)
+ goto out_unlock;

-static struct nfs4_file * find_file(struct svc_fh *fh)
-{
- struct nfs4_file *fp;
- unsigned int hashval = file_hashval(fh);
+ nfsd4_file_init(fhp, new);
+ err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
+ nfs4_file_rhash_params);
+ if (err)
+ goto out_unlock;

- rcu_read_lock();
- fp = find_file_locked(fh, hashval);
+ new->fi_aliased = alias_found;
+ ret = new;
+
+out_unlock:
+ spin_unlock(&inode->i_lock);
rcu_read_unlock();
- return fp;
+ return ret;
}

-static struct nfs4_file *
-find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
+static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
{
- struct nfs4_file *fp;
- unsigned int hashval = file_hashval(fh);
-
- rcu_read_lock();
- fp = find_file_locked(fh, hashval);
- rcu_read_unlock();
- if (fp)
- return fp;
-
- return insert_file(new, fh, hashval);
+ rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
+ nfs4_file_rhash_params);
}

/*
@@ -4760,9 +4800,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
struct nfs4_file *fp;
__be32 ret = nfs_ok;

- fp = find_file(current_fh);
+ fp = nfsd4_file_hash_lookup(current_fh);
if (!fp)
return ret;
+
/* Check for conflicting share reservations */
spin_lock(&fp->fi_lock);
if (fp->fi_share_deny & deny_type)
@@ -4774,7 +4815,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)

static bool nfsd4_deleg_present(const struct inode *inode)
{
- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+ struct file_lock_context *ctx = locks_inode_context(inode);

return ctx && !list_empty_careful(&ctx->flc_lease);
}
@@ -5655,7 +5696,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* and check for delegations in the process of being recalled.
* If not found, create the nfs4_file struct
*/
- fp = find_or_add_file(open->op_file, current_fh);
+ fp = nfsd4_file_hash_insert(open->op_file, current_fh);
+ if (unlikely(!fp))
+ return nfserr_jukebox;
if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp);
if (status)
@@ -5932,7 +5975,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)

list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
nf = stp->st_stid.sc_file;
- ctx = nf->fi_inode->i_flctx;
+ ctx = locks_inode_context(nf->fi_inode);
if (!ctx)
continue;
if (locks_owner_has_blockers(ctx, lo))
@@ -6160,17 +6203,63 @@ laundromat_main(struct work_struct *laundry)
}

static void
-courtesy_client_reaper(struct work_struct *reaper)
+courtesy_client_reaper(struct nfsd_net *nn)
{
struct list_head reaplist;
- struct delayed_work *dwork = to_delayed_work(reaper);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
- nfsd_shrinker_work);

nfs4_get_courtesy_client_reaplist(nn, &reaplist);
nfs4_process_client_reaplist(&reaplist);
}

+static void
+deleg_reaper(struct nfsd_net *nn)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+ (ktime_get_boottime_seconds() -
+ clp->cl_ra_time < 5)) {
+ continue;
+ }
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&cblist)) {
+ clp = list_first_entry(&cblist, struct nfs4_client,
+ cl_ra_cblist);
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+}
+
+static void
+nfsd4_state_shrinker_worker(struct work_struct *work)
+{
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
+ deleg_reaper(nn);
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -6935,6 +7024,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto put_stateid;

+ trace_nfsd_deleg_return(stateid);
wake_up_var(d_inode(cstate->current_fh.fh_dentry));
destroy_delegation(dp);
put_stateid:
@@ -7748,7 +7838,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}

inode = locks_inode(nf->nf_file);
- flctx = inode->i_flctx;
+ flctx = locks_inode_context(inode);

if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
@@ -7995,11 +8085,20 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);

+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+
+ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
+ goto err_shrinker;
return 0;

+err_shrinker:
+ put_net(net);
+ kfree(nn->sessionid_hashtbl);
err_sessionid:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
@@ -8071,10 +8170,16 @@ nfs4_state_start(void)
{
int ret;

- ret = nfsd4_create_callback_queue();
+ ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
if (ret)
return ret;

+ ret = nfsd4_create_callback_queue();
+ if (ret) {
+ rhltable_destroy(&nfs4_file_rhltable);
+ return ret;
+ }
+
set_max_delegations();
return 0;
}
@@ -8086,6 +8191,8 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);

@@ -8114,6 +8221,7 @@ void
nfs4_state_shutdown(void)
{
nfsd4_destroy_callback_queue();
+ rhltable_destroy(&nfs4_file_rhltable);
}

static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 89a579be042e..597f14a80512 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)

static __be32
nfsd4_decode_access(struct nfsd4_compoundargs *argp,
- struct nfsd4_access *access)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_access *access = &u->access;
if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
return nfserr_bad_xdr;
return nfs_ok;
}

static __be32
-nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_close *close = &u->close;
if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
return nfserr_bad_xdr;
return nfsd4_decode_stateid4(argp, &close->cl_stateid);
@@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)


static __be32
-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_commit *commit = &u->commit;
if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
return nfserr_bad_xdr;
if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
@@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
}

static __be32
-nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_create *create = &u->create;
__be32 *p, status;

memset(create, 0, sizeof(*create));
@@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
}

static inline __be32
-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_delegreturn *dr = &u->delegreturn;
return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
}

static inline __be32
-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_getattr *getattr = &u->getattr;
memset(getattr, 0, sizeof(*getattr));
return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
ARRAY_SIZE(getattr->ga_bmval));
}

static __be32
-nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_link *link = &u->link;
memset(link, 0, sizeof(*link));
return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
}
@@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
}

static __be32
-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_lock *lock = &u->lock;
memset(lock, 0, sizeof(*lock));
if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
return nfserr_bad_xdr;
@@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
}

static __be32
-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_lockt *lockt = &u->lockt;
memset(lockt, 0, sizeof(*lockt));
if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
return nfserr_bad_xdr;
@@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
}

static __be32
-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_locku *locku = &u->locku;
__be32 status;

if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
@@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
}

static __be32
-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_lookup *lookup = &u->lookup;
return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
}

@@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
}

static __be32
-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_open *open = &u->open;
__be32 status;
u32 dummy;

@@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
}

static __be32
-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_open_confirm *open_conf = &u->open_confirm;
__be32 status;

if (argp->minorversion >= 1)
@@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
}

static __be32
-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_open_downgrade *open_down = &u->open_downgrade;
__be32 status;

memset(open_down, 0, sizeof(*open_down));
@@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
}

static __be32
-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_putfh *putfh = &u->putfh;
__be32 *p;

if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
@@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
}

static __be32
-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
+nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
{
if (argp->minorversion == 0)
return nfs_ok;
@@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
}

static __be32
-nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_read *read = &u->read;
__be32 status;

memset(read, 0, sizeof(*read));
@@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
}

static __be32
-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_readdir *readdir = &u->readdir;
__be32 status;

memset(readdir, 0, sizeof(*readdir));
@@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
}

static __be32
-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_remove *remove = &u->remove;
memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
}

static __be32
-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_rename *rename = &u->rename;
__be32 status;

memset(rename, 0, sizeof(*rename));
@@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
}

static __be32
-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ clientid_t *clientid = &u->renew;
return nfsd4_decode_clientid4(argp, clientid);
}

static __be32
nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
- struct nfsd4_secinfo *secinfo)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_secinfo *secinfo = &u->secinfo;
secinfo->si_exp = NULL;
return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
}

static __be32
-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_setattr *setattr = &u->setattr;
__be32 status;

memset(setattr, 0, sizeof(*setattr));
@@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
}

static __be32
-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_setclientid *setclientid = &u->setclientid;
__be32 *p, status;

memset(setclientid, 0, sizeof(*setclientid));
@@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
}

static __be32
-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm;
__be32 status;

if (argp->minorversion >= 1)
@@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s

/* Also used for NVERIFY */
static __be32
-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_verify *verify = &u->verify;
__be32 *p, status;

memset(verify, 0, sizeof(*verify));
@@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
}

static __be32
-nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_write *write = &u->write;
__be32 status;

status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
@@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
}

static __be32
-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
__be32 status;

if (argp->minorversion >= 1)
@@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
return nfs_ok;
}

-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
memset(bc, 0, sizeof(*bc));
if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
return nfserr_bad_xdr;
return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
}

-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
u32 use_conn_in_rdma_mode;
__be32 status;

@@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
- struct nfsd4_exchange_id *exid)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
__be32 status;

memset(exid, 0, sizeof(*exid));
@@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_create_session *sess)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_create_session *sess = &u->create_session;
__be32 status;

memset(sess, 0, sizeof(*sess));
@@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_destroy_session *destroy_session)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_destroy_session *destroy_session = &u->destroy_session;
return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
}

static __be32
nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
- struct nfsd4_free_stateid *free_stateid)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
}

#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
- struct nfsd4_getdeviceinfo *gdev)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
__be32 status;

memset(gdev, 0, sizeof(*gdev));
@@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutcommit *lcp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
__be32 *p, status;

memset(lcp, 0, sizeof(*lcp));
@@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutget *lgp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutget *lgp = &u->layoutget;
__be32 status;

memset(lgp, 0, sizeof(*lgp));
@@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutreturn *lrp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
memset(lrp, 0, sizeof(*lrp));
if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
return nfserr_bad_xdr;
@@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
#endif /* CONFIG_NFSD_PNFS */

static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
- struct nfsd4_secinfo_no_name *sin)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name;
if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
return nfserr_bad_xdr;

@@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
- struct nfsd4_sequence *seq)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_sequence *seq = &u->sequence;
__be32 *p, status;

status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
@@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
}

static __be32
-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
+nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
struct nfsd4_test_stateid_id *stateid;
__be32 status;
u32 i;
@@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
}

static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
- struct nfsd4_destroy_clientid *dc)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
return nfsd4_decode_clientid4(argp, &dc->clientid);
}

static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
- struct nfsd4_reclaim_complete *rc)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
return nfserr_bad_xdr;
return nfs_ok;
@@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
- struct nfsd4_fallocate *fallocate)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_fallocate *fallocate = &u->allocate;
__be32 status;

status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
@@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
}

static __be32
-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_copy *copy = &u->copy;
u32 consecutive, i, count, sync;
struct nl4_server *ns_dummy;
__be32 status;
@@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)

static __be32
nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
- struct nfsd4_copy_notify *cn)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
__be32 status;

memset(cn, 0, sizeof(*cn));
@@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
- struct nfsd4_offload_status *os)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_offload_status *os = &u->offload_status;
os->count = 0;
os->status = 0;
return nfsd4_decode_stateid4(argp, &os->stateid);
}

static __be32
-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_seek *seek = &u->seek;
__be32 status;

status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
@@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
}

static __be32
-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_clone *clone = &u->clone;
__be32 status;

status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
@@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
*/
static __be32
nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
- struct nfsd4_getxattr *getxattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_getxattr *getxattr = &u->getxattr;
__be32 status;
u32 maxcount;

@@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
- struct nfsd4_setxattr *setxattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_setxattr *setxattr = &u->setxattr;
u32 flags, maxcount, size;
__be32 status;

@@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
- struct nfsd4_listxattrs *listxattrs)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
u32 maxcount;

memset(listxattrs, 0, sizeof(*listxattrs));
@@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,

static __be32
nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
- struct nfsd4_removexattr *removexattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_removexattr *removexattr = &u->removexattr;
memset(removexattr, 0, sizeof(*removexattr));
return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
}

static __be32
-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
{
return nfs_ok;
}

static __be32
-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
{
return nfserr_notsupp;
}

-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u);

static const nfsd4_dec nfsd4_dec_ops[] = {
- [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
- [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
- [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
- [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
- [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
- [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
- [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
- [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
- [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
- [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
- [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
- [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
- [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
- [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
- [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
- [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh,
- [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
- [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
- [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
- [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
- [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
- [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
- [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
- [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
- [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
- [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
- [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
- [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+ [OP_ACCESS] = nfsd4_decode_access,
+ [OP_CLOSE] = nfsd4_decode_close,
+ [OP_COMMIT] = nfsd4_decode_commit,
+ [OP_CREATE] = nfsd4_decode_create,
+ [OP_DELEGPURGE] = nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = nfsd4_decode_delegreturn,
+ [OP_GETATTR] = nfsd4_decode_getattr,
+ [OP_GETFH] = nfsd4_decode_noop,
+ [OP_LINK] = nfsd4_decode_link,
+ [OP_LOCK] = nfsd4_decode_lock,
+ [OP_LOCKT] = nfsd4_decode_lockt,
+ [OP_LOCKU] = nfsd4_decode_locku,
+ [OP_LOOKUP] = nfsd4_decode_lookup,
+ [OP_LOOKUPP] = nfsd4_decode_noop,
+ [OP_NVERIFY] = nfsd4_decode_verify,
+ [OP_OPEN] = nfsd4_decode_open,
+ [OP_OPENATTR] = nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = nfsd4_decode_putpubfh,
+ [OP_PUTROOTFH] = nfsd4_decode_noop,
+ [OP_READ] = nfsd4_decode_read,
+ [OP_READDIR] = nfsd4_decode_readdir,
+ [OP_READLINK] = nfsd4_decode_noop,
+ [OP_REMOVE] = nfsd4_decode_remove,
+ [OP_RENAME] = nfsd4_decode_rename,
+ [OP_RENEW] = nfsd4_decode_renew,
+ [OP_RESTOREFH] = nfsd4_decode_noop,
+ [OP_SAVEFH] = nfsd4_decode_noop,
+ [OP_SECINFO] = nfsd4_decode_secinfo,
+ [OP_SETATTR] = nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = nfsd4_decode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm,
+ [OP_VERIFY] = nfsd4_decode_verify,
+ [OP_WRITE] = nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner,

/* new operations for NFSv4.1 */
- [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl,
- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
- [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
- [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
- [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
- [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
- [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl,
+ [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id,
+ [OP_CREATE_SESSION] = nfsd4_decode_create_session,
+ [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
+ [OP_FREE_STATEID] = nfsd4_decode_free_stateid,
+ [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp,
#ifdef CONFIG_NFSD_PNFS
- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
+ [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
+ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit,
+ [OP_LAYOUTGET] = nfsd4_decode_layoutget,
+ [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn,
#else
- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_GETDEVICEINFO] = nfsd4_decode_notsupp,
+ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp,
+ [OP_LAYOUTGET] = nfsd4_decode_notsupp,
+ [OP_LAYOUTRETURN] = nfsd4_decode_notsupp,
#endif
- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
- [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
- [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
- [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
- [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
+ [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name,
+ [OP_SEQUENCE] = nfsd4_decode_sequence,
+ [OP_SET_SSV] = nfsd4_decode_notsupp,
+ [OP_TEST_STATEID] = nfsd4_decode_test_stateid,
+ [OP_WANT_DELEGATION] = nfsd4_decode_notsupp,
+ [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid,
+ [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete,

/* new operations for NFSv4.2 */
- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
- [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
- [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
+ [OP_ALLOCATE] = nfsd4_decode_fallocate,
+ [OP_COPY] = nfsd4_decode_copy,
+ [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify,
+ [OP_DEALLOCATE] = nfsd4_decode_fallocate,
+ [OP_IO_ADVISE] = nfsd4_decode_notsupp,
+ [OP_LAYOUTERROR] = nfsd4_decode_notsupp,
+ [OP_LAYOUTSTATS] = nfsd4_decode_notsupp,
+ [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status,
+ [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status,
+ [OP_READ_PLUS] = nfsd4_decode_read,
+ [OP_SEEK] = nfsd4_decode_seek,
+ [OP_WRITE_SAME] = nfsd4_decode_notsupp,
+ [OP_CLONE] = nfsd4_decode_clone,
/* RFC 8276 extended atributes operations */
- [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr,
- [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr,
- [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs,
- [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr,
+ [OP_GETXATTR] = nfsd4_decode_getxattr,
+ [OP_SETXATTR] = nfsd4_decode_setxattr,
+ [OP_LISTXATTRS] = nfsd4_decode_listxattrs,
+ [OP_REMOVEXATTR] = nfsd4_decode_removexattr,
};

static inline bool
@@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
}

static __be32
-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
+nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_access *access = &u->access;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
return 0;
}

-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
+static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
}

static __be32
-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
+nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_close *close = &u->close;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_encode_stateid(xdr, &close->cl_stateid);
@@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c


static __be32
-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_commit *commit = &u->commit;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
}

static __be32
-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
+nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_create *create = &u->create;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
}

static __be32
-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_getattr *getattr = &u->getattr;
struct svc_fh *fhp = getattr->ga_fhp;
struct xdr_stream *xdr = resp->xdr;

@@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
}

static __be32
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct svc_fh **fhpp = &u->getfh;
struct xdr_stream *xdr = resp->xdr;
struct svc_fh *fhp = *fhpp;
unsigned int len;
@@ -3775,8 +3848,10 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
}

static __be32
-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_lock *lock = &u->lock;
struct xdr_stream *xdr = resp->xdr;

if (!nfserr)
@@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
}

static __be32
-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_lockt *lockt = &u->lockt;
struct xdr_stream *xdr = resp->xdr;

if (nfserr == nfserr_denied)
@@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
}

static __be32
-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_locku *locku = &u->locku;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
@@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l


static __be32
-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
+nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_link *link = &u->link;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li


static __be32
-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
+nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_open *open = &u->open;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
}

static __be32
-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_open_confirm *oc = &u->open_confirm;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
}

static __be32
-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_open_downgrade *od = &u->open_downgrade;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_encode_stateid(xdr, &od->od_stateid);
@@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,

static __be32
nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_read *read)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_read *read = &u->read;
bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
unsigned long maxcount;
struct xdr_stream *xdr = resp->xdr;
@@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}

static __be32
-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_readlink *readlink = &u->readlink;
__be32 *p, *maxcount_p, zero = xdr_zero;
struct xdr_stream *xdr = resp->xdr;
int length_offset = xdr->buf->len;
@@ -4110,8 +4200,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
}

static __be32
-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_readdir *readdir = &u->readdir;
int maxcount;
int bytes_left;
loff_t offset;
@@ -4201,8 +4293,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
}

static __be32
-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_remove *remove = &u->remove;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
}

static __be32
-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_rename *rename = &u->rename;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)

static __be32
nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_secinfo *secinfo)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_secinfo *secinfo = &u->secinfo;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
@@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_secinfo_no_name *secinfo)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
struct xdr_stream *xdr = resp->xdr;

return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
@@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
* regardless of the error status.
*/
static __be32
-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_setattr *setattr = &u->setattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
}

static __be32
-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_setclientid *scd = &u->setclientid;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
}

static __be32
-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
+nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_write *write = &u->write;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w

static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_exchange_id *exid)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
char *major_id;
@@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_create_session *sess)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_create_session *sess = &u->create_session;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_sequence *seq)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_sequence *seq = &u->sequence;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_test_stateid *test_stateid)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
struct xdr_stream *xdr = resp->xdr;
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
@@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_getdeviceinfo *gdev)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
struct xdr_stream *xdr = resp->xdr;
const struct nfsd4_layout_ops *ops;
u32 starting_len = xdr->buf->len, needed_len;
@@ -4611,8 +4720,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutget *lgp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutget *lgp = &u->layoutget;
struct xdr_stream *xdr = resp->xdr;
const struct nfsd4_layout_ops *ops;
__be32 *p;
@@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutcommit *lcp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutreturn *lrp)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)

static __be32
nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_copy *copy)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_copy *copy = &u->copy;
__be32 *p;

nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
@@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_offload_status *os)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_offload_status *os = &u->offload_status;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4777,156 +4891,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof,
- loff_t *pos)
+ struct nfsd4_read *read)
{
- struct xdr_stream *xdr = resp->xdr;
+ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
struct file *file = read->rd_nf->nf_file;
- int starting_len = xdr->buf->len;
- loff_t hole_pos;
- __be32 nfserr;
- __be32 *p, tmp;
- __be64 tmp64;
-
- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- if (hole_pos > read->rd_offset)
- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned long maxcount;
+ __be32 nfserr, *p;

/* Content type, offset, byte count */
p = xdr_reserve_space(xdr, 4 + 8 + 4);
if (!p)
- return nfserr_resource;
+ return nfserr_io;
+ if (resp->xdr->buf->page_len && splice_ok) {
+ WARN_ON_ONCE(splice_ok);
+ return nfserr_serverfault;
+ }

- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
- if (read->rd_vlen < 0)
- return nfserr_resource;
+ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));

- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
+ if (file->f_op->splice_read && splice_ok)
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
-
- tmp = htonl(NFS4_CONTENT_DATA);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp64 = cpu_to_be64(read->rd_offset);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
- tmp = htonl(*maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
-
- tmp = xdr_zero;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
- xdr_pad_size(*maxcount));
- return nfs_ok;
-}
-
-static __be32
-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof)
-{
- struct file *file = read->rd_nf->nf_file;
- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
- loff_t f_size = i_size_read(file_inode(file));
- unsigned long count;
- __be32 *p;
-
- if (data_pos == -ENXIO)
- data_pos = f_size;
- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
- count = data_pos - read->rd_offset;

- /* Content type, offset, byte count */
- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
- if (!p)
- return nfserr_resource;
-
- *p++ = htonl(NFS4_CONTENT_HOLE);
+ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ *p = cpu_to_be32(read->rd_length);

- *eof = (read->rd_offset + count) >= f_size;
- *maxcount = min_t(unsigned long, count, *maxcount);
return nfs_ok;
}

static __be32
nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_read *read)
+ union nfsd4_op_u *u)
{
- unsigned long maxcount, count;
+ struct nfsd4_read *read = &u->read;
+ struct file *file = read->rd_nf->nf_file;
struct xdr_stream *xdr = resp->xdr;
- struct file *file;
int starting_len = xdr->buf->len;
- int last_segment = xdr->buf->len;
- int segments = 0;
- __be32 *p, tmp;
- bool is_data;
- loff_t pos;
- u32 eof;
+ u32 segments = 0;
+ __be32 *p;

if (nfserr)
return nfserr;
- file = read->rd_nf->nf_file;

/* eof flag, segment count */
p = xdr_reserve_space(xdr, 4 + 4);
if (!p)
- return nfserr_resource;
+ return nfserr_io;
xdr_commit_encode(xdr);

- maxcount = min_t(unsigned long, read->rd_length,
- (xdr->buf->buflen - xdr->buf->len));
- count = maxcount;
-
- eof = read->rd_offset >= i_size_read(file_inode(file));
- if (eof)
+ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
+ if (read->rd_eof)
goto out;

- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- is_data = pos > read->rd_offset;
-
- while (count > 0 && !eof) {
- maxcount = count;
- if (is_data)
- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
- segments == 0 ? &pos : NULL);
- else
- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
- if (nfserr)
- goto out;
- count -= maxcount;
- read->rd_offset += maxcount;
- is_data = !is_data;
- last_segment = xdr->buf->len;
- segments++;
- }
-
-out:
- if (nfserr && segments == 0)
+ nfserr = nfsd4_encode_read_plus_data(resp, read);
+ if (nfserr) {
xdr_truncate_encode(xdr, starting_len);
- else {
- if (nfserr) {
- xdr_truncate_encode(xdr, last_segment);
- nfserr = nfs_ok;
- eof = 0;
- }
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp = htonl(segments);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+ return nfserr;
}

+ segments++;
+
+out:
+ p = xdr_encode_bool(p, read->rd_eof);
+ *p = cpu_to_be32(segments);
return nfserr;
}

static __be32
nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_copy_notify *cn)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -4960,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_seek *seek)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_seek *seek = &u->seek;
__be32 *p;

p = xdr_reserve_space(resp->xdr, 4 + 8);
@@ -4972,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
}

static __be32
-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *p)
{
return nfserr;
}
@@ -5023,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen)

static __be32
nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_getxattr *getxattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_getxattr *getxattr = &u->getxattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p, err;

@@ -5047,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_setxattr *setxattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_setxattr *setxattr = &u->setxattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -5088,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,

static __be32
nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_listxattrs *listxattrs)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
struct xdr_stream *xdr = resp->xdr;
u32 cookie_offset, count_offset, eof;
u32 left, xdrleft, slen, count;
@@ -5199,8 +5245,9 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_removexattr *removexattr)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_removexattr *removexattr = &u->removexattr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;

@@ -5212,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
return 0;
}

-typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);

/*
* Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
@@ -5220,93 +5267,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
* done in the decoding phase.
*/
static const nfsd4_enc nfsd4_enc_ops[] = {
- [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
- [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
- [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
- [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
- [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
- [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
- [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
- [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
- [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
- [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
- [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
- [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
- [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
- [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
- [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
- [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
- [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
- [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
- [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
- [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
- [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
- [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
- [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_ACCESS] = nfsd4_encode_access,
+ [OP_CLOSE] = nfsd4_encode_close,
+ [OP_COMMIT] = nfsd4_encode_commit,
+ [OP_CREATE] = nfsd4_encode_create,
+ [OP_DELEGPURGE] = nfsd4_encode_noop,
+ [OP_DELEGRETURN] = nfsd4_encode_noop,
+ [OP_GETATTR] = nfsd4_encode_getattr,
+ [OP_GETFH] = nfsd4_encode_getfh,
+ [OP_LINK] = nfsd4_encode_link,
+ [OP_LOCK] = nfsd4_encode_lock,
+ [OP_LOCKT] = nfsd4_encode_lockt,
+ [OP_LOCKU] = nfsd4_encode_locku,
+ [OP_LOOKUP] = nfsd4_encode_noop,
+ [OP_LOOKUPP] = nfsd4_encode_noop,
+ [OP_NVERIFY] = nfsd4_encode_noop,
+ [OP_OPEN] = nfsd4_encode_open,
+ [OP_OPENATTR] = nfsd4_encode_noop,
+ [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade,
+ [OP_PUTFH] = nfsd4_encode_noop,
+ [OP_PUTPUBFH] = nfsd4_encode_noop,
+ [OP_PUTROOTFH] = nfsd4_encode_noop,
+ [OP_READ] = nfsd4_encode_read,
+ [OP_READDIR] = nfsd4_encode_readdir,
+ [OP_READLINK] = nfsd4_encode_readlink,
+ [OP_REMOVE] = nfsd4_encode_remove,
+ [OP_RENAME] = nfsd4_encode_rename,
+ [OP_RENEW] = nfsd4_encode_noop,
+ [OP_RESTOREFH] = nfsd4_encode_noop,
+ [OP_SAVEFH] = nfsd4_encode_noop,
+ [OP_SECINFO] = nfsd4_encode_secinfo,
+ [OP_SETATTR] = nfsd4_encode_setattr,
+ [OP_SETCLIENTID] = nfsd4_encode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop,
+ [OP_VERIFY] = nfsd4_encode_noop,
+ [OP_WRITE] = nfsd4_encode_write,
+ [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop,

/* NFSv4.1 operations */
- [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
- [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
- [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
- [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop,
+ [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id,
+ [OP_CREATE_SESSION] = nfsd4_encode_create_session,
+ [OP_DESTROY_SESSION] = nfsd4_encode_noop,
+ [OP_FREE_STATEID] = nfsd4_encode_noop,
+ [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop,
#ifdef CONFIG_NFSD_PNFS
- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
+ [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
+ [OP_GETDEVICELIST] = nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit,
+ [OP_LAYOUTGET] = nfsd4_encode_layoutget,
+ [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn,
#else
- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETDEVICEINFO] = nfsd4_encode_noop,
+ [OP_GETDEVICELIST] = nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = nfsd4_encode_noop,
+ [OP_LAYOUTGET] = nfsd4_encode_noop,
+ [OP_LAYOUTRETURN] = nfsd4_encode_noop,
#endif
- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
- [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
- [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid,
- [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name,
+ [OP_SEQUENCE] = nfsd4_encode_sequence,
+ [OP_SET_SSV] = nfsd4_encode_noop,
+ [OP_TEST_STATEID] = nfsd4_encode_test_stateid,
+ [OP_WANT_DELEGATION] = nfsd4_encode_noop,
+ [OP_DESTROY_CLIENTID] = nfsd4_encode_noop,
+ [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop,

/* NFSv4.2 operations */
- [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
- [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
- [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
- [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_ALLOCATE] = nfsd4_encode_noop,
+ [OP_COPY] = nfsd4_encode_copy,
+ [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify,
+ [OP_DEALLOCATE] = nfsd4_encode_noop,
+ [OP_IO_ADVISE] = nfsd4_encode_noop,
+ [OP_LAYOUTERROR] = nfsd4_encode_noop,
+ [OP_LAYOUTSTATS] = nfsd4_encode_noop,
+ [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop,
+ [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status,
+ [OP_READ_PLUS] = nfsd4_encode_read_plus,
+ [OP_SEEK] = nfsd4_encode_seek,
+ [OP_WRITE_SAME] = nfsd4_encode_noop,
+ [OP_CLONE] = nfsd4_encode_noop,

/* RFC 8276 extended atributes operations */
- [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr,
- [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr,
- [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs,
- [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr,
+ [OP_GETXATTR] = nfsd4_encode_getxattr,
+ [OP_SETXATTR] = nfsd4_encode_setxattr,
+ [OP_LISTXATTRS] = nfsd4_encode_listxattrs,
+ [OP_REMOVEXATTR] = nfsd4_encode_removexattr,
};

/*
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 573de0d49e17..76a60e7a7509 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)

cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
+#ifdef CONFIG_NFSD_V2
case 2:
+#endif
case 3:
nfsd_vers(nn, num, cmd);
break;
@@ -601,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
}
break;
default:
- return -EINVAL;
+ /* Ignore requests to disable non-existent versions */
+ if (cmd == NFSD_SET)
+ return -EINVAL;
}
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
@@ -1448,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
- retval = nfsd4_init_leases_net(nn);
- if (retval)
- goto out_drc_error;
+ nfsd4_init_leases_net(nn);
retval = nfsd_reply_cache_init(nn);
if (retval)
goto out_cache_error;
@@ -1460,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;

out_cache_error:
- nfsd4_leases_net_shutdown(nn);
-out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
@@ -1477,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
- nfsd4_leases_net_shutdown(nn);
}

static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 09726c5b9a31..fa0144a74267 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -64,8 +64,7 @@ struct readdir_cd {


extern struct svc_program nfsd_program;
-extern const struct svc_version nfsd_version2, nfsd_version3,
- nfsd_version4;
+extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock;
extern unsigned long nfsd_drc_max_mem;
@@ -505,8 +504,7 @@ extern void unregister_cld_notifier(void);
extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
#endif

-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
+extern void nfsd4_init_leases_net(struct nfsd_net *nn);

#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
@@ -514,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
return 0;
}

-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
+static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };

#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index c3ae6414fc5c..513e028b0bbe 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *);
void fh_put(struct svc_fh *);

static __inline__ struct svc_fh *
-fh_copy(struct svc_fh *dst, struct svc_fh *src)
+fh_copy(struct svc_fh *dst, const struct svc_fh *src)
{
WARN_ON(src->fh_dentry);

@@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src)
}

static inline void
-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src)
{
dst->fh_size = src->fh_size;
memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
@@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize)
return fhp;
}

-static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+static inline bool fh_match(const struct knfsd_fh *fh1,
+ const struct knfsd_fh *fh2)
{
if (fh1->fh_size != fh2->fh_size)
return false;
@@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
return true;
}

-static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
+ const struct knfsd_fh *fh2)
{
if (fh1->fh_fsid_type != fh2->fh_fsid_type)
return false;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 82b3ddeacc33..9744443c3965 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- return rpc_drop_reply;
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}

@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- return rpc_drop_reply;
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}

@@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = {
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS2_SVC_XDRSIZE,
};
-
-/*
- * Map errnos to NFS errnos.
- */
-__be32
-nfserrno (int errno)
-{
- static struct {
- __be32 nfserr;
- int syserr;
- } nfs_errtbl[] = {
- { nfs_ok, 0 },
- { nfserr_perm, -EPERM },
- { nfserr_noent, -ENOENT },
- { nfserr_io, -EIO },
- { nfserr_nxio, -ENXIO },
- { nfserr_fbig, -E2BIG },
- { nfserr_stale, -EBADF },
- { nfserr_acces, -EACCES },
- { nfserr_exist, -EEXIST },
- { nfserr_xdev, -EXDEV },
- { nfserr_mlink, -EMLINK },
- { nfserr_nodev, -ENODEV },
- { nfserr_notdir, -ENOTDIR },
- { nfserr_isdir, -EISDIR },
- { nfserr_inval, -EINVAL },
- { nfserr_fbig, -EFBIG },
- { nfserr_nospc, -ENOSPC },
- { nfserr_rofs, -EROFS },
- { nfserr_mlink, -EMLINK },
- { nfserr_nametoolong, -ENAMETOOLONG },
- { nfserr_notempty, -ENOTEMPTY },
-#ifdef EDQUOT
- { nfserr_dquot, -EDQUOT },
-#endif
- { nfserr_stale, -ESTALE },
- { nfserr_jukebox, -ETIMEDOUT },
- { nfserr_jukebox, -ERESTARTSYS },
- { nfserr_jukebox, -EAGAIN },
- { nfserr_jukebox, -EWOULDBLOCK },
- { nfserr_jukebox, -ENOMEM },
- { nfserr_io, -ETXTBSY },
- { nfserr_notsupp, -EOPNOTSUPP },
- { nfserr_toosmall, -ETOOSMALL },
- { nfserr_serverfault, -ESERVERFAULT },
- { nfserr_serverfault, -ENFILE },
- { nfserr_io, -EREMOTEIO },
- { nfserr_stale, -EOPENSTALE },
- { nfserr_io, -EUCLEAN },
- { nfserr_perm, -ENOKEY },
- { nfserr_no_grace, -ENOGRACE},
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
- if (nfs_errtbl[i].syserr == errno)
- return nfs_errtbl[i].nfserr;
- }
- WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
- return nfserr_io;
-}
-
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index c7695ebd28dc..0c75636054a5 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = {
+# if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2,
+# endif
+# if defined(CONFIG_NFSD_V3_ACL)
[3] = &nfsd_acl_version3,
+# endif
};

#define NFSD_ACL_MINVERS 2
@@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = {
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */

static const struct svc_version *nfsd_version[] = {
+#if defined(CONFIG_NFSD_V2)
[2] = &nfsd_version2,
+#endif
[3] = &nfsd_version3,
#if defined(CONFIG_NFSD_V4)
[4] = &nfsd_version4,
@@ -1065,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)

nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
*statp = proc->pc_func(rqstp);
- if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
+ if (test_bit(RQ_DROPME, &rqstp->rq_flags))
goto out_update_drop;

if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..e94634d30591 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
+#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {

unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
+ struct list_head cl_ra_cblist;
};

/* struct nfs4_client_reset
@@ -536,16 +541,13 @@ struct nfs4_clnt_odstate {
* inode can have multiple filehandles associated with it, so there is
* (potentially) a many to one relationship between this struct and struct
* inode.
- *
- * These are hashed by filehandle in the file_hashtbl, which is protected by
- * the global state_lock spinlock.
*/
struct nfs4_file {
refcount_t fi_ref;
struct inode * fi_inode;
bool fi_aliased;
spinlock_t fi_lock;
- struct hlist_node fi_hash; /* hash on fi_fhandle */
+ struct rhlist_head fi_rlist;
struct list_head fi_stateids;
union {
struct list_head fi_delegations;
@@ -639,6 +641,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};

/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 132335011cca..4183819ea082 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,12 @@
#define _NFSD_TRACE_H

#include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>
+#include <trace/misc/nfs.h>

#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"

#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
@@ -604,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);

DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_return);
DEFINE_STATEID_EVENT(deleg_recall);

DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
@@ -636,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);

+TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
+TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
+TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
+TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
+TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
+TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
+TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
+
+#define show_stid_type(x) \
+ __print_flags(x, "|", \
+ { NFS4_OPEN_STID, "OPEN" }, \
+ { NFS4_LOCK_STID, "LOCK" }, \
+ { NFS4_DELEG_STID, "DELEG" }, \
+ { NFS4_CLOSED_STID, "CLOSED" }, \
+ { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
+ { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
+ { NFS4_LAYOUT_STID, "LAYOUT" })
+
+DECLARE_EVENT_CLASS(nfsd_stid_class,
+ TP_PROTO(
+ const struct nfs4_stid *stid
+ ),
+ TP_ARGS(stid),
+ TP_STRUCT__entry(
+ __field(unsigned long, sc_type)
+ __field(int, sc_count)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ ),
+ TP_fast_assign(
+ const stateid_t *stp = &stid->sc_stateid;
+
+ __entry->sc_type = stid->sc_type;
+ __entry->sc_count = refcount_read(&stid->sc_count);
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation,
+ __entry->sc_count, show_stid_type(__entry->sc_type)
+ )
+);
+
+#define DEFINE_STID_EVENT(name) \
+DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
+ TP_PROTO(const struct nfs4_stid *stid), \
+ TP_ARGS(stid))
+
+DEFINE_STID_EVENT(revoke);
+
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@@ -1436,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);

+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, keep)
+ __field(unsigned long, bmval0)
+ __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->keep = ra->ra_keep;
+ __entry->bmval0 = ra->ra_bmval[0];
+ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
+ ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen);
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->keep, show_rca_mask(__entry->bmval0)
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_cb_done_class,
TP_PROTO(
const stateid_t *stp,
@@ -1475,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);

+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->status
+ )
+);
+
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index eccc6ce55a63..5d6a61d47a90 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -49,6 +49,69 @@

#define NFSDDBG_FACILITY NFSDDBG_FILEOP

+/**
+ * nfserrno - Map Linux errnos to NFS errnos
+ * @errno: POSIX(-ish) error code to be mapped
+ *
+ * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
+ * it's an error we don't expect, log it once and return nfserr_io.
+ */
+__be32
+nfserrno (int errno)
+{
+ static struct {
+ __be32 nfserr;
+ int syserr;
+ } nfs_errtbl[] = {
+ { nfs_ok, 0 },
+ { nfserr_perm, -EPERM },
+ { nfserr_noent, -ENOENT },
+ { nfserr_io, -EIO },
+ { nfserr_nxio, -ENXIO },
+ { nfserr_fbig, -E2BIG },
+ { nfserr_stale, -EBADF },
+ { nfserr_acces, -EACCES },
+ { nfserr_exist, -EEXIST },
+ { nfserr_xdev, -EXDEV },
+ { nfserr_mlink, -EMLINK },
+ { nfserr_nodev, -ENODEV },
+ { nfserr_notdir, -ENOTDIR },
+ { nfserr_isdir, -EISDIR },
+ { nfserr_inval, -EINVAL },
+ { nfserr_fbig, -EFBIG },
+ { nfserr_nospc, -ENOSPC },
+ { nfserr_rofs, -EROFS },
+ { nfserr_mlink, -EMLINK },
+ { nfserr_nametoolong, -ENAMETOOLONG },
+ { nfserr_notempty, -ENOTEMPTY },
+ { nfserr_dquot, -EDQUOT },
+ { nfserr_stale, -ESTALE },
+ { nfserr_jukebox, -ETIMEDOUT },
+ { nfserr_jukebox, -ERESTARTSYS },
+ { nfserr_jukebox, -EAGAIN },
+ { nfserr_jukebox, -EWOULDBLOCK },
+ { nfserr_jukebox, -ENOMEM },
+ { nfserr_io, -ETXTBSY },
+ { nfserr_notsupp, -EOPNOTSUPP },
+ { nfserr_toosmall, -ETOOSMALL },
+ { nfserr_serverfault, -ESERVERFAULT },
+ { nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
+ { nfserr_io, -EUCLEAN },
+ { nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+ if (nfs_errtbl[i].syserr == errno)
+ return nfs_errtbl[i].nfserr;
+ }
+ WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
+ return nfserr_io;
+}
+
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
@@ -1317,7 +1380,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode &= ~current_umask();

err = 0;
- host_err = 0;
switch (type) {
case S_IFREG:
host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9744b041105b..dbdfef7ae85b 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
posix_acl_release(attrs->na_dpacl);
}

+__be32 nfserrno (int errno);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 36c3340c1d54..510978e602da 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
union nfsd4_op_u *);
};

+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};

#endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index bb7e33c24073..d26026090024 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni)
{
struct rb_node *node;

- if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec))
+ if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec))
ni_delete_all(ni);

al_destroy(ni);
@@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
return 0;
}

+ if (!ni->mi.mrec)
+ goto out;
+
if (is_rec_inuse(ni->mi.mrec) &&
!(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
bool modified = false;
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 1eac80d55b55..4c2d079b3d49 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1674,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)

out:
if (err) {
+ make_bad_inode(inode);
iput(inode);
ni = ERR_PTR(err);
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 7371f7855e4c..eee01db6e0cc 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
struct ATTR_LIST_ENTRY *le = NULL;
struct ATTRIB *a;
const struct INDEX_NAMES *in = &s_index_names[indx->type];
+ struct INDEX_ROOT *root = NULL;

a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL,
mi);
@@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
if (attr)
*attr = a;

- return resident_data_ex(a, sizeof(struct INDEX_ROOT));
+ root = resident_data_ex(a, sizeof(struct INDEX_ROOT));
+
+ /* length check */
+ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) >
+ le32_to_cpu(a->res.data_size)) {
+ return NULL;
+ }
+
+ return root;
}

static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni,
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 9e3dac51eb26..d4dbaae8b521 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -59,7 +59,7 @@ struct bvec_iter {

unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
-} __packed;
+} __packed __aligned(4);

struct bvec_iter_all {
struct bio_vec bv;
diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 9192986b1a73..ac862422df15 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
if (!malloc_ptr)
malloc_ptr = free_mem_ptr;

- malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+ malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */

p = (void *)malloc_ptr;
malloc_ptr += size;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 4e1bfee9675d..de6d6558a4d3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -390,6 +390,7 @@ void efi_native_runtime_setup(void);
#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
#define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
#define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7)
+#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89)

#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 67313881f8ac..092d8fa10153 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1189,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files);
extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner);
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return smp_load_acquire(&inode->i_flctx);
+}
+
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
@@ -1334,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
{
return false;
}
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return NULL;
+}
+
#endif /* !CONFIG_FILE_LOCKING */

static inline struct inode *file_inode(const struct file *f)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index bef8db9d6c08..e5f4b6f8d1c0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#include <linux/netfilter/nf_conntrack_zones_common.h>

void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+void nf_ct_set_closing(struct nf_conntrack *nfct);
struct nf_conntrack_tuple;
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb);
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {}
struct nf_conntrack_tuple;
static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
@@ -459,6 +461,8 @@ struct nf_ct_hook {
bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
const struct sk_buff *);
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
+ void (*set_closing)(struct nf_conntrack *nfct);
+ int (*confirm)(struct sk_buff *skb);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8d04b6a5964c..730003c4f4af 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -732,4 +732,17 @@ enum nfs4_setxattr_options {
SETXATTR4_CREATE = 1,
SETXATTR4_REPLACE = 2,
};
+
+enum {
+ RCA4_TYPE_MASK_RDATA_DLG = 0,
+ RCA4_TYPE_MASK_WDATA_DLG = 1,
+ RCA4_TYPE_MASK_DIR_DLG = 2,
+ RCA4_TYPE_MASK_FILE_LAYOUT = 3,
+ RCA4_TYPE_MASK_BLK_LAYOUT = 4,
+ RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8,
+ RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9,
+ RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12,
+ RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15,
+};
+
#endif
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 43ac3fa760db..9783b9107d76 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite,
extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
struct usb_ep *ep0);
void composite_dev_cleanup(struct usb_composite_dev *cdev);
+void check_remote_wakeup_config(struct usb_gadget *g,
+ struct usb_configuration *c);

static inline struct usb_composite_driver *to_cdriver(
struct usb_gadget_driver *gdrv)
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index dc3092cea99e..5bec668b41dc 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -309,6 +309,7 @@ struct usb_udc;
struct usb_gadget_ops {
int (*get_frame)(struct usb_gadget *);
int (*wakeup)(struct usb_gadget *);
+ int (*set_remote_wakeup)(struct usb_gadget *, int set);
int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered);
int (*vbus_session) (struct usb_gadget *, int is_active);
int (*vbus_draw) (struct usb_gadget *, unsigned mA);
@@ -383,6 +384,8 @@ struct usb_gadget_ops {
* @connected: True if gadget is connected.
* @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag
* indicates that it supports LPM as per the LPM ECN & errata.
+ * @wakeup_capable: True if gadget is capable of sending remote wakeup.
+ * @wakeup_armed: True if gadget is armed by the host for remote wakeup.
* @irq: the interrupt number for device controller.
* @id_number: a unique ID number for ensuring that gadget names are distinct
*
@@ -444,6 +447,8 @@ struct usb_gadget {
unsigned deactivated:1;
unsigned connected:1;
unsigned lpm_capable:1;
+ unsigned wakeup_capable:1;
+ unsigned wakeup_armed:1;
int irq;
int id_number;
};
@@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g)
#if IS_ENABLED(CONFIG_USB_GADGET)
int usb_gadget_frame_number(struct usb_gadget *gadget);
int usb_gadget_wakeup(struct usb_gadget *gadget);
+int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set);
int usb_gadget_set_selfpowered(struct usb_gadget *gadget);
int usb_gadget_clear_selfpowered(struct usb_gadget *gadget);
int usb_gadget_vbus_connect(struct usb_gadget *gadget);
@@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget)
{ return 0; }
static inline int usb_gadget_wakeup(struct usb_gadget *gadget)
{ return 0; }
+static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
+{ return 0; }
static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget)
{ return 0; }
static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget)
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..21da31e1dff5 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
sockptr_t optval, unsigned int optlen);
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen);
+ int (*ipv6_dev_get_saddr)(struct net *net,
+ const struct net_device *dst_dev,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;

diff --git a/include/net/mctp.h b/include/net/mctp.h
index 82800d521c3d..7ed84054f462 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -249,6 +249,7 @@ struct mctp_route {
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr);

+/* always takes ownership of skb */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6a2019aaa464..3dbf947285be 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -125,6 +125,12 @@ struct nf_conn {
union nf_conntrack_proto proto;
};

+static inline struct nf_conn *
+nf_ct_to_nf_conn(const struct nf_conntrack *nfct)
+{
+ return container_of(nfct, struct nf_conn, ct_general);
+}
+
static inline struct nf_conn *
nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
{
@@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)

void nf_ct_destroy(struct nf_conntrack *nfct);

+void nf_conntrack_tcp_set_closing(struct nf_conn *ct);
+
/* decrement reference count on a conntrack */
static inline void nf_ct_put(struct nf_conn *ct)
{
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d2751ed536df..a64713fe5264 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -204,6 +204,7 @@ struct scsi_device {
unsigned use_10_for_rw:1; /* first try 10-byte read / write */
unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
+ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */
unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
unsigned no_write_same:1; /* no WRITE SAME command */
unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
@@ -479,28 +480,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state);
extern int scsi_is_sdev_device(const struct device *);
extern int scsi_is_target_device(const struct device *);
extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
-extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
- int data_direction, void *buffer, unsigned bufflen,
- unsigned char *sense, struct scsi_sense_hdr *sshdr,
- int timeout, int retries, blk_opf_t flags,
- req_flags_t rq_flags, int *resid);
+
+/* Optional arguments to scsi_execute_cmd */
+struct scsi_exec_args {
+ unsigned char *sense; /* sense buffer */
+ unsigned int sense_len; /* sense buffer len */
+ struct scsi_sense_hdr *sshdr; /* decoded sense header */
+ blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */
+ int *resid; /* residual length */
+};
+
+int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
+ blk_opf_t opf, void *buffer, unsigned int bufflen,
+ int timeout, int retries,
+ const struct scsi_exec_args *args);
+
/* Make sure any sense buffer is the correct size. */
-#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \
- sshdr, timeout, retries, flags, rq_flags, resid) \
+#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \
+ _sshdr, _timeout, _retries, _flags, _rq_flags, \
+ _resid) \
({ \
- BUILD_BUG_ON((sense) != NULL && \
- sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \
- __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \
- sense, sshdr, timeout, retries, flags, rq_flags, \
- resid); \
+ scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \
+ _buffer, _bufflen, _timeout, _retries, \
+ &(struct scsi_exec_args) { \
+ .sense = _sense, \
+ .sshdr = _sshdr, \
+ .req_flags = _rq_flags & RQF_PM ? \
+ BLK_MQ_REQ_PM : 0, \
+ .resid = _resid, \
+ }); \
})
+
static inline int scsi_execute_req(struct scsi_device *sdev,
const unsigned char *cmd, int data_direction, void *buffer,
unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
int retries, int *resid)
{
- return scsi_execute(sdev, cmd, data_direction, buffer,
- bufflen, NULL, sshdr, timeout, retries, 0, 0, resid);
+ return scsi_execute_cmd(sdev, cmd,
+ data_direction == DMA_TO_DEVICE ?
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer,
+ bufflen, timeout, retries,
+ &(struct scsi_exec_args) {
+ .sshdr = sshdr,
+ .resid = resid,
+ });
}
extern void sdev_disable_disk_events(struct scsi_device *sdev);
extern void sdev_enable_disk_events(struct scsi_device *sdev);
diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
deleted file mode 100644
index 738b97f22f36..000000000000
--- a/include/trace/events/fs.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Display helpers for generic filesystem items
- *
- * Author: Chuck Lever <chuck.lever@xxxxxxxxxx>
- *
- * Copyright (c) 2020, Oracle and/or its affiliates.
- */
-
-#include <linux/fs.h>
-
-#define show_fs_dirent_type(x) \
- __print_symbolic(x, \
- { DT_UNKNOWN, "UNKNOWN" }, \
- { DT_FIFO, "FIFO" }, \
- { DT_CHR, "CHR" }, \
- { DT_DIR, "DIR" }, \
- { DT_BLK, "BLK" }, \
- { DT_REG, "REG" }, \
- { DT_LNK, "LNK" }, \
- { DT_SOCK, "SOCK" }, \
- { DT_WHT, "WHT" })
-
-#define show_fs_fcntl_open_flags(x) \
- __print_flags(x, "|", \
- { O_WRONLY, "O_WRONLY" }, \
- { O_RDWR, "O_RDWR" }, \
- { O_CREAT, "O_CREAT" }, \
- { O_EXCL, "O_EXCL" }, \
- { O_NOCTTY, "O_NOCTTY" }, \
- { O_TRUNC, "O_TRUNC" }, \
- { O_APPEND, "O_APPEND" }, \
- { O_NONBLOCK, "O_NONBLOCK" }, \
- { O_DSYNC, "O_DSYNC" }, \
- { O_DIRECT, "O_DIRECT" }, \
- { O_LARGEFILE, "O_LARGEFILE" }, \
- { O_DIRECTORY, "O_DIRECTORY" }, \
- { O_NOFOLLOW, "O_NOFOLLOW" }, \
- { O_NOATIME, "O_NOATIME" }, \
- { O_CLOEXEC, "O_CLOEXEC" })
-
-#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x }
-#define show_fs_fmode_flags(x) \
- __print_flags(x, "|", \
- __fmode_flag(READ), \
- __fmode_flag(WRITE), \
- __fmode_flag(EXEC))
-
-#ifdef CONFIG_64BIT
-#define show_fs_fcntl_cmd(x) \
- __print_symbolic(x, \
- { F_DUPFD, "DUPFD" }, \
- { F_GETFD, "GETFD" }, \
- { F_SETFD, "SETFD" }, \
- { F_GETFL, "GETFL" }, \
- { F_SETFL, "SETFL" }, \
- { F_GETLK, "GETLK" }, \
- { F_SETLK, "SETLK" }, \
- { F_SETLKW, "SETLKW" }, \
- { F_SETOWN, "SETOWN" }, \
- { F_GETOWN, "GETOWN" }, \
- { F_SETSIG, "SETSIG" }, \
- { F_GETSIG, "GETSIG" }, \
- { F_SETOWN_EX, "SETOWN_EX" }, \
- { F_GETOWN_EX, "GETOWN_EX" }, \
- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
- { F_OFD_GETLK, "OFD_GETLK" }, \
- { F_OFD_SETLK, "OFD_SETLK" }, \
- { F_OFD_SETLKW, "OFD_SETLKW" })
-#else /* CONFIG_64BIT */
-#define show_fs_fcntl_cmd(x) \
- __print_symbolic(x, \
- { F_DUPFD, "DUPFD" }, \
- { F_GETFD, "GETFD" }, \
- { F_SETFD, "SETFD" }, \
- { F_GETFL, "GETFL" }, \
- { F_SETFL, "SETFL" }, \
- { F_GETLK, "GETLK" }, \
- { F_SETLK, "SETLK" }, \
- { F_SETLKW, "SETLKW" }, \
- { F_SETOWN, "SETOWN" }, \
- { F_GETOWN, "GETOWN" }, \
- { F_SETSIG, "SETSIG" }, \
- { F_GETSIG, "GETSIG" }, \
- { F_GETLK64, "GETLK64" }, \
- { F_SETLK64, "SETLK64" }, \
- { F_SETLKW64, "SETLKW64" }, \
- { F_SETOWN_EX, "SETOWN_EX" }, \
- { F_GETOWN_EX, "GETOWN_EX" }, \
- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
- { F_OFD_GETLK, "OFD_GETLK" }, \
- { F_OFD_SETLK, "OFD_SETLK" }, \
- { F_OFD_SETLKW, "OFD_SETLKW" })
-#endif /* CONFIG_64BIT */
-
-#define show_fs_fcntl_lock_type(x) \
- __print_symbolic(x, \
- { F_RDLCK, "RDLCK" }, \
- { F_WRLCK, "WRLCK" }, \
- { F_UNLCK, "UNLCK" })
-
-#define show_fs_lookup_flags(flags) \
- __print_flags(flags, "|", \
- { LOOKUP_FOLLOW, "FOLLOW" }, \
- { LOOKUP_DIRECTORY, "DIRECTORY" }, \
- { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
- { LOOKUP_EMPTY, "EMPTY" }, \
- { LOOKUP_DOWN, "DOWN" }, \
- { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \
- { LOOKUP_REVAL, "REVAL" }, \
- { LOOKUP_RCU, "RCU" }, \
- { LOOKUP_OPEN, "OPEN" }, \
- { LOOKUP_CREATE, "CREATE" }, \
- { LOOKUP_EXCL, "EXCL" }, \
- { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
- { LOOKUP_PARENT, "PARENT" }, \
- { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \
- { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \
- { LOOKUP_NO_XDEV, "NO_XDEV" }, \
- { LOOKUP_BENEATH, "BENEATH" }, \
- { LOOKUP_IN_ROOT, "IN_ROOT" }, \
- { LOOKUP_CACHED, "CACHED" })
diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h
deleted file mode 100644
index 09ffdbb04134..000000000000
--- a/include/trace/events/nfs.h
+++ /dev/null
@@ -1,375 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Display helpers for NFS protocol elements
- *
- * Author: Chuck Lever <chuck.lever@xxxxxxxxxx>
- *
- * Copyright (c) 2020, Oracle and/or its affiliates.
- */
-
-#include <linux/nfs.h>
-#include <linux/nfs4.h>
-#include <uapi/linux/nfs.h>
-
-TRACE_DEFINE_ENUM(NFS_OK);
-TRACE_DEFINE_ENUM(NFSERR_PERM);
-TRACE_DEFINE_ENUM(NFSERR_NOENT);
-TRACE_DEFINE_ENUM(NFSERR_IO);
-TRACE_DEFINE_ENUM(NFSERR_NXIO);
-TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
-TRACE_DEFINE_ENUM(NFSERR_ACCES);
-TRACE_DEFINE_ENUM(NFSERR_EXIST);
-TRACE_DEFINE_ENUM(NFSERR_XDEV);
-TRACE_DEFINE_ENUM(NFSERR_NODEV);
-TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
-TRACE_DEFINE_ENUM(NFSERR_ISDIR);
-TRACE_DEFINE_ENUM(NFSERR_INVAL);
-TRACE_DEFINE_ENUM(NFSERR_FBIG);
-TRACE_DEFINE_ENUM(NFSERR_NOSPC);
-TRACE_DEFINE_ENUM(NFSERR_ROFS);
-TRACE_DEFINE_ENUM(NFSERR_MLINK);
-TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
-TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
-TRACE_DEFINE_ENUM(NFSERR_DQUOT);
-TRACE_DEFINE_ENUM(NFSERR_STALE);
-TRACE_DEFINE_ENUM(NFSERR_REMOTE);
-TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
-TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
-TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
-TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
-TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
-TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
-TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
-TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
-TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
-
-#define show_nfs_status(x) \
- __print_symbolic(x, \
- { NFS_OK, "OK" }, \
- { NFSERR_PERM, "PERM" }, \
- { NFSERR_NOENT, "NOENT" }, \
- { NFSERR_IO, "IO" }, \
- { NFSERR_NXIO, "NXIO" }, \
- { ECHILD, "CHILD" }, \
- { NFSERR_EAGAIN, "AGAIN" }, \
- { NFSERR_ACCES, "ACCES" }, \
- { NFSERR_EXIST, "EXIST" }, \
- { NFSERR_XDEV, "XDEV" }, \
- { NFSERR_NODEV, "NODEV" }, \
- { NFSERR_NOTDIR, "NOTDIR" }, \
- { NFSERR_ISDIR, "ISDIR" }, \
- { NFSERR_INVAL, "INVAL" }, \
- { NFSERR_FBIG, "FBIG" }, \
- { NFSERR_NOSPC, "NOSPC" }, \
- { NFSERR_ROFS, "ROFS" }, \
- { NFSERR_MLINK, "MLINK" }, \
- { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
- { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
- { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
- { NFSERR_DQUOT, "DQUOT" }, \
- { NFSERR_STALE, "STALE" }, \
- { NFSERR_REMOTE, "REMOTE" }, \
- { NFSERR_WFLUSH, "WFLUSH" }, \
- { NFSERR_BADHANDLE, "BADHANDLE" }, \
- { NFSERR_NOT_SYNC, "NOTSYNC" }, \
- { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
- { NFSERR_NOTSUPP, "NOTSUPP" }, \
- { NFSERR_TOOSMALL, "TOOSMALL" }, \
- { NFSERR_SERVERFAULT, "REMOTEIO" }, \
- { NFSERR_BADTYPE, "BADTYPE" }, \
- { NFSERR_JUKEBOX, "JUKEBOX" })
-
-TRACE_DEFINE_ENUM(NFS_UNSTABLE);
-TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
-TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
-
-#define show_nfs_stable_how(x) \
- __print_symbolic(x, \
- { NFS_UNSTABLE, "UNSTABLE" }, \
- { NFS_DATA_SYNC, "DATA_SYNC" }, \
- { NFS_FILE_SYNC, "FILE_SYNC" })
-
-TRACE_DEFINE_ENUM(NFS4_OK);
-TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
-TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
-TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
-TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
-TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
-TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
-TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
-TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
-TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
-TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
-TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
-TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
-TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
-TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
-TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
-TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
-TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
-TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
-TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
-TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
-TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
-TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
-TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
-TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
-TRACE_DEFINE_ENUM(NFS4ERR_IO);
-TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
-TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
-TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
-TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
-TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
-TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
-TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
-TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
-TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
-TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
-TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
-TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
-TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
-TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
-TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_PERM);
-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
-TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
-TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
-TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
-TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
-TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
-TRACE_DEFINE_ENUM(NFS4ERR_SAME);
-TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
-TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
-TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
-TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
-TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
-
-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
-
-#define show_nfs4_status(x) \
- __print_symbolic(x, \
- { NFS4_OK, "OK" }, \
- { EPERM, "EPERM" }, \
- { ENOENT, "ENOENT" }, \
- { EIO, "EIO" }, \
- { ENXIO, "ENXIO" }, \
- { EACCES, "EACCES" }, \
- { EEXIST, "EEXIST" }, \
- { EXDEV, "EXDEV" }, \
- { ENOTDIR, "ENOTDIR" }, \
- { EISDIR, "EISDIR" }, \
- { EFBIG, "EFBIG" }, \
- { ENOSPC, "ENOSPC" }, \
- { EROFS, "EROFS" }, \
- { EMLINK, "EMLINK" }, \
- { ENAMETOOLONG, "ENAMETOOLONG" }, \
- { ENOTEMPTY, "ENOTEMPTY" }, \
- { EDQUOT, "EDQUOT" }, \
- { ESTALE, "ESTALE" }, \
- { EBADHANDLE, "EBADHANDLE" }, \
- { EBADCOOKIE, "EBADCOOKIE" }, \
- { ENOTSUPP, "ENOTSUPP" }, \
- { ETOOSMALL, "ETOOSMALL" }, \
- { EREMOTEIO, "EREMOTEIO" }, \
- { EBADTYPE, "EBADTYPE" }, \
- { EAGAIN, "EAGAIN" }, \
- { ELOOP, "ELOOP" }, \
- { EOPNOTSUPP, "EOPNOTSUPP" }, \
- { EDEADLK, "EDEADLK" }, \
- { ENOMEM, "ENOMEM" }, \
- { EKEYEXPIRED, "EKEYEXPIRED" }, \
- { ETIMEDOUT, "ETIMEDOUT" }, \
- { ERESTARTSYS, "ERESTARTSYS" }, \
- { ECONNREFUSED, "ECONNREFUSED" }, \
- { ECONNRESET, "ECONNRESET" }, \
- { ENETUNREACH, "ENETUNREACH" }, \
- { EHOSTUNREACH, "EHOSTUNREACH" }, \
- { EHOSTDOWN, "EHOSTDOWN" }, \
- { EPIPE, "EPIPE" }, \
- { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
- { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
- { NFS4ERR_ACCESS, "ACCESS" }, \
- { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
- { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
- { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
- { NFS4ERR_BADCHAR, "BADCHAR" }, \
- { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
- { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
- { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
- { NFS4ERR_BADLABEL, "BADLABEL" }, \
- { NFS4ERR_BADNAME, "BADNAME" }, \
- { NFS4ERR_BADOWNER, "BADOWNER" }, \
- { NFS4ERR_BADSESSION, "BADSESSION" }, \
- { NFS4ERR_BADSLOT, "BADSLOT" }, \
- { NFS4ERR_BADTYPE, "BADTYPE" }, \
- { NFS4ERR_BADXDR, "BADXDR" }, \
- { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
- { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
- { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
- { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
- { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
- { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
- { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
- { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
- { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
- { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
- { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
- { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
- { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
- { NFS4ERR_DELAY, "DELAY" }, \
- { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \
- { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
- { NFS4ERR_DENIED, "DENIED" }, \
- { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
- { NFS4ERR_DQUOT, "DQUOT" }, \
- { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
- { NFS4ERR_EXIST, "EXIST" }, \
- { NFS4ERR_EXPIRED, "EXPIRED" }, \
- { NFS4ERR_FBIG, "FBIG" }, \
- { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
- { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
- { NFS4ERR_GRACE, "GRACE" }, \
- { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
- { NFS4ERR_INVAL, "INVAL" }, \
- { NFS4ERR_IO, "IO" }, \
- { NFS4ERR_ISDIR, "ISDIR" }, \
- { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
- { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
- { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
- { NFS4ERR_LOCKED, "LOCKED" }, \
- { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
- { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
- { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
- { NFS4ERR_MLINK, "MLINK" }, \
- { NFS4ERR_MOVED, "MOVED" }, \
- { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
- { NFS4ERR_NOENT, "NOENT" }, \
- { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
- { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
- { NFS4ERR_NOSPC, "NOSPC" }, \
- { NFS4ERR_NOTDIR, "NOTDIR" }, \
- { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
- { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
- { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
- { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
- { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
- { NFS4ERR_NXIO, "NXIO" }, \
- { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
- { NFS4ERR_OPENMODE, "OPENMODE" }, \
- { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
- { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
- { NFS4ERR_PERM, "PERM" }, \
- { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
- { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
- { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
- { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
- { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
- { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
- { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
- { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \
- { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
- { NFS4ERR_RESOURCE, "RESOURCE" }, \
- { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
- { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
- { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
- { NFS4ERR_ROFS, "ROFS" }, \
- { NFS4ERR_SAME, "SAME" }, \
- { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
- { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
- { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
- { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
- { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
- { NFS4ERR_STALE, "STALE" }, \
- { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
- { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
- { NFS4ERR_SYMLINK, "SYMLINK" }, \
- { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
- { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
- { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
- { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
- { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
- { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
- { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
- { NFS4ERR_XDEV, "XDEV" }, \
- /* ***** Internal to Linux NFS client ***** */ \
- { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
- { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
-
-#define show_nfs4_verifier(x) \
- __print_hex_str(x, NFS4_VERIFIER_SIZE)
-
-TRACE_DEFINE_ENUM(IOMODE_READ);
-TRACE_DEFINE_ENUM(IOMODE_RW);
-TRACE_DEFINE_ENUM(IOMODE_ANY);
-
-#define show_pnfs_layout_iomode(x) \
- __print_symbolic(x, \
- { IOMODE_READ, "READ" }, \
- { IOMODE_RW, "RW" }, \
- { IOMODE_ANY, "ANY" })
-
-#define show_nfs4_seq4_status(x) \
- __print_flags(x, "|", \
- { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \
- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \
- { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
- { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
- { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \
- { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \
- { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
- { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
- { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
- { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h
deleted file mode 100644
index 81bb454fc288..000000000000
--- a/include/trace/events/rdma.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2017 Oracle. All rights reserved.
- */
-
-/*
- * enum ib_event_type, from include/rdma/ib_verbs.h
- */
-#define IB_EVENT_LIST \
- ib_event(CQ_ERR) \
- ib_event(QP_FATAL) \
- ib_event(QP_REQ_ERR) \
- ib_event(QP_ACCESS_ERR) \
- ib_event(COMM_EST) \
- ib_event(SQ_DRAINED) \
- ib_event(PATH_MIG) \
- ib_event(PATH_MIG_ERR) \
- ib_event(DEVICE_FATAL) \
- ib_event(PORT_ACTIVE) \
- ib_event(PORT_ERR) \
- ib_event(LID_CHANGE) \
- ib_event(PKEY_CHANGE) \
- ib_event(SM_CHANGE) \
- ib_event(SRQ_ERR) \
- ib_event(SRQ_LIMIT_REACHED) \
- ib_event(QP_LAST_WQE_REACHED) \
- ib_event(CLIENT_REREGISTER) \
- ib_event(GID_CHANGE) \
- ib_event_end(WQ_FATAL)
-
-#undef ib_event
-#undef ib_event_end
-
-#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
-#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
-
-IB_EVENT_LIST
-
-#undef ib_event
-#undef ib_event_end
-
-#define ib_event(x) { IB_EVENT_##x, #x },
-#define ib_event_end(x) { IB_EVENT_##x, #x }
-
-#define rdma_show_ib_event(x) \
- __print_symbolic(x, IB_EVENT_LIST)
-
-/*
- * enum ib_wc_status type, from include/rdma/ib_verbs.h
- */
-#define IB_WC_STATUS_LIST \
- ib_wc_status(SUCCESS) \
- ib_wc_status(LOC_LEN_ERR) \
- ib_wc_status(LOC_QP_OP_ERR) \
- ib_wc_status(LOC_EEC_OP_ERR) \
- ib_wc_status(LOC_PROT_ERR) \
- ib_wc_status(WR_FLUSH_ERR) \
- ib_wc_status(MW_BIND_ERR) \
- ib_wc_status(BAD_RESP_ERR) \
- ib_wc_status(LOC_ACCESS_ERR) \
- ib_wc_status(REM_INV_REQ_ERR) \
- ib_wc_status(REM_ACCESS_ERR) \
- ib_wc_status(REM_OP_ERR) \
- ib_wc_status(RETRY_EXC_ERR) \
- ib_wc_status(RNR_RETRY_EXC_ERR) \
- ib_wc_status(LOC_RDD_VIOL_ERR) \
- ib_wc_status(REM_INV_RD_REQ_ERR) \
- ib_wc_status(REM_ABORT_ERR) \
- ib_wc_status(INV_EECN_ERR) \
- ib_wc_status(INV_EEC_STATE_ERR) \
- ib_wc_status(FATAL_ERR) \
- ib_wc_status(RESP_TIMEOUT_ERR) \
- ib_wc_status_end(GENERAL_ERR)
-
-#undef ib_wc_status
-#undef ib_wc_status_end
-
-#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
-#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
-
-IB_WC_STATUS_LIST
-
-#undef ib_wc_status
-#undef ib_wc_status_end
-
-#define ib_wc_status(x) { IB_WC_##x, #x },
-#define ib_wc_status_end(x) { IB_WC_##x, #x }
-
-#define rdma_show_wc_status(x) \
- __print_symbolic(x, IB_WC_STATUS_LIST)
-
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST \
- ib_cm_event(REQ_ERROR) \
- ib_cm_event(REQ_RECEIVED) \
- ib_cm_event(REP_ERROR) \
- ib_cm_event(REP_RECEIVED) \
- ib_cm_event(RTU_RECEIVED) \
- ib_cm_event(USER_ESTABLISHED) \
- ib_cm_event(DREQ_ERROR) \
- ib_cm_event(DREQ_RECEIVED) \
- ib_cm_event(DREP_RECEIVED) \
- ib_cm_event(TIMEWAIT_EXIT) \
- ib_cm_event(MRA_RECEIVED) \
- ib_cm_event(REJ_RECEIVED) \
- ib_cm_event(LAP_ERROR) \
- ib_cm_event(LAP_RECEIVED) \
- ib_cm_event(APR_RECEIVED) \
- ib_cm_event(SIDR_REQ_ERROR) \
- ib_cm_event(SIDR_REQ_RECEIVED) \
- ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) { IB_CM_##x, #x },
-#define ib_cm_event_end(x) { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
- __print_symbolic(x, IB_CM_EVENT_LIST)
-
-/*
- * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
- */
-#define RDMA_CM_EVENT_LIST \
- rdma_cm_event(ADDR_RESOLVED) \
- rdma_cm_event(ADDR_ERROR) \
- rdma_cm_event(ROUTE_RESOLVED) \
- rdma_cm_event(ROUTE_ERROR) \
- rdma_cm_event(CONNECT_REQUEST) \
- rdma_cm_event(CONNECT_RESPONSE) \
- rdma_cm_event(CONNECT_ERROR) \
- rdma_cm_event(UNREACHABLE) \
- rdma_cm_event(REJECTED) \
- rdma_cm_event(ESTABLISHED) \
- rdma_cm_event(DISCONNECTED) \
- rdma_cm_event(DEVICE_REMOVAL) \
- rdma_cm_event(MULTICAST_JOIN) \
- rdma_cm_event(MULTICAST_ERROR) \
- rdma_cm_event(ADDR_CHANGE) \
- rdma_cm_event_end(TIMEWAIT_EXIT)
-
-#undef rdma_cm_event
-#undef rdma_cm_event_end
-
-#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
-#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
-
-RDMA_CM_EVENT_LIST
-
-#undef rdma_cm_event
-#undef rdma_cm_event_end
-
-#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
-#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
-
-#define rdma_show_cm_event(x) \
- __print_symbolic(x, RDMA_CM_EVENT_LIST)
diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index c9048f3e471b..3f121eed369e 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -13,7 +13,7 @@

#include <linux/tracepoint.h>

-#include <trace/events/sunrpc_base.h>
+#include <trace/misc/sunrpc.h>

/**
** GSS-API related trace events
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index fcd3b3f1020a..8f461e04e5f0 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -15,8 +15,8 @@
#include <linux/tracepoint.h>
#include <rdma/ib_cm.h>

-#include <trace/events/rdma.h>
-#include <trace/events/sunrpc_base.h>
+#include <trace/misc/rdma.h>
+#include <trace/misc/sunrpc.h>

/**
** Event classes
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f48f2ab9d238..ffe2679a13ce 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -14,7 +14,7 @@
#include <linux/net.h>
#include <linux/tracepoint.h>

-#include <trace/events/sunrpc_base.h>
+#include <trace/misc/sunrpc.h>

TRACE_DEFINE_ENUM(SOCK_STREAM);
TRACE_DEFINE_ENUM(SOCK_DGRAM);
diff --git a/include/trace/events/sunrpc_base.h b/include/trace/events/sunrpc_base.h
deleted file mode 100644
index 588557d07ea8..000000000000
--- a/include/trace/events/sunrpc_base.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2021 Oracle and/or its affiliates.
- *
- * Common types and format specifiers for sunrpc.
- */
-
-#if !defined(_TRACE_SUNRPC_BASE_H)
-#define _TRACE_SUNRPC_BASE_H
-
-#include <linux/tracepoint.h>
-
-#define SUNRPC_TRACE_PID_SPECIFIER "%08x"
-#define SUNRPC_TRACE_CLID_SPECIFIER "%08x"
-#define SUNRPC_TRACE_TASK_SPECIFIER \
- "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER
-
-#endif /* _TRACE_SUNRPC_BASE_H */
diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
new file mode 100644
index 000000000000..738b97f22f36
--- /dev/null
+++ b/include/trace/misc/fs.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Display helpers for generic filesystem items
+ *
+ * Author: Chuck Lever <chuck.lever@xxxxxxxxxx>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <linux/fs.h>
+
+#define show_fs_dirent_type(x) \
+ __print_symbolic(x, \
+ { DT_UNKNOWN, "UNKNOWN" }, \
+ { DT_FIFO, "FIFO" }, \
+ { DT_CHR, "CHR" }, \
+ { DT_DIR, "DIR" }, \
+ { DT_BLK, "BLK" }, \
+ { DT_REG, "REG" }, \
+ { DT_LNK, "LNK" }, \
+ { DT_SOCK, "SOCK" }, \
+ { DT_WHT, "WHT" })
+
+#define show_fs_fcntl_open_flags(x) \
+ __print_flags(x, "|", \
+ { O_WRONLY, "O_WRONLY" }, \
+ { O_RDWR, "O_RDWR" }, \
+ { O_CREAT, "O_CREAT" }, \
+ { O_EXCL, "O_EXCL" }, \
+ { O_NOCTTY, "O_NOCTTY" }, \
+ { O_TRUNC, "O_TRUNC" }, \
+ { O_APPEND, "O_APPEND" }, \
+ { O_NONBLOCK, "O_NONBLOCK" }, \
+ { O_DSYNC, "O_DSYNC" }, \
+ { O_DIRECT, "O_DIRECT" }, \
+ { O_LARGEFILE, "O_LARGEFILE" }, \
+ { O_DIRECTORY, "O_DIRECTORY" }, \
+ { O_NOFOLLOW, "O_NOFOLLOW" }, \
+ { O_NOATIME, "O_NOATIME" }, \
+ { O_CLOEXEC, "O_CLOEXEC" })
+
+#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x }
+#define show_fs_fmode_flags(x) \
+ __print_flags(x, "|", \
+ __fmode_flag(READ), \
+ __fmode_flag(WRITE), \
+ __fmode_flag(EXEC))
+
+#ifdef CONFIG_64BIT
+#define show_fs_fcntl_cmd(x) \
+ __print_symbolic(x, \
+ { F_DUPFD, "DUPFD" }, \
+ { F_GETFD, "GETFD" }, \
+ { F_SETFD, "SETFD" }, \
+ { F_GETFL, "GETFL" }, \
+ { F_SETFL, "SETFL" }, \
+ { F_GETLK, "GETLK" }, \
+ { F_SETLK, "SETLK" }, \
+ { F_SETLKW, "SETLKW" }, \
+ { F_SETOWN, "SETOWN" }, \
+ { F_GETOWN, "GETOWN" }, \
+ { F_SETSIG, "SETSIG" }, \
+ { F_GETSIG, "GETSIG" }, \
+ { F_SETOWN_EX, "SETOWN_EX" }, \
+ { F_GETOWN_EX, "GETOWN_EX" }, \
+ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
+ { F_OFD_GETLK, "OFD_GETLK" }, \
+ { F_OFD_SETLK, "OFD_SETLK" }, \
+ { F_OFD_SETLKW, "OFD_SETLKW" })
+#else /* CONFIG_64BIT */
+#define show_fs_fcntl_cmd(x) \
+ __print_symbolic(x, \
+ { F_DUPFD, "DUPFD" }, \
+ { F_GETFD, "GETFD" }, \
+ { F_SETFD, "SETFD" }, \
+ { F_GETFL, "GETFL" }, \
+ { F_SETFL, "SETFL" }, \
+ { F_GETLK, "GETLK" }, \
+ { F_SETLK, "SETLK" }, \
+ { F_SETLKW, "SETLKW" }, \
+ { F_SETOWN, "SETOWN" }, \
+ { F_GETOWN, "GETOWN" }, \
+ { F_SETSIG, "SETSIG" }, \
+ { F_GETSIG, "GETSIG" }, \
+ { F_GETLK64, "GETLK64" }, \
+ { F_SETLK64, "SETLK64" }, \
+ { F_SETLKW64, "SETLKW64" }, \
+ { F_SETOWN_EX, "SETOWN_EX" }, \
+ { F_GETOWN_EX, "GETOWN_EX" }, \
+ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
+ { F_OFD_GETLK, "OFD_GETLK" }, \
+ { F_OFD_SETLK, "OFD_SETLK" }, \
+ { F_OFD_SETLKW, "OFD_SETLKW" })
+#endif /* CONFIG_64BIT */
+
+#define show_fs_fcntl_lock_type(x) \
+ __print_symbolic(x, \
+ { F_RDLCK, "RDLCK" }, \
+ { F_WRLCK, "WRLCK" }, \
+ { F_UNLCK, "UNLCK" })
+
+#define show_fs_lookup_flags(flags) \
+ __print_flags(flags, "|", \
+ { LOOKUP_FOLLOW, "FOLLOW" }, \
+ { LOOKUP_DIRECTORY, "DIRECTORY" }, \
+ { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
+ { LOOKUP_EMPTY, "EMPTY" }, \
+ { LOOKUP_DOWN, "DOWN" }, \
+ { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \
+ { LOOKUP_REVAL, "REVAL" }, \
+ { LOOKUP_RCU, "RCU" }, \
+ { LOOKUP_OPEN, "OPEN" }, \
+ { LOOKUP_CREATE, "CREATE" }, \
+ { LOOKUP_EXCL, "EXCL" }, \
+ { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
+ { LOOKUP_PARENT, "PARENT" }, \
+ { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \
+ { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \
+ { LOOKUP_NO_XDEV, "NO_XDEV" }, \
+ { LOOKUP_BENEATH, "BENEATH" }, \
+ { LOOKUP_IN_ROOT, "IN_ROOT" }, \
+ { LOOKUP_CACHED, "CACHED" })
diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h
new file mode 100644
index 000000000000..0d9d48dca38a
--- /dev/null
+++ b/include/trace/misc/nfs.h
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Display helpers for NFS protocol elements
+ *
+ * Author: Chuck Lever <chuck.lever@xxxxxxxxxx>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <uapi/linux/nfs.h>
+
+TRACE_DEFINE_ENUM(NFS_OK);
+TRACE_DEFINE_ENUM(NFSERR_PERM);
+TRACE_DEFINE_ENUM(NFSERR_NOENT);
+TRACE_DEFINE_ENUM(NFSERR_IO);
+TRACE_DEFINE_ENUM(NFSERR_NXIO);
+TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
+TRACE_DEFINE_ENUM(NFSERR_ACCES);
+TRACE_DEFINE_ENUM(NFSERR_EXIST);
+TRACE_DEFINE_ENUM(NFSERR_XDEV);
+TRACE_DEFINE_ENUM(NFSERR_NODEV);
+TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+TRACE_DEFINE_ENUM(NFSERR_INVAL);
+TRACE_DEFINE_ENUM(NFSERR_FBIG);
+TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+TRACE_DEFINE_ENUM(NFSERR_ROFS);
+TRACE_DEFINE_ENUM(NFSERR_MLINK);
+TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+TRACE_DEFINE_ENUM(NFSERR_STALE);
+TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+
+#define show_nfs_status(x) \
+ __print_symbolic(x, \
+ { NFS_OK, "OK" }, \
+ { NFSERR_PERM, "PERM" }, \
+ { NFSERR_NOENT, "NOENT" }, \
+ { NFSERR_IO, "IO" }, \
+ { NFSERR_NXIO, "NXIO" }, \
+ { ECHILD, "CHILD" }, \
+ { NFSERR_EAGAIN, "AGAIN" }, \
+ { NFSERR_ACCES, "ACCES" }, \
+ { NFSERR_EXIST, "EXIST" }, \
+ { NFSERR_XDEV, "XDEV" }, \
+ { NFSERR_NODEV, "NODEV" }, \
+ { NFSERR_NOTDIR, "NOTDIR" }, \
+ { NFSERR_ISDIR, "ISDIR" }, \
+ { NFSERR_INVAL, "INVAL" }, \
+ { NFSERR_FBIG, "FBIG" }, \
+ { NFSERR_NOSPC, "NOSPC" }, \
+ { NFSERR_ROFS, "ROFS" }, \
+ { NFSERR_MLINK, "MLINK" }, \
+ { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
+ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
+ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
+ { NFSERR_DQUOT, "DQUOT" }, \
+ { NFSERR_STALE, "STALE" }, \
+ { NFSERR_REMOTE, "REMOTE" }, \
+ { NFSERR_WFLUSH, "WFLUSH" }, \
+ { NFSERR_BADHANDLE, "BADHANDLE" }, \
+ { NFSERR_NOT_SYNC, "NOTSYNC" }, \
+ { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
+ { NFSERR_NOTSUPP, "NOTSUPP" }, \
+ { NFSERR_TOOSMALL, "TOOSMALL" }, \
+ { NFSERR_SERVERFAULT, "REMOTEIO" }, \
+ { NFSERR_BADTYPE, "BADTYPE" }, \
+ { NFSERR_JUKEBOX, "JUKEBOX" })
+
+TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+
+#define show_nfs_stable_how(x) \
+ __print_symbolic(x, \
+ { NFS_UNSTABLE, "UNSTABLE" }, \
+ { NFS_DATA_SYNC, "DATA_SYNC" }, \
+ { NFS_FILE_SYNC, "FILE_SYNC" })
+
+TRACE_DEFINE_ENUM(NFS4_OK);
+TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
+TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
+TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
+TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
+TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
+TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
+TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
+TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
+TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
+TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
+TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
+TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
+TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
+TRACE_DEFINE_ENUM(NFS4ERR_IO);
+TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
+TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
+TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
+TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_PERM);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
+TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
+TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
+TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
+TRACE_DEFINE_ENUM(NFS4ERR_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
+TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
+TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+
+#define show_nfs4_status(x) \
+ __print_symbolic(x, \
+ { NFS4_OK, "OK" }, \
+ { EPERM, "EPERM" }, \
+ { ENOENT, "ENOENT" }, \
+ { EIO, "EIO" }, \
+ { ENXIO, "ENXIO" }, \
+ { EACCES, "EACCES" }, \
+ { EEXIST, "EEXIST" }, \
+ { EXDEV, "EXDEV" }, \
+ { ENOTDIR, "ENOTDIR" }, \
+ { EISDIR, "EISDIR" }, \
+ { EFBIG, "EFBIG" }, \
+ { ENOSPC, "ENOSPC" }, \
+ { EROFS, "EROFS" }, \
+ { EMLINK, "EMLINK" }, \
+ { ENAMETOOLONG, "ENAMETOOLONG" }, \
+ { ENOTEMPTY, "ENOTEMPTY" }, \
+ { EDQUOT, "EDQUOT" }, \
+ { ESTALE, "ESTALE" }, \
+ { EBADHANDLE, "EBADHANDLE" }, \
+ { EBADCOOKIE, "EBADCOOKIE" }, \
+ { ENOTSUPP, "ENOTSUPP" }, \
+ { ETOOSMALL, "ETOOSMALL" }, \
+ { EREMOTEIO, "EREMOTEIO" }, \
+ { EBADTYPE, "EBADTYPE" }, \
+ { EAGAIN, "EAGAIN" }, \
+ { ELOOP, "ELOOP" }, \
+ { EOPNOTSUPP, "EOPNOTSUPP" }, \
+ { EDEADLK, "EDEADLK" }, \
+ { ENOMEM, "ENOMEM" }, \
+ { EKEYEXPIRED, "EKEYEXPIRED" }, \
+ { ETIMEDOUT, "ETIMEDOUT" }, \
+ { ERESTARTSYS, "ERESTARTSYS" }, \
+ { ECONNREFUSED, "ECONNREFUSED" }, \
+ { ECONNRESET, "ECONNRESET" }, \
+ { ENETUNREACH, "ENETUNREACH" }, \
+ { EHOSTUNREACH, "EHOSTUNREACH" }, \
+ { EHOSTDOWN, "EHOSTDOWN" }, \
+ { EPIPE, "EPIPE" }, \
+ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+ { NFS4ERR_ACCESS, "ACCESS" }, \
+ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+ { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+ { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+ { NFS4ERR_BADCHAR, "BADCHAR" }, \
+ { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+ { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+ { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+ { NFS4ERR_BADLABEL, "BADLABEL" }, \
+ { NFS4ERR_BADNAME, "BADNAME" }, \
+ { NFS4ERR_BADOWNER, "BADOWNER" }, \
+ { NFS4ERR_BADSESSION, "BADSESSION" }, \
+ { NFS4ERR_BADSLOT, "BADSLOT" }, \
+ { NFS4ERR_BADTYPE, "BADTYPE" }, \
+ { NFS4ERR_BADXDR, "BADXDR" }, \
+ { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+ { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+ { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+ { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+ { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+ { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+ { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+ { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+ { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+ { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+ { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
+ { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+ { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+ { NFS4ERR_DELAY, "DELAY" }, \
+ { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \
+ { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+ { NFS4ERR_DENIED, "DENIED" }, \
+ { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+ { NFS4ERR_DQUOT, "DQUOT" }, \
+ { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+ { NFS4ERR_EXIST, "EXIST" }, \
+ { NFS4ERR_EXPIRED, "EXPIRED" }, \
+ { NFS4ERR_FBIG, "FBIG" }, \
+ { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+ { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+ { NFS4ERR_GRACE, "GRACE" }, \
+ { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+ { NFS4ERR_INVAL, "INVAL" }, \
+ { NFS4ERR_IO, "IO" }, \
+ { NFS4ERR_ISDIR, "ISDIR" }, \
+ { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+ { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+ { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+ { NFS4ERR_LOCKED, "LOCKED" }, \
+ { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+ { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+ { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+ { NFS4ERR_MLINK, "MLINK" }, \
+ { NFS4ERR_MOVED, "MOVED" }, \
+ { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+ { NFS4ERR_NOENT, "NOENT" }, \
+ { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+ { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+ { NFS4ERR_NOSPC, "NOSPC" }, \
+ { NFS4ERR_NOTDIR, "NOTDIR" }, \
+ { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+ { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+ { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+ { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+ { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+ { NFS4ERR_NXIO, "NXIO" }, \
+ { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+ { NFS4ERR_OPENMODE, "OPENMODE" }, \
+ { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+ { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+ { NFS4ERR_PERM, "PERM" }, \
+ { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+ { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+ { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+ { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+ { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+ { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+ { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+ { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \
+ { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+ { NFS4ERR_RESOURCE, "RESOURCE" }, \
+ { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+ { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+ { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+ { NFS4ERR_ROFS, "ROFS" }, \
+ { NFS4ERR_SAME, "SAME" }, \
+ { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+ { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+ { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+ { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+ { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+ { NFS4ERR_STALE, "STALE" }, \
+ { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+ { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+ { NFS4ERR_SYMLINK, "SYMLINK" }, \
+ { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+ { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+ { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+ { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+ { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+ { NFS4ERR_XDEV, "XDEV" }, \
+ /* ***** Internal to Linux NFS client ***** */ \
+ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
+
+#define show_nfs4_verifier(x) \
+ __print_hex_str(x, NFS4_VERIFIER_SIZE)
+
+TRACE_DEFINE_ENUM(IOMODE_READ);
+TRACE_DEFINE_ENUM(IOMODE_RW);
+TRACE_DEFINE_ENUM(IOMODE_ANY);
+
+#define show_pnfs_layout_iomode(x) \
+ __print_symbolic(x, \
+ { IOMODE_READ, "READ" }, \
+ { IOMODE_RW, "RW" }, \
+ { IOMODE_ANY, "ANY" })
+
+#define show_rca_mask(x) \
+ __print_flags(x, "|", \
+ { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \
+ { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \
+ { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \
+ { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \
+ { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \
+ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \
+ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \
+ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \
+ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" })
+
+#define show_nfs4_seq4_status(x) \
+ __print_flags(x, "|", \
+ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \
+ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \
+ { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
+ { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
+ { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \
+ { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \
+ { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
+ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
+ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
+ { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
diff --git a/include/trace/misc/rdma.h b/include/trace/misc/rdma.h
new file mode 100644
index 000000000000..81bb454fc288
--- /dev/null
+++ b/include/trace/misc/rdma.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017 Oracle. All rights reserved.
+ */
+
+/*
+ * enum ib_event_type, from include/rdma/ib_verbs.h
+ */
+#define IB_EVENT_LIST \
+ ib_event(CQ_ERR) \
+ ib_event(QP_FATAL) \
+ ib_event(QP_REQ_ERR) \
+ ib_event(QP_ACCESS_ERR) \
+ ib_event(COMM_EST) \
+ ib_event(SQ_DRAINED) \
+ ib_event(PATH_MIG) \
+ ib_event(PATH_MIG_ERR) \
+ ib_event(DEVICE_FATAL) \
+ ib_event(PORT_ACTIVE) \
+ ib_event(PORT_ERR) \
+ ib_event(LID_CHANGE) \
+ ib_event(PKEY_CHANGE) \
+ ib_event(SM_CHANGE) \
+ ib_event(SRQ_ERR) \
+ ib_event(SRQ_LIMIT_REACHED) \
+ ib_event(QP_LAST_WQE_REACHED) \
+ ib_event(CLIENT_REREGISTER) \
+ ib_event(GID_CHANGE) \
+ ib_event_end(WQ_FATAL)
+
+#undef ib_event
+#undef ib_event_end
+
+#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+
+IB_EVENT_LIST
+
+#undef ib_event
+#undef ib_event_end
+
+#define ib_event(x) { IB_EVENT_##x, #x },
+#define ib_event_end(x) { IB_EVENT_##x, #x }
+
+#define rdma_show_ib_event(x) \
+ __print_symbolic(x, IB_EVENT_LIST)
+
+/*
+ * enum ib_wc_status type, from include/rdma/ib_verbs.h
+ */
+#define IB_WC_STATUS_LIST \
+ ib_wc_status(SUCCESS) \
+ ib_wc_status(LOC_LEN_ERR) \
+ ib_wc_status(LOC_QP_OP_ERR) \
+ ib_wc_status(LOC_EEC_OP_ERR) \
+ ib_wc_status(LOC_PROT_ERR) \
+ ib_wc_status(WR_FLUSH_ERR) \
+ ib_wc_status(MW_BIND_ERR) \
+ ib_wc_status(BAD_RESP_ERR) \
+ ib_wc_status(LOC_ACCESS_ERR) \
+ ib_wc_status(REM_INV_REQ_ERR) \
+ ib_wc_status(REM_ACCESS_ERR) \
+ ib_wc_status(REM_OP_ERR) \
+ ib_wc_status(RETRY_EXC_ERR) \
+ ib_wc_status(RNR_RETRY_EXC_ERR) \
+ ib_wc_status(LOC_RDD_VIOL_ERR) \
+ ib_wc_status(REM_INV_RD_REQ_ERR) \
+ ib_wc_status(REM_ABORT_ERR) \
+ ib_wc_status(INV_EECN_ERR) \
+ ib_wc_status(INV_EEC_STATE_ERR) \
+ ib_wc_status(FATAL_ERR) \
+ ib_wc_status(RESP_TIMEOUT_ERR) \
+ ib_wc_status_end(GENERAL_ERR)
+
+#undef ib_wc_status
+#undef ib_wc_status_end
+
+#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+
+IB_WC_STATUS_LIST
+
+#undef ib_wc_status
+#undef ib_wc_status_end
+
+#define ib_wc_status(x) { IB_WC_##x, #x },
+#define ib_wc_status_end(x) { IB_WC_##x, #x }
+
+#define rdma_show_wc_status(x) \
+ __print_symbolic(x, IB_WC_STATUS_LIST)
+
+/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST \
+ ib_cm_event(REQ_ERROR) \
+ ib_cm_event(REQ_RECEIVED) \
+ ib_cm_event(REP_ERROR) \
+ ib_cm_event(REP_RECEIVED) \
+ ib_cm_event(RTU_RECEIVED) \
+ ib_cm_event(USER_ESTABLISHED) \
+ ib_cm_event(DREQ_ERROR) \
+ ib_cm_event(DREQ_RECEIVED) \
+ ib_cm_event(DREP_RECEIVED) \
+ ib_cm_event(TIMEWAIT_EXIT) \
+ ib_cm_event(MRA_RECEIVED) \
+ ib_cm_event(REJ_RECEIVED) \
+ ib_cm_event(LAP_ERROR) \
+ ib_cm_event(LAP_RECEIVED) \
+ ib_cm_event(APR_RECEIVED) \
+ ib_cm_event(SIDR_REQ_ERROR) \
+ ib_cm_event(SIDR_REQ_RECEIVED) \
+ ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) { IB_CM_##x, #x },
+#define ib_cm_event_end(x) { IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+ __print_symbolic(x, IB_CM_EVENT_LIST)
+
+/*
+ * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
+ */
+#define RDMA_CM_EVENT_LIST \
+ rdma_cm_event(ADDR_RESOLVED) \
+ rdma_cm_event(ADDR_ERROR) \
+ rdma_cm_event(ROUTE_RESOLVED) \
+ rdma_cm_event(ROUTE_ERROR) \
+ rdma_cm_event(CONNECT_REQUEST) \
+ rdma_cm_event(CONNECT_RESPONSE) \
+ rdma_cm_event(CONNECT_ERROR) \
+ rdma_cm_event(UNREACHABLE) \
+ rdma_cm_event(REJECTED) \
+ rdma_cm_event(ESTABLISHED) \
+ rdma_cm_event(DISCONNECTED) \
+ rdma_cm_event(DEVICE_REMOVAL) \
+ rdma_cm_event(MULTICAST_JOIN) \
+ rdma_cm_event(MULTICAST_ERROR) \
+ rdma_cm_event(ADDR_CHANGE) \
+ rdma_cm_event_end(TIMEWAIT_EXIT)
+
+#undef rdma_cm_event
+#undef rdma_cm_event_end
+
+#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+
+RDMA_CM_EVENT_LIST
+
+#undef rdma_cm_event
+#undef rdma_cm_event_end
+
+#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
+#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
+
+#define rdma_show_cm_event(x) \
+ __print_symbolic(x, RDMA_CM_EVENT_LIST)
diff --git a/include/trace/misc/sunrpc.h b/include/trace/misc/sunrpc.h
new file mode 100644
index 000000000000..588557d07ea8
--- /dev/null
+++ b/include/trace/misc/sunrpc.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ *
+ * Common types and format specifiers for sunrpc.
+ */
+
+#if !defined(_TRACE_SUNRPC_BASE_H)
+#define _TRACE_SUNRPC_BASE_H
+
+#include <linux/tracepoint.h>
+
+#define SUNRPC_TRACE_PID_SPECIFIER "%08x"
+#define SUNRPC_TRACE_CLID_SPECIFIER "%08x"
+#define SUNRPC_TRACE_TASK_SPECIFIER \
+ "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER
+
+#endif /* _TRACE_SUNRPC_BASE_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 201dc77ebbd7..d5d2183730b9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3109,6 +3109,10 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
@@ -3117,6 +3121,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ * case, *params*->dmac and *params*->smac are not set either.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6687,6 +6696,8 @@ enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
};

enum {
@@ -6699,6 +6710,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};

struct bpf_fib_lookup {
@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};

+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};

- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied with the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index c4c53a9ab959..ff8d21f9e95b 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -145,7 +145,7 @@ struct in6_flowlabel_req {
#define IPV6_TLV_PADN 1
#define IPV6_TLV_ROUTERALERT 5
#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */
-#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */
+#define IPV6_TLV_IOAM 49 /* RFC 9486 */
#define IPV6_TLV_JUMBO 194
#define IPV6_TLV_HAO 201 /* home address option */

diff --git a/lib/nlattr.c b/lib/nlattr.c
index dffd60e4065f..86344df0ccf7 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
[NLA_S16] = sizeof(s16),
[NLA_S32] = sizeof(s32),
[NLA_S64] = sizeof(s64),
+ [NLA_BE16] = sizeof(__be16),
+ [NLA_BE32] = sizeof(__be32),
};

static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
@@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_S16] = sizeof(s16),
[NLA_S32] = sizeof(s32),
[NLA_S64] = sizeof(s64),
+ [NLA_BE16] = sizeof(__be16),
+ [NLA_BE32] = sizeof(__be32),
};

/*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 59577946735b..9736e762184b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -37,6 +37,7 @@
#include <linux/page_owner.h>
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
+#include <linux/compat.h>

#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
loff_t off_align = round_up(off, size);
unsigned long len_pad, ret;

+ if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
+ return 0;
+
if (off_end <= off_align || (off_end - off_align) < size)
return 0;

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6a1db678d032..a8932d449eb6 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);

+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);

if (hdev->hw_error)
@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);

- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);

- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}

void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 56ecc5f97b91..452d839c152f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);

conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;

+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);

if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -6716,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);

+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -7245,10 +7252,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 45d19294aa77..a33734046456 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,

/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+ hci_le_del_accept_list_sync(hdev, &params->addr,
+ params->addr_type);
return 0;
+ }

/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
@@ -5482,7 +5485,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)

bt_dev_dbg(hdev, "");

- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
return 0;

hci_dev_lock(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 81f5974e5eb5..b4cba55be5ad 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,

memset(&rsp, 0, sizeof(rsp));

- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 202ad43e35d6..bff48d576363 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif

+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
static unsigned int brnf_net_id __read_mostly;

struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}

+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ * macvlan0
+ * br0
+ * ethX ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ * -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ * interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ * and schedules a work queue to send them out on the lower devices.
+ *
+ * The clone skb->_nfct is not a copy, it is the same entry as the
+ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table). This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (likely(nf_ct_is_confirmed(ct)))
+ return NF_ACCEPT;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+ /* We can't call nf_confirm here, it would create a dependency
+ * on nf_conntrack module.
+ */
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook) {
+ skb->_nfct = 0ul;
+ nf_conntrack_put(nfct);
+ return NF_ACCEPT;
+ }
+
+ nf_bridge_pull_encap_header(skb);
+ ret = ct_hook->confirm(skb);
+ switch (ret & NF_VERDICT_MASK) {
+ case NF_STOLEN:
+ return NF_STOLEN;
+ default:
+ nf_bridge_push_encap_header(skb);
+ break;
+ }
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+ return ret;
+}
+#endif

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ {
+ .hook = br_nf_local_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_BR_PRI_LAST,
+ },
+#endif
{
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 06d94b2c6b5d..c7c27ada6704 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}

+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ if (skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+ skb->_nfct = 0;
+ nf_ct_put(ct);
+
+ return NF_ACCEPT;
+}
+
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
@@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
+ {
+ .hook = nf_ct_bridge_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
{
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a6110ea4009..cb7c4651eaec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib_table *tb;

+ if (flags & BPF_FIB_LOOKUP_TBID) {
+ tbid = params->tbid;
+ /* zero out for vlan output */
+ params->tbid = 0;
+ }
+
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex;

+ if (flags & BPF_FIB_LOOKUP_SRC)
+ params->ipv4_src = fib_result_prefsrc(net, &res);
+
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
@@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib6_table *tb;

+ if (flags & BPF_FIB_LOOKUP_TBID) {
+ tbid = params->tbid;
+ /* zero out for vlan output */
+ params->tbid = 0;
+ }
+
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;

+ if (flags & BPF_FIB_LOOKUP_SRC) {
+ if (res.f6i->fib6_prefsrc.plen) {
+ *src = res.f6i->fib6_prefsrc.addr;
+ } else {
+ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+ &fl6.daddr, 0,
+ src);
+ if (err)
+ return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+ }
+ }
+
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;

@@ -5957,7 +5984,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#endif

#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
- BPF_FIB_LOOKUP_SKIP_NEIGH)
+ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+ BPF_FIB_LOOKUP_SRC)

BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7cf1e42d7f93..ac379e4590f8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;

if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;

- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}

@@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}

- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 80cdc6f6b34c..0323ab5023c6 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return false;

/* Get next tlv */
- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 24961b304dad..328f9068c6a4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}

+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}

headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,

max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);

- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
return;
}

+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 4073762996e2..fc761915c5f6 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
goto free_nskb;

nf_ct_attach(nskb, oldskb);
+ nf_ct_set_closing(skb_nfct(oldskb));

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip_local_out for bridged traffic, the MAC source on
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 46527b5cc8f0..1648373692a9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5473,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}

addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0b42eb8c55aa..62247621cea5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_setsockopt = do_ipv6_setsockopt,
.ipv6_getsockopt = do_ipv6_getsockopt,
+ .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
};

static int __init inet6_init(void)
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 433d98bbe33f..71d692728230 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);

nf_ct_attach(nskb, oldskb);
+ nf_ct_set_closing(skb_nfct(oldskb));

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip6_local_out for bridged traffic, the MAC source on
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 256bf0b89e6c..0144d8ebdaef 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
if (!dev) {
rcu_read_unlock();
- return rc;
+ goto out_free;
}
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rt->mtu = 0;

} else {
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free;
}

spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}

+ /* route output functions consume the skb, even on error */
+ skb = NULL;
+
out_release:
if (!ext_rt)
mctp_route_release(rt);

mctp_dev_put(tmp_rt.dev);

+out_free:
+ kfree_skb(skb);
return rc;
}

diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index e57c5f47f035..7017dd60659d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
bool slow;
int err;

+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
@@ -65,7 +68,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 70a1025f093c..3328870b0c1f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}

-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *local,
+ bool fullmesh,
struct mptcp_addr_info *addrs)
{
bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@@ -446,15 +435,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;

- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

msk->pm.local_addr_used++;
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
+ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
if (nr)
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
spin_unlock_bh(&msk->pm.lock);
@@ -798,18 +800,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,

mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);

- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;

pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);

@@ -2028,7 +2030,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;

- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;

if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 631fa104617c..414ed70e7ba2 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -233,7 +233,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,

lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -489,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
goto destroy_err;
}

+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 859b18cb8e4f..3bc21581486a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->request_mptcp = 1;

/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);

return 0;
@@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}

@@ -2440,6 +2441,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
__mptcp_subflow_disconnect(ssk, subflow, flags);
+ if (msk->subflow && ssk == msk->subflow->sk)
+ msk->subflow->state = SS_UNCONNECTED;
release_sock(ssk);

goto out;
@@ -3166,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)

return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif

+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3188,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,

__mptcp_init_sock(nsk);

+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -3200,7 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;

if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b09220521323..2bc37773e780 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -475,7 +475,6 @@ struct mptcp_subflow_context {
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
@@ -483,7 +482,7 @@ struct mptcp_subflow_context {
u32 local_nonce;
u32 remote_token;
u8 hmac[MPTCPOPT_HMAC_LEN];
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}

static inline u64
@@ -909,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);

+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 45d20e20cfc0..891c2f4fed08 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -489,8 +489,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)

static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}

static int subflow_chk_local_id(struct sock *sk)
@@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;

- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;

err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
@@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);

ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);

return ctx;
}
@@ -1867,13 +1868,13 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;

/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 55a7f72d547c..edf92074221e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_conntrack_destroy);

+void nf_ct_set_closing(struct nf_conntrack *nfct)
+{
+ const struct nf_ct_hook *ct_hook;
+
+ if (!nfct)
+ return;
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ct_hook->set_closing(nfct);
+
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_ct_set_closing);
+
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
{
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 796026296609..024f93fc8c0b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2772,11 +2772,24 @@ int nf_conntrack_init_start(void)
return ret;
}

+static void nf_conntrack_set_closing(struct nf_conntrack *nfct)
+{
+ struct nf_conn *ct = nf_ct_to_nf_conn(nfct);
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ nf_conntrack_tcp_set_closing(ct);
+ break;
+ }
+}
+
static const struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
.destroy = nf_ct_destroy,
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
+ .set_closing = nf_conntrack_set_closing,
+ .confirm = __nf_conntrack_confirm,
};

void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e0092bf273fd..9480e638e5d1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -913,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
return false;
}

+void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
+{
+ enum tcp_conntrack old_state;
+ const unsigned int *timeouts;
+ u32 timeout;
+
+ if (!nf_ct_is_confirmed(ct))
+ return;
+
+ spin_lock_bh(&ct->lock);
+ old_state = ct->proto.tcp.state;
+ ct->proto.tcp.state = TCP_CONNTRACK_CLOSE;
+
+ if (old_state == TCP_CONNTRACK_CLOSE ||
+ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
+ spin_unlock_bh(&ct->lock);
+ return;
+ }
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts) {
+ const struct nf_tcp_net *tn;
+
+ tn = nf_tcp_pernet(nf_ct_net(ct));
+ timeouts = tn->timeouts;
+ }
+
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+
+ spin_unlock_bh(&ct->lock);
+
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+}
+
static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
{
state->td_end = 0;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e21ec3ad8093..d3ba947f4376 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4752,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;

+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4760,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index e1623fbf3654..e4b8c02c5e6a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,

if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;

+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,

if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;

+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6857a4965fe8..e9b81cba1e2b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;

new = alloc_skb(len, gfp_mask);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 93e1bfa72d79..2bd27b77769c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk,
return 0;

ret = crypto_wait_req(ret, &ctx->async_wait);
+ } else if (darg->async) {
+ atomic_dec(&ctx->decrypt_pending);
}
darg->async = false;

@@ -2021,6 +2023,7 @@ int tls_sw_recvmsg(struct sock *sk,
struct strp_msg *rxm;
struct tls_msg *tlm;
ssize_t copied = 0;
+ ssize_t peeked = 0;
bool async = false;
int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
@@ -2168,8 +2171,10 @@ int tls_sw_recvmsg(struct sock *sk,
if (err < 0)
goto put_on_rx_list_err;

- if (is_peek)
+ if (is_peek) {
+ peeked += chunk;
goto put_on_rx_list;
+ }

if (partially_consumed) {
rxm->offset += chunk;
@@ -2208,8 +2213,8 @@ int tls_sw_recvmsg(struct sock *sk,

/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, copied,
- decrypted, is_peek, NULL);
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek, NULL);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 767b338a7a2d..ab2c83d58b62 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -284,9 +284,17 @@ void unix_gc(void)
* which are creating the cycle(s).
*/
skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
+ list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);

+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+ }
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
@@ -314,17 +322,6 @@ void unix_gc(void)
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);

-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- list_for_each_entry_safe(u, next, &gc_candidates, link) {
- struct sk_buff *skb = u->oob_skb;
-
- if (skb) {
- u->oob_skb = NULL;
- kfree_skb(skb);
- }
- }
-#endif
-
spin_lock(&unix_gc_lock);

/* There could be io_uring registered files, just push them back to
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c259d3227a9e..1a3bd554e258 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4137,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)

if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;

diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 64ed7665455f..d328965f32f7 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry,
bool allow_parent1, allow_parent2;
access_mask_t access_request_parent1, access_request_parent2;
struct path mnt_dir;
- layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
- layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
+ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};

if (!dom)
return 0;
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index f4cd9b58b205..a7af085550b2 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
{
int error = buffer_len;
size_t avail_len = buffer_len;
- char *cp0 = head->write_buf;
+ char *cp0;
int idx;

if (!head->write)
return -EINVAL;
if (mutex_lock_interruptible(&head->io_sem))
return -EINTR;
+ cp0 = head->write_buf;
head->read_user_buf_avail = 0;
idx = tomoyo_read_lock();
/* Read a line and dispatch it to the policy handler. */
diff --git a/sound/core/Makefile b/sound/core/Makefile
index 2762f03d9b7b..a7a1590b2952 100644
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o
snd-rawmidi-objs := rawmidi.o
snd-timer-objs := timer.o
snd-hrtimer-objs := hrtimer.o
-snd-rtctimer-objs := rtctimer.o
snd-hwdep-objs := hwdep.o
snd-seq-device-objs := seq_device.o

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 9be2260e4ca2..f8b644cb9157 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
// to the reason.
unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle,
IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES);
- lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0);
+ lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0);
}
if (lost) {
dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n",
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 92a656fb5321..75bd7b2fa4ee 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9662,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
@@ -9687,11 +9688,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+ SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 201dc77ebbd7..d5d2183730b9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3109,6 +3109,10 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
@@ -3117,6 +3121,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ * case, *params*->dmac and *params*->smac are not set either.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6687,6 +6696,8 @@ enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
};

enum {
@@ -6699,6 +6710,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};

struct bpf_fib_lookup {
@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};

+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};

- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied with the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 2107579e2939..a20dca9d26d6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -144,6 +144,11 @@ check_tools()
exit $ksft_skip
fi

+ if ! ss -h | grep -q MPTCP; then
+ echo "SKIP: ss tool does not support MPTCP"
+ exit $ksft_skip
+ fi
+
# Use the legacy version if available to support old kernel versions
if iptables-legacy -V &> /dev/null; then
iptables="iptables-legacy"