Re: Linux 6.6.18

From: Greg Kroah-Hartman
Date: Fri Feb 23 2024 - 04:11:39 EST


diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
index 55db27815361..53e508c6936a 100644
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/statistics/collisions
+What: /sys/class/net/<iface>/statistics/collisions
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -6,7 +6,7 @@ Description:
Indicates the number of collisions seen by this network device.
This value might not be relevant with all MAC layers.

-What: /sys/class/<iface>/statistics/multicast
+What: /sys/class/net/<iface>/statistics/multicast
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -14,7 +14,7 @@ Description:
Indicates the number of multicast packets received by this
network device.

-What: /sys/class/<iface>/statistics/rx_bytes
+What: /sys/class/net/<iface>/statistics/rx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -23,7 +23,7 @@ Description:
See the network driver for the exact meaning of when this
value is incremented.

-What: /sys/class/<iface>/statistics/rx_compressed
+What: /sys/class/net/<iface>/statistics/rx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -32,7 +32,7 @@ Description:
network device. This value might only be relevant for interfaces
that support packet compression (e.g: PPP).

-What: /sys/class/<iface>/statistics/rx_crc_errors
+What: /sys/class/net/<iface>/statistics/rx_crc_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -41,7 +41,7 @@ Description:
by this network device. Note that the specific meaning might
depend on the MAC layer used by the interface.

-What: /sys/class/<iface>/statistics/rx_dropped
+What: /sys/class/net/<iface>/statistics/rx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -51,7 +51,7 @@ Description:
packet processing. See the network driver for the exact
meaning of this value.

-What: /sys/class/<iface>/statistics/rx_errors
+What: /sys/class/net/<iface>/statistics/rx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -59,7 +59,7 @@ Description:
Indicates the number of receive errors on this network device.
See the network driver for the exact meaning of this value.

-What: /sys/class/<iface>/statistics/rx_fifo_errors
+What: /sys/class/net/<iface>/statistics/rx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -68,7 +68,7 @@ Description:
network device. See the network driver for the exact
meaning of this value.

-What: /sys/class/<iface>/statistics/rx_frame_errors
+What: /sys/class/net/<iface>/statistics/rx_frame_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -78,7 +78,7 @@ Description:
on the MAC layer protocol used. See the network driver for
the exact meaning of this value.

-What: /sys/class/<iface>/statistics/rx_length_errors
+What: /sys/class/net/<iface>/statistics/rx_length_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -87,7 +87,7 @@ Description:
error, oversized or undersized. See the network driver for the
exact meaning of this value.

-What: /sys/class/<iface>/statistics/rx_missed_errors
+What: /sys/class/net/<iface>/statistics/rx_missed_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -96,7 +96,7 @@ Description:
due to lack of capacity in the receive side. See the network
driver for the exact meaning of this value.

-What: /sys/class/<iface>/statistics/rx_nohandler
+What: /sys/class/net/<iface>/statistics/rx_nohandler
Date: February 2016
KernelVersion: 4.6
Contact: netdev@xxxxxxxxxxxxxxx
@@ -104,7 +104,7 @@ Description:
Indicates the number of received packets that were dropped on
an inactive device by the network core.

-What: /sys/class/<iface>/statistics/rx_over_errors
+What: /sys/class/net/<iface>/statistics/rx_over_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -114,7 +114,7 @@ Description:
(e.g: larger than MTU). See the network driver for the exact
meaning of this value.

-What: /sys/class/<iface>/statistics/rx_packets
+What: /sys/class/net/<iface>/statistics/rx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -122,7 +122,7 @@ Description:
Indicates the total number of good packets received by this
network device.

-What: /sys/class/<iface>/statistics/tx_aborted_errors
+What: /sys/class/net/<iface>/statistics/tx_aborted_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -132,7 +132,7 @@ Description:
a medium collision). See the network driver for the exact
meaning of this value.

-What: /sys/class/<iface>/statistics/tx_bytes
+What: /sys/class/net/<iface>/statistics/tx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -143,7 +143,7 @@ Description:
transmitted packets or all packets that have been queued for
transmission.

-What: /sys/class/<iface>/statistics/tx_carrier_errors
+What: /sys/class/net/<iface>/statistics/tx_carrier_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -152,7 +152,7 @@ Description:
because of carrier errors (e.g: physical link down). See the
network driver for the exact meaning of this value.

-What: /sys/class/<iface>/statistics/tx_compressed
+What: /sys/class/net/<iface>/statistics/tx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -161,7 +161,7 @@ Description:
this might only be relevant for devices that support
compression (e.g: PPP).

-What: /sys/class/<iface>/statistics/tx_dropped
+What: /sys/class/net/<iface>/statistics/tx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -170,7 +170,7 @@ Description:
See the driver for the exact reasons as to why the packets were
dropped.

-What: /sys/class/<iface>/statistics/tx_errors
+What: /sys/class/net/<iface>/statistics/tx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -179,7 +179,7 @@ Description:
a network device. See the driver for the exact reasons as to
why the packets were dropped.

-What: /sys/class/<iface>/statistics/tx_fifo_errors
+What: /sys/class/net/<iface>/statistics/tx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -188,7 +188,7 @@ Description:
FIFO error. See the driver for the exact reasons as to why the
packets were dropped.

-What: /sys/class/<iface>/statistics/tx_heartbeat_errors
+What: /sys/class/net/<iface>/statistics/tx_heartbeat_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -197,7 +197,7 @@ Description:
reported as heartbeat errors. See the driver for the exact
reasons as to why the packets were dropped.

-What: /sys/class/<iface>/statistics/tx_packets
+What: /sys/class/net/<iface>/statistics/tx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
@@ -206,7 +206,7 @@ Description:
device. See the driver for whether this reports the number of all
attempted or successful transmissions.

-What: /sys/class/<iface>/statistics/tx_window_errors
+What: /sys/class/net/<iface>/statistics/tx_window_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@xxxxxxxxxxxxxxx
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 7acd64c61f50..29fd5213eeb2 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -235,3 +235,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ASR | ASR8601 | #8601001 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arch/ia64/features.rst b/Documentation/arch/ia64/features.rst
index d7226fdcf5f8..056838d2ab55 100644
--- a/Documentation/arch/ia64/features.rst
+++ b/Documentation/arch/ia64/features.rst
@@ -1,3 +1,3 @@
.. SPDX-License-Identifier: GPL-2.0

-.. kernel-feat:: $srctree/Documentation/features ia64
+.. kernel-feat:: features ia64
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index e33ad2401ad7..562f46b41274 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -126,7 +126,7 @@ Users may also set the RoCE capability of the function using
`devlink port function set roce` command.

Users may also set the function as migratable using
-'devlink port function set migratable' command.
+`devlink port function set migratable` command.

Users may also set the IPsec crypto capability of the function using
`devlink port function set ipsec_crypto` command.
diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py
index b9df61eb4501..03ace5f01b5c 100644
--- a/Documentation/sphinx/kernel_feat.py
+++ b/Documentation/sphinx/kernel_feat.py
@@ -109,7 +109,7 @@ class KernelFeat(Directive):
else:
out_lines += line + "\n"

- nodeList = self.nestedParse(out_lines, fname)
+ nodeList = self.nestedParse(out_lines, self.arguments[0])
return nodeList

def nestedParse(self, lines, fname):
diff --git a/Makefile b/Makefile
index 3330c00c0a47..b7198af9e59b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 6
-SUBLEVEL = 17
+SUBLEVEL = 18
EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth

diff --git a/arch/Kconfig b/arch/Kconfig
index 12d51495caec..20c2c93d2c88 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -681,6 +681,7 @@ config SHADOW_CALL_STACK
bool "Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ depends on MMU
help
This option enables the compiler's Shadow Call Stack, which
uses a shadow stack to protect function return addresses from
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index 9d9618079739..a339223d9e05 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -31,7 +31,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"nop \n"
".pushsection __jump_table, \"aw\" \n"
@@ -47,7 +47,7 @@ static __always_inline bool arch_static_branch(struct static_key *key,
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"b %l[l_yes] \n"
".pushsection __jump_table, \"aw\" \n"
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index e12d7d096fc0..e4eb54f6cd9f 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -11,7 +11,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(nop) "\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -25,7 +25,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(b) " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 94b486192e1f..a3652b6bb740 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap)
compiletime_assert(cpucap < ARM64_NCAPS,
"cpucap must be < ARM64_NCAPS");

- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
:
: [cpucap] "i" (cpucap)
@@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap)
compiletime_assert(cpucap < ARM64_NCAPS,
"cpucap must be < ARM64_NCAPS");

- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
:
: [cpucap] "i" (cpucap)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7c7493cb571f..52f076afeb96 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D

#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -135,6 +136,8 @@

#define AMPERE_CPU_PART_AMPERE1 0xAC3

+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -193,6 +196,7 @@
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)

/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 48ddc0f45d22..6aafbb789991 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,7 +18,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
@@ -35,7 +35,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key,
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 87787a012bea..7bba831f62c3 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = {
static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
static const struct midr_range tsb_flush_fail_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2067961
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2054223
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = {
static struct midr_range trbe_write_out_of_range_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2253138
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f9b3adebcb18..1e1e0511c008 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1686,7 +1686,7 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(current);
}

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0e8beb3349ea..425b1bc17a3f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
flags |= SVE_SIG_FLAG_SM;
- } else if (test_thread_flag(TIF_SVE)) {
+ } else if (current->thread.fp_type == FP_STATE_SVE) {
vq = sve_vq_from_vl(vl);
}

@@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;

- if (add_all || test_thread_flag(TIF_SVE) ||
+ if (add_all || current->thread.fp_type == FP_STATE_SVE ||
thread_sm_enabled(&current->thread)) {
int vl = max(sve_max_vl(), sme_max_vl());

diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 6ff3ec18c925..b2c8084cdb95 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void)
hyp_mem_base);
}

+static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+ if (host_kvm->arch.pkvm.handle) {
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
+ host_kvm->arch.pkvm.handle));
+ }
+
+ host_kvm->arch.pkvm.handle = 0;
+ free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+}
+
/*
* Allocates and donates memory for hypervisor VM structs at EL2.
*
@@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
return 0;

destroy_vm:
- pkvm_destroy_hyp_vm(host_kvm);
+ __pkvm_destroy_hyp_vm(host_kvm);
return ret;
free_vm:
free_pages_exact(hyp_vm, hyp_vm_sz);
@@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
int ret = 0;

- mutex_lock(&host_kvm->lock);
+ mutex_lock(&host_kvm->arch.config_lock);
if (!host_kvm->arch.pkvm.handle)
ret = __pkvm_create_hyp_vm(host_kvm);
- mutex_unlock(&host_kvm->lock);
+ mutex_unlock(&host_kvm->arch.config_lock);

return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
- if (host_kvm->arch.pkvm.handle) {
- WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
- host_kvm->arch.pkvm.handle));
- }
-
- host_kvm->arch.pkvm.handle = 0;
- free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+ mutex_lock(&host_kvm->arch.config_lock);
+ __pkvm_destroy_hyp_vm(host_kvm);
+ mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h
index 98a3f4b168bd..ef2e37a10a0f 100644
--- a/arch/csky/include/asm/jump_label.h
+++ b/arch/csky/include/asm/jump_label.h
@@ -12,7 +12,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop32 \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
@@ -29,7 +29,7 @@ static __always_inline bool arch_static_branch(struct static_key *key,
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: bsr32 %l[label] \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h
index 3cea299a5ef5..29acfe3de3fa 100644
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -22,7 +22,7 @@

static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
@@ -35,7 +35,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, co

static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index cc3e81fe0186..c608adc99845 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *addr)
unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
unsigned long offset = 0;

+ if (maddr >= FIXADDR_START)
+ return (void *)(kasan_early_shadow_page);
+
maddr &= XRANGE_SHADOW_MASK;
switch (xrange) {
case XKPRANGE_CC_SEG:
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 4044eaf989ac..0921ddda11a4 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
" .set pop"
: "=&r" (sum), "=&r" (tmp)
: "r" (saddr), "r" (daddr),
- "0" (htonl(len)), "r" (htonl(proto)), "r" (sum));
+ "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)
+ : "memory");

return csum_fold(sum);
}
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index c5c6864e64bc..405c85173f2c 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -36,7 +36,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
+ asm goto("1:\t" B_INSN " 2f\n\t"
"2:\t.insn\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
@@ -50,7 +50,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
+ asm goto("1:\t" J_INSN " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index daf3cf244ea9..701a233583c2 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -154,6 +154,8 @@ static inline long regs_return_value(struct pt_regs *regs)
}

#define instruction_pointer(regs) ((regs)->cp0_epc)
+extern unsigned long exception_ip(struct pt_regs *regs);
+#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)

extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d9df543f7e2c..59288c13b581 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -31,6 +31,7 @@
#include <linux/seccomp.h>
#include <linux/ftrace.h>

+#include <asm/branch.h>
#include <asm/byteorder.h>
#include <asm/cpu.h>
#include <asm/cpu-info.h>
@@ -48,6 +49,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

+unsigned long exception_ip(struct pt_regs *regs)
+{
+ return exception_epc(regs);
+}
+EXPORT_SYMBOL(exception_ip);
+
/*
* Called by kernel/ptrace.c when detaching..
*
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8c45b98dfe0e..4adeb73d5885 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -24,7 +24,6 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
- select BUILDTIME_TABLE_SORT
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_KERNEL_BZIP2
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 74d17d7e759d..5937d5edaba1 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -576,6 +576,7 @@
.section __ex_table,"aw" ! \
.align 4 ! \
.word (fault_addr - .), (except_addr - .) ! \
+ or %r0,%r0,%r0 ! \
.previous


diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h
new file mode 100644
index 000000000000..4ea23e3d79dc
--- /dev/null
+++ b/arch/parisc/include/asm/extable.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_EXTABLE_H
+#define __PARISC_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
+/*
+ * The exception table consists of three addresses:
+ *
+ * - A relative address to the instruction that is allowed to fault.
+ * - A relative address at which the program should continue (fixup routine)
+ * - An asm statement which specifies which CPU register will
+ * receive -EFAULT when an exception happens if the lowest bit in
+ * the fixup address is set.
+ *
+ * Note: The register specified in the err_opcode instruction will be
+ * modified at runtime if a fault happens. Register %r0 will be ignored.
+ *
+ * Since relative addresses are used, 32bit values are sufficient even on
+ * 64bit kernel.
+ */
+
+struct pt_regs;
+int fixup_exception(struct pt_regs *regs);
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+struct exception_table_entry {
+ int insn; /* relative address of insn that is allowed to fault. */
+ int fixup; /* relative address of fixup routine */
+ int err_opcode; /* sample opcode with register which holds error code */
+};
+
+#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\
+ ".section __ex_table,\"aw\"\n" \
+ ".align 4\n" \
+ ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \
+ opcode "\n" \
+ ".previous\n"
+
+/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT on fault into the register specified by the err_opcode instruction,
+ * and zeroes the target register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_VAR(__err_var) \
+ int __err_var = 0
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\
+ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register)
+
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ struct exception_table_entry *b,
+ struct exception_table_entry tmp,
+ int delta)
+{
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+ a->err_opcode = b->err_opcode;
+ b->err_opcode = tmp.err_opcode;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#endif
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 94428798b6aa..317ebc5edc9f 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -12,7 +12,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
@@ -29,7 +29,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b,n %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index c822bd0c0e3c..51f40eaf7780 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -8,7 +8,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
@@ -22,7 +23,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%%sr3,%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 4165079898d9..88d0ae5769dd 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
*/
#include <asm/page.h>
#include <asm/cache.h>
+#include <asm/extable.h>

#include <linux/bug.h>
#include <linux/string.h>
@@ -26,37 +27,6 @@
#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr)
#endif

-/*
- * The exception table contains two values: the first is the relative offset to
- * the address of the instruction that is allowed to fault, and the second is
- * the relative offset to the address of the fixup routine. Since relative
- * addresses are used, 32bit values are sufficient even on 64bit kernel.
- */
-
-#define ARCH_HAS_RELATIVE_EXTABLE
-struct exception_table_entry {
- int insn; /* relative address of insn that is allowed to fault. */
- int fixup; /* relative address of fixup routine */
-};
-
-#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
- ".section __ex_table,\"aw\"\n" \
- ".align 4\n" \
- ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
- ".previous\n"
-
-/*
- * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
- * (with lowest bit set) for which the fault handler in fixup_exception() will
- * load -EFAULT into %r29 for a read or write fault, and zeroes the target
- * register in case of a read fault in get_user().
- */
-#define ASM_EXCEPTIONTABLE_REG 29
-#define ASM_EXCEPTIONTABLE_VAR(__variable) \
- register long __variable __asm__ ("r29") = 0
-#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
- ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
-
#define __get_user_internal(sr, val, ptr) \
({ \
ASM_EXCEPTIONTABLE_VAR(__gu_err); \
@@ -83,7 +53,7 @@ struct exception_table_entry {
\
__asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
: "=r"(__gu_val), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -115,8 +85,8 @@ struct exception_table_entry {
"1: ldw 0(%%sr%2,%3),%0\n" \
"2: ldw 4(%%sr%2,%3),%R0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \
: "=&r"(__gu_tmp.l), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -174,7 +144,7 @@ struct exception_table_entry {
__asm__ __volatile__ ( \
"1: " stx " %1,0(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(x), "i"(sr), "r"(ptr))

@@ -186,15 +156,14 @@ struct exception_table_entry {
"1: stw %1,0(%%sr%2,%3)\n" \
"2: stw %R1,4(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(__val), "i"(sr), "r"(ptr)); \
} while (0)

#endif /* !defined(CONFIG_64BIT) */

-
/*
* Complex access routines -- external declarations
*/
@@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER

-struct pt_regs;
-int fixup_exception(struct pt_regs *regs);
-
#endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 268d90a9325b..393822f16727 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init;

struct pdc_cache_info cache_info __ro_after_init;
#ifndef CONFIG_PA20
-struct pdc_btlb_info btlb_info __ro_after_init;
+struct pdc_btlb_info btlb_info;
#endif

DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
@@ -850,7 +850,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
#endif
" fic,m %3(%4,%0)\n"
"2: sync\n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
: "+r" (start), "+r" (error)
: "r" (end), "r" (dcache_stride), "i" (SR_USER));
}
@@ -865,7 +865,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
#endif
" fdc,m %3(%4,%0)\n"
"2: sync\n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
: "+r" (start), "+r" (error)
: "r" (end), "r" (icache_stride), "i" (SR_USER));
}
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index ed8b75948061..8be4558ef33c 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)

pr_info("\n");

+ /* Prevent hung task messages when printing on serial console */
+ cond_resched();
+
pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n",
hpa, parisc_hardware_description(&dev->id));

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index ce25acfe4889..c520e551a165 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
"2: ldbs 1(%%sr1,%3), %0\n"
" depw %2, 23, 24, %0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1)
: "r" (saddr), "r" (regs->isr) );

@@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
" mtctl %2,11\n"
" vshd %0,%3,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
: "r" (saddr), "r" (regs->isr) );

@@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" mtsar %%r19\n"
" shrpd %0,%%r20,%%sar,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "=r" (val), "+r" (ret)
: "0" (val), "r" (saddr), "r" (regs->isr)
: "r19", "r20" );
@@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" vshd %0,%R0,%0\n"
" vshd %R0,%4,%R0\n"
"4: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
: "r" (regs->isr) );
}
@@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
"1: stb %1, 0(%%sr1, %3)\n"
"2: stb %2, 1(%%sr1, %3)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret), "=&r" (temp1)
: "r" (val), "r" (regs->ior), "r" (regs->isr) );

@@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
" stw %%r20,0(%%sr1,%2)\n"
" stw %%r21,4(%%sr1,%2)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"3: std %%r20,0(%%sr1,%2)\n"
"4: std %%r21,8(%%sr1,%2)\n"
"5: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"4: stw %%r1,4(%%sr1,%2)\n"
"5: stw %R1,8(%%sr1,%2)\n"
"6: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r1" );
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 2fe5b44986e0..c39de84e98b0 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs)
* Fix up get_user() and put_user().
* ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
* bit in the relative address of the fixup routine to indicate
- * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
- * -EFAULT to report a userspace access error.
+ * that the register encoded in the "or %r0,%r0,register"
+ * opcode should be loaded with -EFAULT to report a userspace
+ * access error.
*/
if (fix->fixup & 1) {
- regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
+ int fault_error_reg = fix->err_opcode & 0x1f;
+ if (!WARN_ON(!fault_error_reg))
+ regs->gr[fault_error_reg] = -EFAULT;
+ pr_debug("Unalignment fixup of register %d at %pS\n",
+ fault_error_reg, (void*)regs->iaoq[0]);

/* zero target register for get_user() */
if (parisc_acctyp(0, regs->iir) == VM_READ) {
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 93ce3ec25387..2f2a86ed2280 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
@@ -32,7 +32,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4ae4ab9090a2..ade5f094dbd2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -617,6 +617,8 @@
#endif
#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */
#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */
+#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */
+#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */
#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */
#define SPRN_IABR2 0x3FA /* 83xx */
#define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index bf5dde1a4114..15c5691dd218 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,7 +14,7 @@

#ifdef __KERNEL__

-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15
#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
#else
#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index fb725ec77926..4c96de9cd1e9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -74,7 +74,7 @@ __pu_failed: \
/* -mprefixed can generate offsets beyond range, fall back hack */
#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __put_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm goto( \
"1: " op " %0,0(%1) # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -83,7 +83,7 @@ __pu_failed: \
: label)
#else
#define __put_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -97,7 +97,7 @@ __pu_failed: \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
@@ -146,7 +146,7 @@ do { \
/* -mprefixed can generate offsets beyond range, fall back hack */
#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __get_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: "op" %0,0(%1) # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@@ -155,7 +155,7 @@ do { \
: label)
#else
#define __get_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: "op"%U1%X1 %0, %1 # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@@ -169,7 +169,7 @@ do { \
__get_user_asm_goto(x, addr, label, "ld")
#else /* __powerpc64__ */
#define __get_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: lwz%X1 %0, %1\n" \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f29ce3dd6140..bfd3f442e5eb 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -26,6 +26,15 @@ BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
bl setup_common_caches
+
+ /*
+ * This assumes that all cores using __setup_cpu_603 with
+ * MMU_FTR_USE_HIGH_BATS are G2_LE compatible
+ */
+BEGIN_MMU_FTR_SECTION
+ bl setup_g2_le_hid2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
mtlr r5
blr
_GLOBAL(__setup_cpu_604)
@@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0)
blr
SYM_FUNC_END(setup_604_hid0)

+/* Enable high BATs for G2_LE and derivatives like e300cX */
+SYM_FUNC_START_LOCAL(setup_g2_le_hid2)
+ mfspr r11,SPRN_HID2_G2_LE
+ oris r11,r11,HID2_G2_LE_HBE@h
+ mtspr SPRN_HID2_G2_LE,r11
+ sync
+ isync
+ blr
+SYM_FUNC_END(setup_g2_le_hid2)
+
/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
* erratas we work around here.
* Moto MPC710CE.pdf describes them, those are errata
@@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup)
mtcr r7
blr
_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
-
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
index ceb06b109f83..2ae8e9a7b461 100644
--- a/arch/powerpc/kernel/cpu_specs_e500mc.h
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -8,7 +8,8 @@

#ifdef CONFIG_PPC64
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
- PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+ PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \
+ PPC_FEATURE_BOOKE)
#else
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
PPC_FEATURE_BOOKE)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index bd863702d812..1ad059a9e2fe 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
- std r11,_NIP(r1)
+ std r11,_LINK(r1)
+ std r11,_NIP(r1) /* Saved LR is also the next instruction */
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
@@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
std r11,_XER(r1)
- std r11,_LINK(r1)
std r11,_CTR(r1)

li r11,\trapnr
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index 938e66829eae..d5c48d1b0a31 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -230,7 +230,7 @@ notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
* This allows interrupts to be unmasked without hard disabling, and
* also without new hard interrupts coming in ahead of pending ones.
*/
- asm_volatile_goto(
+ asm goto(
"1: \n"
" lbz 9,%0(13) \n"
" cmpwi 9,0 \n"
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index a70828a6d935..aa9aa11927b2 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size)
if (ret)
return ret;

+ k_start = k_start & PAGE_MASK;
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
if (!block)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index d4d6de0628b0..47d9a6532447 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -662,8 +662,12 @@ u64 pseries_paravirt_steal_clock(int cpu)
{
struct lppaca *lppaca = &lppaca_of(cpu);

- return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
- be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+ /*
+ * VPA steal time counters are reported at TB frequency. Hence do a
+ * conversion to ns before returning
+ */
+ return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+ be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)));
}
#endif

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index b7b58258f6c7..f4157034efa9 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -98,7 +98,7 @@ riscv_has_extension_likely(const unsigned long ext)
"ext must be < RISCV_ISA_EXT_MAX");

if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
:
: [ext] "i" (ext)
@@ -121,7 +121,7 @@ riscv_has_extension_unlikely(const unsigned long ext)
"ext must be < RISCV_ISA_EXT_MAX");

if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
:
: [ext] "i" (ext)
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
index 14a5ea8d8ef0..4a35d787c019 100644
--- a/arch/riscv/include/asm/jump_label.h
+++ b/arch/riscv/include/asm/jump_label.h
@@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
@@ -39,7 +39,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key,
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 895f774bbcc5..bf78cf381dfc 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,%l[label]\n"
+ asm goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 15,%l[label]\n"
+ asm goto("0: brcl 15,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 94eb529dcb77..2718cbea826a 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -10,7 +10,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
@@ -26,7 +26,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes]\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 82f05f250634..34957dcb88b9 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -115,7 +115,9 @@ archprepare:
$(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h

LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+ifdef CONFIG_LD_SCRIPT_DYN
+LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
+endif
LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib

CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
index 4b6d1b526bc1..66fe06db872f 100644
--- a/arch/um/include/asm/cpufeature.h
+++ b/arch/um/include/asm/cpufeature.h
@@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
+ asm goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 00468adf180f..87396575cfa7 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -375,7 +375,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f33e45ed1437..3cece19b7473 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -89,7 +89,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE

SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))

-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_e\?data\|z_.*\)$$/\#define ZO_\2 0x\1/p'

quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index b22f34b8684a..083ec6d7722a 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -43,11 +43,13 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
- .data : {
+ .data : ALIGN(0x1000) {
_data = . ;
*(.data)
*(.data.*)
- *(.bss.efistub)
+
+ /* Add 4 bytes of extra space for a CRC-32 checksum */
+ . = ALIGN(. + 4, 0x200);
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b04ca8e2b213..a1bbedd989e4 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -36,66 +36,20 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
#define ROOT_RDONLY 1
#endif

+ .set salign, 0x1000
+ .set falign, 0x200
+
.code16
.section ".bstext", "ax"
-
- .global bootsect_start
-bootsect_start:
#ifdef CONFIG_EFI_STUB
# "MZ", MS-DOS header
.word MZ_MAGIC
-#endif
-
- # Normalize the start address
- ljmp $BOOTSEG, $start2
-
-start2:
- movw %cs, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %ss
- xorw %sp, %sp
- sti
- cld
-
- movw $bugger_off_msg, %si
-
-msg_loop:
- lodsb
- andb %al, %al
- jz bs_die
- movb $0xe, %ah
- movw $7, %bx
- int $0x10
- jmp msg_loop
-
-bs_die:
- # Allow the user to press a key, then reboot
- xorw %ax, %ax
- int $0x16
- int $0x19
-
- # int 0x19 should never return. In case it does anyway,
- # invoke the BIOS reset code...
- ljmp $0xf000,$0xfff0
-
-#ifdef CONFIG_EFI_STUB
.org 0x38
#
# Offset to the PE header.
#
.long LINUX_PE_MAGIC
.long pe_header
-#endif /* CONFIG_EFI_STUB */
-
- .section ".bsdata", "a"
-bugger_off_msg:
- .ascii "Use a boot loader.\r\n"
- .ascii "\n"
- .ascii "Remove disk and press any key to reboot...\r\n"
- .byte 0
-
-#ifdef CONFIG_EFI_STUB
pe_header:
.long PE_MAGIC

@@ -124,30 +78,26 @@ optional_header:
.byte 0x02 # MajorLinkerVersion
.byte 0x14 # MinorLinkerVersion

- # Filled in by build.c
- .long 0 # SizeOfCode
+ .long ZO__data # SizeOfCode

- .long 0 # SizeOfInitializedData
+ .long ZO__end - ZO__data # SizeOfInitializedData
.long 0 # SizeOfUninitializedData

- # Filled in by build.c
- .long 0x0000 # AddressOfEntryPoint
+ .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint

- .long 0x0200 # BaseOfCode
+ .long setup_size # BaseOfCode
#ifdef CONFIG_X86_32
.long 0 # data
#endif

extra_header_fields:
- # PE specification requires ImageBase to be 64k aligned
- .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff
#ifdef CONFIG_X86_32
- .long image_base # ImageBase
+ .long 0 # ImageBase
#else
- .quad image_base # ImageBase
+ .quad 0 # ImageBase
#endif
- .long 0x20 # SectionAlignment
- .long 0x20 # FileAlignment
+ .long salign # SectionAlignment
+ .long falign # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
.word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion
@@ -156,12 +106,9 @@ extra_header_fields:
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue

- #
- # The size of the bzImage is written in tools/build.c
- #
- .long 0 # SizeOfImage
+ .long setup_size + ZO__end # SizeOfImage

- .long 0x200 # SizeOfHeaders
+ .long salign # SizeOfHeaders
.long 0 # CheckSum
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES
@@ -192,87 +139,77 @@ extra_header_fields:

# Section table
section_table:
- #
- # The offset & size fields are filled in by build.c.
- #
.ascii ".setup"
.byte 0
.byte 0
- .long 0
- .long 0x0 # startup_{32,64}
- .long 0 # Size of initialized data
- # on disk
- .long 0x0 # startup_{32,64}
- .long 0 # PointerToRelocations
- .long 0 # PointerToLineNumbers
- .word 0 # NumberOfRelocations
- .word 0 # NumberOfLineNumbers
- .long IMAGE_SCN_CNT_CODE | \
- IMAGE_SCN_MEM_READ | \
- IMAGE_SCN_MEM_EXECUTE | \
- IMAGE_SCN_ALIGN_16BYTES # Characteristics
+ .long pecompat_fstart - salign # VirtualSize
+ .long salign # VirtualAddress
+ .long pecompat_fstart - salign # SizeOfRawData
+ .long salign # PointerToRawData

- #
- # The EFI application loader requires a relocation section
- # because EFI applications must be relocatable. The .reloc
- # offset & size fields are filled in by build.c.
- #
- .ascii ".reloc"
- .byte 0
- .byte 0
- .long 0
- .long 0
- .long 0 # SizeOfRawData
- .long 0 # PointerToRawData
- .long 0 # PointerToRelocations
- .long 0 # PointerToLineNumbers
- .word 0 # NumberOfRelocations
- .word 0 # NumberOfLineNumbers
+ .long 0, 0, 0
.long IMAGE_SCN_CNT_INITIALIZED_DATA | \
IMAGE_SCN_MEM_READ | \
- IMAGE_SCN_MEM_DISCARDABLE | \
- IMAGE_SCN_ALIGN_1BYTES # Characteristics
+ IMAGE_SCN_MEM_DISCARDABLE # Characteristics

#ifdef CONFIG_EFI_MIXED
- #
- # The offset & size fields are filled in by build.c.
- #
.asciz ".compat"
- .long 0
- .long 0x0
- .long 0 # Size of initialized data
- # on disk
- .long 0x0
- .long 0 # PointerToRelocations
- .long 0 # PointerToLineNumbers
- .word 0 # NumberOfRelocations
- .word 0 # NumberOfLineNumbers
+
+ .long pecompat_fsize # VirtualSize
+ .long pecompat_fstart # VirtualAddress
+ .long pecompat_fsize # SizeOfRawData
+ .long pecompat_fstart # PointerToRawData
+
+ .long 0, 0, 0
.long IMAGE_SCN_CNT_INITIALIZED_DATA | \
IMAGE_SCN_MEM_READ | \
- IMAGE_SCN_MEM_DISCARDABLE | \
- IMAGE_SCN_ALIGN_1BYTES # Characteristics
+ IMAGE_SCN_MEM_DISCARDABLE # Characteristics
+
+ /*
+ * Put the IA-32 machine type and the associated entry point address in
+ * the .compat section, so loaders can figure out which other execution
+ * modes this image supports.
+ */
+ .pushsection ".pecompat", "a", @progbits
+ .balign salign
+ .globl pecompat_fstart
+pecompat_fstart:
+ .byte 0x1 # Version
+ .byte 8 # Size
+ .word IMAGE_FILE_MACHINE_I386 # PE machine type
+ .long setup_size + ZO_efi32_pe_entry # Entrypoint
+ .byte 0x0 # Sentinel
+ .popsection
+#else
+ .set pecompat_fstart, setup_size
#endif
-
- #
- # The offset & size fields are filled in by build.c.
- #
.ascii ".text"
.byte 0
.byte 0
.byte 0
- .long 0
- .long 0x0 # startup_{32,64}
- .long 0 # Size of initialized data
+ .long ZO__data
+ .long setup_size
+ .long ZO__data # Size of initialized data
# on disk
- .long 0x0 # startup_{32,64}
+ .long setup_size
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long IMAGE_SCN_CNT_CODE | \
IMAGE_SCN_MEM_READ | \
- IMAGE_SCN_MEM_EXECUTE | \
- IMAGE_SCN_ALIGN_16BYTES # Characteristics
+ IMAGE_SCN_MEM_EXECUTE # Characteristics
+
+ .ascii ".data\0\0\0"
+ .long ZO__end - ZO__data # VirtualSize
+ .long setup_size + ZO__data # VirtualAddress
+ .long ZO__edata - ZO__data # SizeOfRawData
+ .long setup_size + ZO__data # PointerToRawData
+
+ .long 0, 0, 0
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_WRITE # Characteristics

.set section_count, (. - section_table) / 40
#endif /* CONFIG_EFI_STUB */
@@ -286,12 +223,12 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */

.globl hdr
hdr:
-setup_sects: .byte 0 /* Filled in by build.c */
+ .byte setup_sects - 1
root_flags: .word ROOT_RDONLY
-syssize: .long 0 /* Filled in by build.c */
+syssize: .long ZO__edata / 16
ram_size: .word 0 /* Obsolete */
vid_mode: .word SVGA_MODE
-root_dev: .word 0 /* Filled in by build.c */
+root_dev: .word 0 /* Default to major/minor 0/0 */
boot_flag: .word 0xAA55

# offset 512, entry point
@@ -579,9 +516,25 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
# define INIT_SIZE VO_INIT_SIZE
#endif

+ .macro __handover_offset
+#ifndef CONFIG_EFI_HANDOVER_PROTOCOL
+ .long 0
+#elif !defined(CONFIG_X86_64)
+ .long ZO_efi32_stub_entry
+#else
+ /* Yes, this is really how we defined it :( */
+ .long ZO_efi64_stub_entry - 0x200
+#ifdef CONFIG_EFI_MIXED
+ .if ZO_efi32_stub_entry != ZO_efi64_stub_entry - 0x200
+ .error "32-bit and 64-bit EFI entry points do not match"
+ .endif
+#endif
+#endif
+ .endm
+
init_size: .long INIT_SIZE # kernel initialization size
-handover_offset: .long 0 # Filled in by build.c
-kernel_info_offset: .long 0 # Filled in by build.c
+handover_offset: __handover_offset
+kernel_info_offset: .long ZO_kernel_info

# End of setup header #####################################################

diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 49546c247ae2..3a2d1360abb0 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -10,10 +10,11 @@ ENTRY(_start)
SECTIONS
{
. = 0;
- .bstext : { *(.bstext) }
- .bsdata : { *(.bsdata) }
+ .bstext : {
+ *(.bstext)
+ . = 495;
+ } =0xffffffff

- . = 495;
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
@@ -23,6 +24,9 @@ SECTIONS
.text : { *(.text .text.*) }
.text32 : { *(.text32) }

+ .pecompat : { *(.pecompat) }
+ PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
+
. = ALIGN(16);
.rodata : { *(.rodata*) }

@@ -38,8 +42,10 @@ SECTIONS
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
- }

+ setup_size = ALIGN(ABSOLUTE(.), 4096);
+ setup_sects = ABSOLUTE(setup_size / 512);
+ }

. = ALIGN(16);
.bss :
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index bd247692b701..10311d77c67f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -40,10 +40,6 @@ typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;

-#define DEFAULT_MAJOR_ROOT 0
-#define DEFAULT_MINOR_ROOT 0
-#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
-
/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
#define SETUP_SECT_MAX 64
@@ -51,22 +47,7 @@ typedef unsigned int u32;
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];

-#define PECOFF_RELOC_RESERVE 0x20
-
-#ifdef CONFIG_EFI_MIXED
-#define PECOFF_COMPAT_RESERVE 0x20
-#else
-#define PECOFF_COMPAT_RESERVE 0x0
-#endif
-
-static unsigned long efi32_stub_entry;
-static unsigned long efi64_stub_entry;
-static unsigned long efi_pe_entry;
-static unsigned long efi32_pe_entry;
-static unsigned long kernel_info;
-static unsigned long startup_64;
-static unsigned long _ehead;
-static unsigned long _end;
+static unsigned long _edata;

/*----------------------------------------------------------------------*/

@@ -152,180 +133,6 @@ static void usage(void)
die("Usage: build setup system zoffset.h image");
}

-#ifdef CONFIG_EFI_STUB
-
-static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
-{
- unsigned int pe_header;
- unsigned short num_sections;
- u8 *section;
-
- pe_header = get_unaligned_le32(&buf[0x3c]);
- num_sections = get_unaligned_le16(&buf[pe_header + 6]);
-
-#ifdef CONFIG_X86_32
- section = &buf[pe_header + 0xa8];
-#else
- section = &buf[pe_header + 0xb8];
-#endif
-
- while (num_sections > 0) {
- if (strncmp((char*)section, section_name, 8) == 0) {
- /* section header size field */
- put_unaligned_le32(size, section + 0x8);
-
- /* section header vma field */
- put_unaligned_le32(vma, section + 0xc);
-
- /* section header 'size of initialised data' field */
- put_unaligned_le32(datasz, section + 0x10);
-
- /* section header 'file offset' field */
- put_unaligned_le32(offset, section + 0x14);
-
- break;
- }
- section += 0x28;
- num_sections--;
- }
-}
-
-static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
-{
- update_pecoff_section_header_fields(section_name, offset, size, size, offset);
-}
-
-static void update_pecoff_setup_and_reloc(unsigned int size)
-{
- u32 setup_offset = 0x200;
- u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE;
-#ifdef CONFIG_EFI_MIXED
- u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE;
-#endif
- u32 setup_size = reloc_offset - setup_offset;
-
- update_pecoff_section_header(".setup", setup_offset, setup_size);
- update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
-
- /*
- * Modify .reloc section contents with a single entry. The
- * relocation is applied to offset 10 of the relocation section.
- */
- put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
- put_unaligned_le32(10, &buf[reloc_offset + 4]);
-
-#ifdef CONFIG_EFI_MIXED
- update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE);
-
- /*
- * Put the IA-32 machine type (0x14c) and the associated entry point
- * address in the .compat section, so loaders can figure out which other
- * execution modes this image supports.
- */
- buf[compat_offset] = 0x1;
- buf[compat_offset + 1] = 0x8;
- put_unaligned_le16(0x14c, &buf[compat_offset + 2]);
- put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]);
-#endif
-}
-
-static void update_pecoff_text(unsigned int text_start, unsigned int file_sz,
- unsigned int init_sz)
-{
- unsigned int pe_header;
- unsigned int text_sz = file_sz - text_start;
- unsigned int bss_sz = init_sz - file_sz;
-
- pe_header = get_unaligned_le32(&buf[0x3c]);
-
- /*
- * The PE/COFF loader may load the image at an address which is
- * misaligned with respect to the kernel_alignment field in the setup
- * header.
- *
- * In order to avoid relocating the kernel to correct the misalignment,
- * add slack to allow the buffer to be aligned within the declared size
- * of the image.
- */
- bss_sz += CONFIG_PHYSICAL_ALIGN;
- init_sz += CONFIG_PHYSICAL_ALIGN;
-
- /*
- * Size of code: Subtract the size of the first sector (512 bytes)
- * which includes the header.
- */
- put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]);
-
- /* Size of image */
- put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
-
- /*
- * Address of entry point for PE/COFF executable
- */
- put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
-
- update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz,
- text_sz, text_start);
-}
-
-static int reserve_pecoff_reloc_section(int c)
-{
- /* Reserve 0x20 bytes for .reloc section */
- memset(buf+c, 0, PECOFF_RELOC_RESERVE);
- return PECOFF_RELOC_RESERVE;
-}
-
-static void efi_stub_defaults(void)
-{
- /* Defaults for old kernel */
-#ifdef CONFIG_X86_32
- efi_pe_entry = 0x10;
-#else
- efi_pe_entry = 0x210;
- startup_64 = 0x200;
-#endif
-}
-
-static void efi_stub_entry_update(void)
-{
- unsigned long addr = efi32_stub_entry;
-
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
-#ifdef CONFIG_X86_64
- /* Yes, this is really how we defined it :( */
- addr = efi64_stub_entry - 0x200;
-#endif
-
-#ifdef CONFIG_EFI_MIXED
- if (efi32_stub_entry != addr)
- die("32-bit and 64-bit EFI entry points do not match\n");
-#endif
-#endif
- put_unaligned_le32(addr, &buf[0x264]);
-}
-
-#else
-
-static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
-static inline void update_pecoff_text(unsigned int text_start,
- unsigned int file_sz,
- unsigned int init_sz) {}
-static inline void efi_stub_defaults(void) {}
-static inline void efi_stub_entry_update(void) {}
-
-static inline int reserve_pecoff_reloc_section(int c)
-{
- return 0;
-}
-#endif /* CONFIG_EFI_STUB */
-
-static int reserve_pecoff_compat_section(int c)
-{
- /* Reserve 0x20 bytes for .compat section */
- memset(buf+c, 0, PECOFF_COMPAT_RESERVE);
- return PECOFF_COMPAT_RESERVE;
-}
-
/*
* Parse zoffset.h and find the entry points. We could just #include zoffset.h
* but that would mean tools/build would have to be rebuilt every time. It's
@@ -354,14 +161,7 @@ static void parse_zoffset(char *fname)
p = (char *)buf;

while (p && *p) {
- PARSE_ZOFS(p, efi32_stub_entry);
- PARSE_ZOFS(p, efi64_stub_entry);
- PARSE_ZOFS(p, efi_pe_entry);
- PARSE_ZOFS(p, efi32_pe_entry);
- PARSE_ZOFS(p, kernel_info);
- PARSE_ZOFS(p, startup_64);
- PARSE_ZOFS(p, _ehead);
- PARSE_ZOFS(p, _end);
+ PARSE_ZOFS(p, _edata);

p = strchr(p, '\n');
while (p && (*p == '\r' || *p == '\n'))
@@ -371,17 +171,14 @@ static void parse_zoffset(char *fname)

int main(int argc, char ** argv)
{
- unsigned int i, sz, setup_sectors, init_sz;
+ unsigned int i, sz, setup_sectors;
int c;
- u32 sys_size;
struct stat sb;
FILE *file, *dest;
int fd;
void *kernel;
u32 crc = 0xffffffffUL;

- efi_stub_defaults();
-
if (argc != 5)
usage();
parse_zoffset(argv[3]);
@@ -403,72 +200,27 @@ int main(int argc, char ** argv)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);

- c += reserve_pecoff_compat_section(c);
- c += reserve_pecoff_reloc_section(c);
-
/* Pad unused space with zeros */
- setup_sectors = (c + 511) / 512;
+ setup_sectors = (c + 4095) / 4096;
+ setup_sectors *= 8;
if (setup_sectors < SETUP_SECT_MIN)
setup_sectors = SETUP_SECT_MIN;
i = setup_sectors*512;
memset(buf+c, 0, i-c);

- update_pecoff_setup_and_reloc(i);
-
- /* Set the default root device */
- put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
-
/* Open and stat the kernel file */
fd = open(argv[2], O_RDONLY);
if (fd < 0)
die("Unable to open `%s': %m", argv[2]);
if (fstat(fd, &sb))
die("Unable to stat `%s': %m", argv[2]);
- sz = sb.st_size;
+ if (_edata != sb.st_size)
+ die("Unexpected file size `%s': %u != %u", argv[2], _edata,
+ sb.st_size);
+ sz = _edata - 4;
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
- /* Number of 16-byte paragraphs, including space for a 4-byte CRC */
- sys_size = (sz + 15 + 4) / 16;
-#ifdef CONFIG_EFI_STUB
- /*
- * COFF requires minimum 32-byte alignment of sections, and
- * adding a signature is problematic without that alignment.
- */
- sys_size = (sys_size + 1) & ~1;
-#endif
-
- /* Patch the setup code with the appropriate size parameters */
- buf[0x1f1] = setup_sectors-1;
- put_unaligned_le32(sys_size, &buf[0x1f4]);
-
- init_sz = get_unaligned_le32(&buf[0x260]);
-#ifdef CONFIG_EFI_STUB
- /*
- * The decompression buffer will start at ImageBase. When relocating
- * the compressed kernel to its end, we must ensure that the head
- * section does not get overwritten. The head section occupies
- * [i, i + _ehead), and the destination is [init_sz - _end, init_sz).
- *
- * At present these should never overlap, because 'i' is at most 32k
- * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the
- * calculation of INIT_SIZE in boot/header.S ensures that
- * 'init_sz - _end' is at least 64k.
- *
- * For future-proofing, increase init_sz if necessary.
- */
-
- if (init_sz - _end < i + _ehead) {
- init_sz = (i + _ehead + _end + 4095) & ~4095;
- put_unaligned_le32(init_sz, &buf[0x260]);
- }
-#endif
- update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz);
-
- efi_stub_entry_update();
-
- /* Update kernel_info offset. */
- put_unaligned_le32(kernel_info, &buf[0x268]);

crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, dest) != i)
@@ -479,13 +231,6 @@ int main(int argc, char ** argv)
if (fwrite(kernel, 1, sz, dest) != sz)
die("Writing kernel failed");

- /* Add padding leaving 4 bytes for the checksum */
- while (sz++ < (sys_size*16) - 4) {
- crc = partial_crc32_one('\0', crc);
- if (fwrite("\0", 1, 1, dest) != 1)
- die("Writing padding failed");
- }
-
/* Write the CRC */
put_unaligned_le32(crc, buf);
if (fwrite(buf, 1, 4, dest) != 4)
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 35389b2af88e..0216f63a366b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -81,22 +81,4 @@ do { \

#include <asm-generic/barrier.h>

-/*
- * Make previous memory operations globally visible before
- * a WRMSR.
- *
- * MFENCE makes writes visible, but only affects load/store
- * instructions. WRMSR is unfortunately not a load/store
- * instruction and is unaffected by MFENCE. The LFENCE ensures
- * that the WRMSR is not reordered.
- *
- * Most WRMSRs are full serializing instructions themselves and
- * do not require this barrier. This is only required for the
- * IA32_TSC_DEADLINE and X2APIC MSRs.
- */
-static inline void weak_wrmsr_fence(void)
-{
- asm volatile("mfence; lfence" : : : "memory");
-}
-
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a26bebbdff87..a1273698fc43 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 58cb9495e40f..0091f1008314 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,10 +308,10 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 071572e23d3a..cbbef32517f0 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran

static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, co

static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a3669a7774ed..191f1d8f0506 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -734,4 +734,22 @@ bool arch_is_platform_page(u64 paddr);

extern bool gds_ucode_mitigated(void);

+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 4b081e0d3306..363266cbcada 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -13,7 +13,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
+ asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d6cd9344f6c7..48f8dd47cf68 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], (%[addr])\n"
_ASM_EXTABLE(1b, %l[fail])
:: [addr] "r" (addr), [val] "r" (val)
:: fail);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5c367c1290c3..237dc8cdd12b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -133,7 +133,7 @@ extern int __get_user_bad(void);

#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -295,7 +295,7 @@ do { \
} while (0)

#define __get_user_asm(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@@ -375,7 +375,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -394,7 +394,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6e4f23f314ac..bb3efc825bf4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1157,6 +1157,9 @@ static void init_amd(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
cpu_has_amd_erratum(c, amd_erratum_1485))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+
+ /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}

#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e5ffc8b0e46..d98d023ae497 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1858,6 +1858,13 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif

+
+ /*
+ * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
+ * Hygon will clear it in ->c_init() below.
+ */
+ set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index a7b3ef4c4de9..6e738759779e 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -348,6 +348,9 @@ static void init_hygon(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);

check_null_seg_clears_base(c);
+
+ /* Hygon CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}

static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbde5b..247f2225aa9f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;

+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
if (ret != X86_TRAP_PF)
return false;

- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;

if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;

fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}

if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}

/*
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..4e725854c63a 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@

#define svm_asm(insn, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) "\n\t" \
+ asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@@ -18,7 +18,7 @@ fault: \

#define svm_asm1(insn, op1, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@@ -28,7 +28,7 @@ fault: \

#define svm_asm2(insn, op1, op2, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 90c1f7f07e53..1549461fa42b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -71,7 +71,7 @@ static int fixed_pmc_events[] = {
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;

pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bba5352582c..792245d7aa35 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -745,7 +745,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
*/
static int kvm_cpu_vmxoff(void)
{
- asm_volatile_goto("1: vmxoff\n\t"
+ asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);

@@ -2789,7 +2789,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)

cr4_set_bits(X86_CR4_VMXE);

- asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
_ASM_EXTABLE(1b, %l[fault])
: : [vmxon_pointer] "m"(vmxon_pointer)
: : fault);
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 33af7b4c6eb4..6a0c6e81f7f3 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)

#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

- asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"

_ASM_EXTABLE(1b, %l[do_exception])
@@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)

#define vmx_asm1(insn, op1, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@@ -205,7 +205,7 @@ fault: \

#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e179db7c17da..3d8472d00024 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5300,7 +5300,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ if (events->nmi.pending)
+ kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);

diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..f50cc210a981 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;

next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;

- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;

- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;

- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h
index c812bf85021c..46c8596259d2 100644
--- a/arch/xtensa/include/asm/jump_label.h
+++ b/arch/xtensa/include/asm/jump_label.h
@@ -13,7 +13,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"_nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
* make it reachable and wrap both into a no-transform block
* to avoid any assembler interference with this.
*/
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
".begin no-transform\n\t"
"_j %l[l_yes]\n\t"
"2:\n\t"
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 257b0addd47e..d8b47f534df9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -767,11 +767,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
+ * For such case, force the completed nbytes to be equal to
+ * the BIO size so that bio_advance() sets the BIO remaining
+ * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes)
+ if (bio->bi_iter.bi_size != nbytes) {
bio->bi_status = BLK_STS_IOERR;
- else
+ nbytes = bio->bi_iter.bi_size;
+ } else {
bio->bi_iter.bi_sector = rq->__sector;
+ }
}

bio_advance(bio, nbytes);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0bb613139bec..f8fda9cf583e 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -165,9 +165,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;

- return time_before(jiffies, wb->dirty_sleep + HZ);
+ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
}

static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 82c44d4899b9..e24c829d7a01 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -91,13 +91,13 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
if (!(msg->msg_flags & MSG_MORE)) {
err = hash_alloc_result(sk, ctx);
if (err)
- goto unlock_free;
+ goto unlock_free_result;
ahash_request_set_crypt(&ctx->req, NULL,
ctx->result, 0);
err = crypto_wait_req(crypto_ahash_final(&ctx->req),
&ctx->wait);
if (err)
- goto unlock_free;
+ goto unlock_free_result;
}
goto done_more;
}
@@ -170,6 +170,7 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,

unlock_free:
af_alg_free_sg(&ctx->sgl);
+unlock_free_result:
hash_free_result(sk, ctx);
ctx->more = false;
goto unlock;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 71a40a4c546f..8460458ebe3d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -478,6 +478,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
{
WARN_ON(!list_empty(&thread->waiting_thread_node));
binder_enqueue_work_ilocked(work, &thread->todo);
+
+ /* (e)poll-based threads require an explicit wakeup signal when
+ * queuing their own work; they rely on these events to consume
+ * messages without I/O block. Without it, threads risk waiting
+ * indefinitely without handling the work.
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+ thread->pid == current->pid && !thread->process_todo)
+ wake_up_interruptible_sync(&thread->wait);
+
thread->process_todo = true;
}

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4d8b315c48a1..2cc0ab854168 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -283,10 +283,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target)
return false;
}

+#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \
+ DL_FLAG_CYCLE | \
+ DL_FLAG_MANAGED)
static inline bool device_link_flag_is_sync_state_only(u32 flags)
{
- return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) ==
- (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED);
+ return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY;
}

/**
@@ -2057,9 +2059,14 @@ static int fw_devlink_create_devlink(struct device *con,

/*
* SYNC_STATE_ONLY device links don't block probing and supports cycles.
- * So cycle detection isn't necessary and shouldn't be done.
+ * So, one might expect that cycle detection isn't necessary for them.
+ * However, if the device link was marked as SYNC_STATE_ONLY because
+ * it's part of a cycle, then we still need to do cycle detection. This
+ * is because the consumer and supplier might be part of multiple cycles
+ * and we need to detect all those cycles.
*/
- if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) {
+ if (!device_link_flag_is_sync_state_only(flags) ||
+ flags & DL_FLAG_CYCLE) {
device_links_write_lock();
if (__fw_devlink_relax_cycles(con, sup_handle)) {
__fwnode_link_cycle(link);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5cb2023581d4..84443b6bd882 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1102,7 +1102,7 @@ static int __init genpd_power_off_unused(void)

return 0;
}
-late_initcall(genpd_power_off_unused);
+late_initcall_sync(genpd_power_off_unused);

#ifdef CONFIG_PM_SLEEP

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 3d5e6d705fc6..44b19e696176 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -108,9 +108,8 @@ static inline void send_msg(struct cn_msg *msg)
filter_data[1] = 0;
}

- if (cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
- cn_filter, (void *)filter_data) == -ESRCH)
- atomic_set(&proc_event_num_listeners, 0);
+ cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
+ cn_filter, (void *)filter_data);

local_unlock(&local_event.lock);
}
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index f97166fba9d9..17fb01853dbf 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -520,10 +520,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init);

static int __sev_platform_shutdown_locked(int *error)
{
- struct sev_device *sev = psp_master->sev_data;
+ struct psp_device *psp = psp_master;
+ struct sev_device *sev;
int ret;

- if (!sev || sev->state == SEV_STATE_UNINIT)
+ if (!psp || !psp->sev_data)
+ return 0;
+
+ sev = psp->sev_data;
+
+ if (sev->state == SEV_STATE_UNINIT)
return 0;

ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 2828e9573e90..da8a4c8f2876 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -100,10 +100,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size)
* @buf: where to put the string
* @size: size of @buf, in bytes
*
- * The string is taken from a minimal ASCII text descriptor leaf after
- * the immediate entry with @key. The string is zero-terminated.
- * An overlong string is silently truncated such that it and the
- * zero byte fit into @size.
+ * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the
+ * @key. The string is zero-terminated. An overlong string is silently truncated such that it
+ * and the zero byte fit into @size.
*
* Returns strlen(buf) or a negative error code.
*/
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index a1157c2a7170..ef4c12f0877b 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -108,13 +108,6 @@ lib-y := $(patsubst %.o,%.stub.o,$(lib-y))
# https://bugs.llvm.org/show_bug.cgi?id=46480
STUBCOPY_FLAGS-y += --remove-section=.note.gnu.property

-#
-# For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the
-# .bss section, so the .bss section of the EFI stub needs to be included in the
-# .data section of the compressed kernel to ensure initialization. Rename the
-# .bss section here so it's easy to pick out in the linker script.
-#
-STUBCOPY_FLAGS-$(CONFIG_X86) += --rename-section .bss=.bss.efistub,load,alloc
STUBCOPY_RELOC-$(CONFIG_X86_32) := R_386_32
STUBCOPY_RELOC-$(CONFIG_X86_64) := R_X86_64_64

diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 4a11470bed5e..7bcc5170043f 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -458,9 +458,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
efi_system_table_t *sys_table_arg)
{
- struct boot_params *boot_params;
- struct setup_header *hdr;
- void *image_base;
+ static struct boot_params boot_params __page_aligned_bss;
+ struct setup_header *hdr = &boot_params.hdr;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
@@ -478,30 +477,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
efi_exit(handle, status);
}

- image_base = efi_table_attr(image, image_base);
-
- status = efi_allocate_pages(sizeof(struct boot_params),
- (unsigned long *)&boot_params, ULONG_MAX);
- if (status != EFI_SUCCESS) {
- efi_err("Failed to allocate lowmem for boot params\n");
- efi_exit(handle, status);
- }
-
- memset(boot_params, 0x0, sizeof(struct boot_params));
-
- hdr = &boot_params->hdr;
-
- /* Copy the setup header from the second sector to boot_params */
- memcpy(&hdr->jump, image_base + 512,
- sizeof(struct setup_header) - offsetof(struct setup_header, jump));
-
- /*
- * Fill out some of the header fields ourselves because the
- * EFI firmware loader doesn't load the first sector.
- */
+ /* Assign the setup_header fields that the kernel actually cares about */
hdr->root_flags = 1;
hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;

hdr->type_of_loader = 0x21;

@@ -510,25 +488,13 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
if (!cmdline_ptr)
goto fail;

- efi_set_u64_split((unsigned long)cmdline_ptr,
- &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr);
-
- hdr->ramdisk_image = 0;
- hdr->ramdisk_size = 0;
-
- /*
- * Disregard any setup data that was provided by the bootloader:
- * setup_data could be pointing anywhere, and we have no way of
- * authenticating or validating the payload.
- */
- hdr->setup_data = 0;
+ efi_set_u64_split((unsigned long)cmdline_ptr, &hdr->cmd_line_ptr,
+ &boot_params.ext_cmd_line_ptr);

- efi_stub_entry(handle, sys_table_arg, boot_params);
+ efi_stub_entry(handle, sys_table_arg, &boot_params);
/* not reached */

fail:
- efi_free(sizeof(struct boot_params), (unsigned long)boot_params);
-
efi_exit(handle, status);
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7791367e7c02..79261bec2654 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4133,7 +4133,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);

cancel_delayed_work_sync(&adev->delayed_init_work);
- flush_delayed_work(&adev->gfx.gfx_off_delay_work);

amdgpu_ras_suspend(adev);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ef4cb921781d..053983e9f4ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -702,8 +702,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)

if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ /* If going to s2idle, no need to wait */
+ if (adev->in_s0ix) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
delay);
+ }
}
} else {
if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 6f7c031dd197..f24e34dc33d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b8c47e0cf37a..c19681492efa 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);

+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);

out:
return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c2b9dfc6451d..495eb4cad0e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4020,8 +4020,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = 0;
adev->gfx.mec2_fw = NULL;
}
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);

gfx_v10_0_check_fw_write_wait(adev);
out:
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index aecad530b10a..2c02ae69883d 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);

+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);

out:
return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index ec0c8f8b465a..f432dc72df6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 8fb05eae340a..b8da0fc29378 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index b6a8478dabf4..737eff53f54f 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 9a24f17a5750..cada9f300a7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(IH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 8b2ff2b281b0..5a77ab587b59 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
/* SOC21 */
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};

static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};

static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 917707bba7f3..450b6e831509 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);

+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index d364c6dd152c..bf68e18e3824 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);

+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index dbc99536440f..131e7b769519 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);

+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 56a61ac2b3f5..83c263e2d717 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6072,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (recalculate_timing) {
freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
drm_mode_copy(&saved_mode, &mode);
+ saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
drm_mode_copy(&mode, freesync_mode);
+ mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
} else {
decide_crtc_timing_for_drm_display_mode(
&mode, preferred_mode, scale);
@@ -10358,11 +10360,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}

- ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
- if (ret) {
- DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
- ret = -EINVAL;
- goto fail;
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+ if (ret) {
+ DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+ ret = -EINVAL;
+ goto fail;
+ }
}

ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index c206812dc689..0ba9a7997d56 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -72,11 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 5a0b04518956..16a62e018712 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
union lane_status lane_status;
+ union lane_align_status_updated dpcd_lane_status_updated;
uint8_t dpcd_buf[6] = {0};
uint32_t lane;

@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
* check lanes status
*/
lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+ dpcd_lane_status_updated.raw = dpcd_buf[4];

if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
!lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
+ !lane_status.bits.SYMBOL_LOCKED_0 ||
+ !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
/* if one of the channel equalization, clock
* recovery or symbol lock is dropped
* consider it as (link has been
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 834a5e28abbe..7352bde299d5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
if (max_segment == 0)
max_segment = UINT_MAX;
err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
- nr_pages << PAGE_SHIFT,
+ (unsigned long)nr_pages << PAGE_SHIFT,
max_segment, GFP_KERNEL);
if (err) {
kfree(sg);
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 5f68e31a3e4e..0915f3b68752 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
{
void *vaddr;

- vaddr = msm_gem_get_vaddr(obj);
+ vaddr = msm_gem_get_vaddr_locked(obj);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
iosys_map_set_vaddr(map, vaddr);
@@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)

void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
{
- msm_gem_put_vaddr(obj);
+ msm_gem_put_vaddr_locked(obj);
}

struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7f64c6667300..5c10b559a595 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -749,12 +749,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned long flags;

- pm_runtime_get_sync(&gpu->pdev->dev);
+ WARN_ON(!mutex_is_locked(&gpu->lock));

- mutex_lock(&gpu->lock);
+ pm_runtime_get_sync(&gpu->pdev->dev);

msm_gpu_hw_init(gpu);

+ submit->seqno = submit->hw_fence->seqno;
+
update_sw_cntrs(gpu);

/*
@@ -779,11 +781,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
gpu->funcs->submit(gpu, submit);
gpu->cur_ctx_seqno = submit->queue->ctx->seqno;

- hangcheck_timer_reset(gpu);
-
- mutex_unlock(&gpu->lock);
-
pm_runtime_put(&gpu->pdev->dev);
+ hangcheck_timer_reset(gpu);
}

/*
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 5cc8d358cc97..d5512037c38b 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -21,6 +21,8 @@ struct msm_iommu_pagetable {
struct msm_mmu base;
struct msm_mmu *parent;
struct io_pgtable_ops *pgtbl_ops;
+ const struct iommu_flush_ops *tlb;
+ struct device *iommu_dev;
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
phys_addr_t ttbr;
u32 asid;
@@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = {

static void msm_iommu_tlb_flush_all(void *cookie)
{
+ struct msm_iommu_pagetable *pagetable = cookie;
+ struct adreno_smmu_priv *adreno_smmu;
+
+ if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+ return;
+
+ adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+ pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie);
+
+ pm_runtime_put_autosuspend(pagetable->iommu_dev);
}

static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
+ struct msm_iommu_pagetable *pagetable = cookie;
+ struct adreno_smmu_priv *adreno_smmu;
+
+ if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+ return;
+
+ adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+ pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie);
+
+ pm_runtime_put_autosuspend(pagetable->iommu_dev);
}

static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
@@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
{
}

-static const struct iommu_flush_ops null_tlb_ops = {
+static const struct iommu_flush_ops tlb_ops = {
.tlb_flush_all = msm_iommu_tlb_flush_all,
.tlb_flush_walk = msm_iommu_tlb_flush_walk,
.tlb_add_page = msm_iommu_tlb_add_page,
@@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)

/* The incoming cfg will have the TTBR1 quirk enabled */
ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1;
- ttbr0_cfg.tlb = &null_tlb_ops;
+ ttbr0_cfg.tlb = &tlb_ops;

pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
- &ttbr0_cfg, iommu->domain);
+ &ttbr0_cfg, pagetable);

if (!pagetable->pgtbl_ops) {
kfree(pagetable);
@@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)

/* Needed later for TLB flush */
pagetable->parent = parent;
+ pagetable->tlb = ttbr1_cfg->tlb;
+ pagetable->iommu_dev = ttbr1_cfg->iommu_dev;
pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;

diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 40c0bc35a44c..7f5e0a961bba 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)

msm_fence_init(submit->hw_fence, fctx);

- submit->seqno = submit->hw_fence->seqno;
-
mutex_lock(&priv->lru.lock);

for (i = 0; i < submit->nr_bos; i++) {
@@ -34,8 +32,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)

mutex_unlock(&priv->lru.lock);

+ /* TODO move submit path over to using a per-ring lock.. */
+ mutex_lock(&gpu->lock);
+
msm_gpu_submit(gpu, submit);

+ mutex_unlock(&gpu->lock);
+
return dma_fence_get(submit->hw_fence);
}

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ca762ea55413..93f08f9479d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
void
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
{
+ cancel_work_sync(&fctx->uevent_work);
nouveau_fence_context_kill(fctx, 0);
nvif_event_dtor(&fctx->event);
fctx->dead = 1;
@@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
return drop;
}

-static int
-nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+static void
+nouveau_fence_uevent_work(struct work_struct *work)
{
- struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+ struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+ uevent_work);
unsigned long flags;
- int ret = NVIF_EVENT_KEEP;
+ int drop = 0;

spin_lock_irqsave(&fctx->lock, flags);
if (!list_empty(&fctx->pending)) {
@@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
fence = list_entry(fctx->pending.next, typeof(*fence), head);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
if (nouveau_fence_update(chan, fctx))
- ret = NVIF_EVENT_DROP;
+ drop = 1;
}
+ if (drop)
+ nvif_event_block(&fctx->event);
+
spin_unlock_irqrestore(&fctx->lock, flags);
+}

- return ret;
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+{
+ struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+ schedule_work(&fctx->uevent_work);
+ return NVIF_EVENT_KEEP;
}

void
@@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
} args;
int ret;

+ INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work);
INIT_LIST_HEAD(&fctx->flip);
INIT_LIST_HEAD(&fctx->pending);
spin_lock_init(&fctx->lock);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64d33ae7f356..8bc065acfe35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,6 +44,7 @@ struct nouveau_fence_chan {
u32 context;
char name[32];

+ struct work_struct uevent_work;
struct nvif_event event;
int notify_ref, dead, killed;
};
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 186351ecf72f..ec9f307370fa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
if (ret)
return ret;

- buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
+ buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
if (!buffer->fault)
return -ENOMEM;

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 148f09aaf99a..c5716fd0aed3 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
goto err_free;
}

+ dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX);
ret = virtio_gpu_init(vdev, dev);
if (ret)
goto err_free;
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index d9ef45fcaeab..7903c8638e81 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -241,6 +241,39 @@ int hid_bpf_reconnect(struct hid_device *hdev)
return 0;
}

+static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog,
+ __u32 flags)
+{
+ int fd, err, prog_type;
+
+ prog_type = hid_bpf_get_prog_attach_type(prog);
+ if (prog_type < 0)
+ return prog_type;
+
+ if (prog_type >= HID_BPF_PROG_TYPE_MAX)
+ return -EINVAL;
+
+ if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
+ err = hid_bpf_allocate_event_data(hdev);
+ if (err)
+ return err;
+ }
+
+ fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags);
+ if (fd < 0)
+ return fd;
+
+ if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
+ err = hid_bpf_reconnect(hdev);
+ if (err) {
+ close_fd(fd);
+ return err;
+ }
+ }
+
+ return fd;
+}
+
/**
* hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device
*
@@ -257,18 +290,13 @@ noinline int
hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
{
struct hid_device *hdev;
+ struct bpf_prog *prog;
struct device *dev;
- int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd);
+ int err, fd;

if (!hid_bpf_ops)
return -EINVAL;

- if (prog_type < 0)
- return prog_type;
-
- if (prog_type >= HID_BPF_PROG_TYPE_MAX)
- return -EINVAL;
-
if ((flags & ~HID_BPF_FLAG_MASK))
return -EINVAL;

@@ -278,25 +306,29 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)

hdev = to_hid_device(dev);

- if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
- err = hid_bpf_allocate_event_data(hdev);
- if (err)
- return err;
+ /*
+ * take a ref on the prog itself, it will be released
+ * on errors or when it'll be detached
+ */
+ prog = bpf_prog_get(prog_fd);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto out_dev_put;
}

- fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags);
- if (fd < 0)
- return fd;
-
- if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
- err = hid_bpf_reconnect(hdev);
- if (err) {
- close_fd(fd);
- return err;
- }
+ fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags);
+ if (fd < 0) {
+ err = fd;
+ goto out_prog_put;
}

return fd;
+
+ out_prog_put:
+ bpf_prog_put(prog);
+ out_dev_put:
+ put_device(dev);
+ return err;
}

/**
@@ -323,8 +355,10 @@ hid_bpf_allocate_context(unsigned int hid_id)
hdev = to_hid_device(dev);

ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL);
- if (!ctx_kern)
+ if (!ctx_kern) {
+ put_device(dev);
return NULL;
+ }

ctx_kern->ctx.hid = hdev;

@@ -341,10 +375,15 @@ noinline void
hid_bpf_release_context(struct hid_bpf_ctx *ctx)
{
struct hid_bpf_ctx_kern *ctx_kern;
+ struct hid_device *hid;

ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx);
+ hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */

kfree(ctx_kern);
+
+ /* get_device() is called by bus_find_device() */
+ put_device(&hid->dev);
}

/**
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h
index 63dfc8605cd2..fbe0639d09f2 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.h
+++ b/drivers/hid/bpf/hid_bpf_dispatch.h
@@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern {

int hid_bpf_preload_skel(void);
void hid_bpf_free_links_and_skel(void);
-int hid_bpf_get_prog_attach_type(int prog_fd);
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog);
int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd,
- __u32 flags);
+ struct bpf_prog *prog, __u32 flags);
void __hid_bpf_destroy_device(struct hid_device *hdev);
int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type,
struct hid_bpf_ctx_kern *ctx_kern);
diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c
index eca34b7372f9..aa8e1c79cdf5 100644
--- a/drivers/hid/bpf/hid_bpf_jmp_table.c
+++ b/drivers/hid/bpf/hid_bpf_jmp_table.c
@@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx)
static void hid_bpf_release_progs(struct work_struct *work)
{
int i, j, n, map_fd = -1;
+ bool hdev_destroyed;

if (!jmp_table.map)
return;
@@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work)
if (entry->hdev) {
hdev = entry->hdev;
type = entry->type;
+ /*
+ * hdev is still valid, even if we are called after hid_destroy_device():
+ * when hid_bpf_attach() gets called, it takes a ref on the dev through
+ * bus_find_device()
+ */
+ hdev_destroyed = hdev->bpf.destroyed;

hid_bpf_populate_hdev(hdev, type);

@@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work)
if (test_bit(next->idx, jmp_table.enabled))
continue;

- if (next->hdev == hdev && next->type == type)
+ if (next->hdev == hdev && next->type == type) {
+ /*
+ * clear the hdev reference and decrement the device ref
+ * that was taken during bus_find_device() while calling
+ * hid_bpf_attach()
+ */
next->hdev = NULL;
+ put_device(&hdev->dev);
+ }
}

- /* if type was rdesc fixup, reconnect device */
- if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP)
+ /* if type was rdesc fixup and the device is not gone, reconnect device */
+ if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed)
hid_bpf_reconnect(hdev);
}
}
@@ -333,15 +347,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog)
return err;
}

-int hid_bpf_get_prog_attach_type(int prog_fd)
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog)
{
- struct bpf_prog *prog = NULL;
- int i;
int prog_type = HID_BPF_PROG_TYPE_UNDEF;
-
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ int i;

for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) {
if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) {
@@ -350,8 +359,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd)
}
}

- bpf_prog_put(prog);
-
return prog_type;
}

@@ -388,19 +395,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = {
/* called from syscall */
noinline int
__hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
- int prog_fd, __u32 flags)
+ int prog_fd, struct bpf_prog *prog, __u32 flags)
{
struct bpf_link_primer link_primer;
struct hid_bpf_link *link;
- struct bpf_prog *prog = NULL;
struct hid_bpf_prog_entry *prog_entry;
int cnt, err = -EINVAL, prog_table_idx = -1;

- /* take a ref on the prog itself */
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
mutex_lock(&hid_bpf_attach_lock);

link = kzalloc(sizeof(*link), GFP_USER);
@@ -467,7 +468,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
err_unlock:
mutex_unlock(&hid_bpf_attach_lock);

- bpf_prog_put(prog);
kfree(link);

return err;
diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c
index c4e1fa0273c8..8be4d576da77 100644
--- a/drivers/hid/i2c-hid/i2c-hid-of.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of.c
@@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client)
if (!ihid_of)
return -ENOMEM;

+ ihid_of->client = client;
ihid_of->ops.power_up = i2c_hid_of_power_up;
ihid_of->ops.power_down = i2c_hid_of_power_down;

diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 3f704b8072e8..7659c98d9429 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2080,7 +2080,7 @@ static int wacom_allocate_inputs(struct wacom *wacom)
return 0;
}

-static int wacom_register_inputs(struct wacom *wacom)
+static int wacom_setup_inputs(struct wacom *wacom)
{
struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
@@ -2099,10 +2099,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pen_input_dev);
wacom_wac->pen_input = NULL;
pen_input_dev = NULL;
- } else {
- error = input_register_device(pen_input_dev);
- if (error)
- goto fail;
}

error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac);
@@ -2111,10 +2107,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(touch_input_dev);
wacom_wac->touch_input = NULL;
touch_input_dev = NULL;
- } else {
- error = input_register_device(touch_input_dev);
- if (error)
- goto fail;
}

error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
@@ -2123,7 +2115,34 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pad_input_dev);
wacom_wac->pad_input = NULL;
pad_input_dev = NULL;
- } else {
+ }
+
+ return 0;
+}
+
+static int wacom_register_inputs(struct wacom *wacom)
+{
+ struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
+ struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
+ int error = 0;
+
+ pen_input_dev = wacom_wac->pen_input;
+ touch_input_dev = wacom_wac->touch_input;
+ pad_input_dev = wacom_wac->pad_input;
+
+ if (pen_input_dev) {
+ error = input_register_device(pen_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (touch_input_dev) {
+ error = input_register_device(touch_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (pad_input_dev) {
error = input_register_device(pad_input_dev);
if (error)
goto fail;
@@ -2376,6 +2395,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
if (error)
goto fail;

+ error = wacom_setup_inputs(wacom);
+ if (error)
+ goto fail;
+
+ if (features->type == HID_GENERIC)
+ connect_mask |= HID_CONNECT_DRIVER;
+
+ /* Regular HID work starts now */
+ error = hid_hw_start(hdev, connect_mask);
+ if (error) {
+ hid_err(hdev, "hw start failed\n");
+ goto fail;
+ }
+
error = wacom_register_inputs(wacom);
if (error)
goto fail;
@@ -2390,16 +2423,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
goto fail;
}

- if (features->type == HID_GENERIC)
- connect_mask |= HID_CONNECT_DRIVER;
-
- /* Regular HID work starts now */
- error = hid_hw_start(hdev, connect_mask);
- if (error) {
- hid_err(hdev, "hw start failed\n");
- goto fail;
- }
-
if (!wireless) {
/* Note that if query fails it is not a hard failure */
wacom_query_tablet_data(wacom);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 8289ce763704..002cbaa16bd1 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2574,7 +2574,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
wacom_wac->hid_data.tipswitch);
input_report_key(input, wacom_wac->tool[0], sense);
if (wacom_wac->serial[0]) {
- input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]);
+ /*
+ * xf86-input-wacom does not accept a serial number
+ * of '0'. Report the low 32 bits if possible, but
+ * if they are zero, report the upper ones instead.
+ */
+ __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu;
+ __u32 serial_hi = wacom_wac->serial[0] >> 32;
+ input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi));
input_report_abs(input, ABS_MISC, sense ? id : 0);
}

diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index af56fe2c75c0..9be9fdb07f3d 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -90,10 +90,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o
obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o
obj-$(CONFIG_I2C_OMAP) += i2c-omap.o
obj-$(CONFIG_I2C_OWL) += i2c-owl.o
-i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o
-obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o
-i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o
-obj-$(CONFIG_I2C_APPLE) += i2c-apple.o
+obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o
+obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o
obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
obj-$(CONFIG_I2C_PNX) += i2c-pnx.o
obj-$(CONFIG_I2C_PXA) += i2c-pxa.o
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index a87e3c15e5fc..f1c82b2016f3 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -500,11 +500,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
/* Set block buffer mode */
outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv));

- inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
-
if (read_write == I2C_SMBUS_WRITE) {
len = data->block[0];
outb_p(len, SMBHSTDAT0(priv));
+ inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
outb_p(data->block[i+1], SMBBLKDAT(priv));
}
@@ -522,6 +521,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
}

data->block[0] = len;
+ inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
data->block[i + 1] = inb_p(SMBBLKDAT(priv));
}
diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c
index 7d54a9f34c74..bd8becbdeeb2 100644
--- a/drivers/i2c/busses/i2c-pasemi-core.c
+++ b/drivers/i2c/busses/i2c-pasemi-core.c
@@ -369,6 +369,7 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus)

return 0;
}
+EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe);

irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
{
@@ -378,3 +379,8 @@ irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
complete(&smbus->irq_completion);
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_GPL(pasemi_irq_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Olof Johansson <olof@xxxxxxxxx>");
+MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver");
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 0a9d389df301..5cc32a465f12 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -613,20 +613,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i

peripheral.addr = msgs[i].addr;

+ ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
+ &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
+ if (ret)
+ goto err;
+
if (msgs[i].flags & I2C_M_RD) {
ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
&rx_addr, &rx_buf, I2C_READ, gi2c->rx_c);
if (ret)
goto err;
- }
-
- ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
- &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
- if (ret)
- goto err;

- if (msgs[i].flags & I2C_M_RD)
dma_async_issue_pending(gi2c->rx_c);
+ }
+
dma_async_issue_pending(gi2c->tx_c);

timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index b6b45d359f28..5c5876b4d3b6 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -219,10 +219,12 @@ config BMA400

config BMA400_I2C
tristate
+ select REGMAP_I2C
depends on BMA400

config BMA400_SPI
tristate
+ select REGMAP_SPI
depends on BMA400

config BMC150_ACCEL
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
index 5a5dd5e87ffc..e650ebd167b0 100644
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1826,7 +1826,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st)
{
struct device *dev = &st->spi->dev;
struct device_node *of_node = dev_of_node(dev);
- struct clk_init_data init;
+ struct clk_init_data init = {};
const char *clk_name;
struct clk *clk;
int ret;
@@ -1900,10 +1900,14 @@ static int ad4130_setup(struct iio_dev *indio_dev)
return ret;

/*
- * Configure all GPIOs for output. If configured, the interrupt function
- * of P2 takes priority over the GPIO out function.
+ * Configure unused GPIOs for output. If configured, the interrupt
+ * function of P2 takes priority over the GPIO out function.
*/
- val = AD4130_IO_CONTROL_GPIO_CTRL_MASK;
+ val = 0;
+ for (i = 0; i < AD4130_MAX_GPIOS; i++)
+ if (st->pins_fn[i + AD4130_AIN2_P1] == AD4130_PIN_FN_NONE)
+ val |= FIELD_PREP(AD4130_IO_CONTROL_GPIO_CTRL_MASK, BIT(i));
+
val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel);

ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val);
diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig
index 83e53acfbe88..c7f5866a177d 100644
--- a/drivers/iio/imu/bno055/Kconfig
+++ b/drivers/iio/imu/bno055/Kconfig
@@ -8,6 +8,7 @@ config BOSCH_BNO055
config BOSCH_BNO055_SERIAL
tristate "Bosch BNO055 attached via UART"
depends on SERIAL_DEV_BUS
+ select REGMAP
select BOSCH_BNO055
help
Enable this to support Bosch BNO055 IMUs attached via UART.
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index d752e9c0499b..feec93adb065 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1577,10 +1577,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev)
ret = iio_device_register_sysfs_group(indio_dev,
&iio_dev_opaque->chan_attr_group);
if (ret)
- goto error_clear_attrs;
+ goto error_free_chan_attrs;

return 0;

+error_free_chan_attrs:
+ kfree(iio_dev_opaque->chan_attr_group.attrs);
+ iio_dev_opaque->chan_attr_group.attrs = NULL;
error_clear_attrs:
iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list);

diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index eb1aedad7edc..3c8b9aab5da7 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev,
case HID_USAGE_SENSOR_TIME_TIMESTAMP:
als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes,
*(s64 *)raw_data);
+ ret = 0;
break;
default:
break;
diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c
index 69938204456f..42b70cd42b39 100644
--- a/drivers/iio/magnetometer/rm3100-core.c
+++ b/drivers/iio/magnetometer/rm3100-core.c
@@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
struct rm3100_data *data;
unsigned int tmp;
int ret;
+ int samp_rate_index;

indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
@@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp);
if (ret < 0)
return ret;
+
+ samp_rate_index = tmp - RM3100_TMRC_OFFSET;
+ if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) {
+ dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n");
+ return -EINVAL;
+ }
/* Initializing max wait time, which is double conversion time. */
- data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2]
- * 2;
+ data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2;

/* Cycle count values may not be what we want. */
if ((tmp - RM3100_TMRC_OFFSET) == 0)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index 1dff9bb7c4e9..967de99c1bb9 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -91,6 +91,7 @@ static const struct of_device_id bmp280_of_spi_match[] = {
MODULE_DEVICE_TABLE(of, bmp280_of_spi_match);

static const struct spi_device_id bmp280_spi_id[] = {
+ { "bmp085", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp180", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp181", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp280", (kernel_ulong_t)&bmp280_chip_info },
diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c
index bdd3471d4ac8..a741badaa966 100644
--- a/drivers/interconnect/qcom/sc8180x.c
+++ b/drivers/interconnect/qcom/sc8180x.c
@@ -1372,6 +1372,7 @@ static struct qcom_icc_bcm bcm_mm0 = {

static struct qcom_icc_bcm bcm_co0 = {
.name = "CO0",
+ .keepalive = true,
.num_nodes = 1,
.nodes = { &slv_qns_cdsp_mem_noc }
};
diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c
index a10c8b6549ee..16b2dfd794b4 100644
--- a/drivers/interconnect/qcom/sm8550.c
+++ b/drivers/interconnect/qcom/sm8550.c
@@ -2223,6 +2223,7 @@ static struct platform_driver qnoc_driver = {
.driver = {
.name = "qnoc-sm8550",
.of_match_table = qnoc_of_match,
+ .sync_state = icc_sync_state,
},
};

diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 5559c943f03f..2b0b3175cea0 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -2,7 +2,7 @@
/*
* Generic Broadcom Set Top Box Level 2 Interrupt controller driver
*
- * Copyright (C) 2014-2017 Broadcom
+ * Copyright (C) 2014-2024 Broadcom
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
generic_handle_domain_irq(b->domain, irq);
} while (status);
out:
+ /* Don't ack parent before all device writes are done */
+ wmb();
+
chained_irq_exit(chip, desc);
}

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9a7a74239eab..3632c92cd183 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -207,6 +207,11 @@ static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its)
return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]);
}

+static bool rdists_support_shareable(void)
+{
+ return !(gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE);
+}
+
static u16 get_its_list(struct its_vm *vm)
{
struct its_node *its;
@@ -2710,10 +2715,12 @@ static u64 inherit_vpe_l1_table_from_its(void)
break;
}
val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
- val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
- FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
- val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
- FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+ if (rdists_support_shareable()) {
+ val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
+ FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
+ val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
+ FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+ }
val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);

return val;
@@ -2936,8 +2943,10 @@ static int allocate_vpe_l1_table(void)
WARN_ON(!IS_ALIGNED(pa, psz));

val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
- val |= GICR_VPROPBASER_RaWb;
- val |= GICR_VPROPBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPROPBASER_RaWb;
+ val |= GICR_VPROPBASER_InnerShareable;
+ }
val |= GICR_VPROPBASER_4_1_Z;
val |= GICR_VPROPBASER_4_1_VALID;

@@ -3126,7 +3135,7 @@ static void its_cpu_init_lpis(void)
gicr_write_propbaser(val, rbase + GICR_PROPBASER);
tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);

- if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+ if (!rdists_support_shareable())
tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK;

if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
@@ -3153,7 +3162,7 @@ static void its_cpu_init_lpis(void)
gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);

- if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+ if (!rdists_support_shareable())
tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK;

if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
@@ -3817,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d,
bool force)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
- int from, cpu = cpumask_first(mask_val);
+ struct cpumask common, *table_mask;
unsigned long flags;
+ int from, cpu;

/*
* Changing affinity is mega expensive, so let's be as lazy as
@@ -3834,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d,
* taken on any vLPI handling path that evaluates vpe->col_idx.
*/
from = vpe_to_cpuid_lock(vpe, &flags);
- if (from == cpu)
- goto out;
-
- vpe->col_idx = cpu;
+ table_mask = gic_data_rdist_cpu(from)->vpe_table_mask;

/*
- * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
- * is sharing its VPE table with the current one.
+ * If we are offered another CPU in the same GICv4.1 ITS
+ * affinity, pick this one. Otherwise, any CPU will do.
*/
- if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
- cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
+ if (table_mask && cpumask_and(&common, mask_val, table_mask))
+ cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common);
+ else
+ cpu = cpumask_first(mask_val);
+
+ if (from == cpu)
goto out;

+ vpe->col_idx = cpu;
+
its_send_vmovp(vpe);
its_vpe_db_proxy_move(vpe, from, cpu);

@@ -3880,14 +3893,18 @@ static void its_vpe_schedule(struct its_vpe *vpe)
val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) &
GENMASK_ULL(51, 12);
val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
- val |= GICR_VPROPBASER_RaWb;
- val |= GICR_VPROPBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPROPBASER_RaWb;
+ val |= GICR_VPROPBASER_InnerShareable;
+ }
gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);

val = virt_to_phys(page_address(vpe->vpt_page)) &
GENMASK_ULL(51, 16);
- val |= GICR_VPENDBASER_RaWaWb;
- val |= GICR_VPENDBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPENDBASER_RaWaWb;
+ val |= GICR_VPENDBASER_InnerShareable;
+ }
/*
* There is no good way of finding out if the pending table is
* empty as we can race against the doorbell interrupt very
@@ -5078,6 +5095,8 @@ static int __init its_probe_one(struct its_node *its)
u32 ctlr;
int err;

+ its_enable_quirks(its);
+
if (is_v4(its)) {
if (!(its->typer & GITS_TYPER_VMOVP)) {
err = its_compute_its_list_map(its);
@@ -5429,7 +5448,6 @@ static int __init its_of_probe(struct device_node *node)
if (!its)
return -ENOMEM;

- its_enable_quirks(its);
err = its_probe_one(its);
if (err) {
its_node_destroy(its);
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 1623cd779175..b3736bdd4b9f 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -241,7 +241,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq,
int ret;
unsigned int i, type;
unsigned long hwirq = 0;
- struct eiointc *priv = domain->host_data;
+ struct eiointc_priv *priv = domain->host_data;

ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type);
if (ret)
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 095b9b49aa82..e6757a30dcca 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
#include "dm-ima.h"

#define DM_RESERVED_MAX_IOS 1024
+#define DM_MAX_TARGETS 1048576
+#define DM_MAX_TARGET_PARAMS 1024

struct dm_io;

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 17ffbf7fbe73..1a539ec81bac 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -73,10 +73,8 @@ struct dm_crypt_io {
struct bio *base_bio;
u8 *integrity_metadata;
bool integrity_metadata_from_pool:1;
- bool in_tasklet:1;

struct work_struct work;
- struct tasklet_struct tasklet;

struct convert_context ctx;

@@ -1768,7 +1766,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
io->ctx.r.req = NULL;
io->integrity_metadata = NULL;
io->integrity_metadata_from_pool = false;
- io->in_tasklet = false;
atomic_set(&io->io_pending, 0);
}

@@ -1777,13 +1774,6 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
atomic_inc(&io->io_pending);
}

-static void kcryptd_io_bio_endio(struct work_struct *work)
-{
- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
-
- bio_endio(io->base_bio);
-}
-
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
@@ -1807,20 +1797,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io)

base_bio->bi_status = error;

- /*
- * If we are running this function from our tasklet,
- * we can't call bio_endio() here, because it will call
- * clone_endio() from dm.c, which in turn will
- * free the current struct dm_crypt_io structure with
- * our tasklet. In this case we need to delay bio_endio()
- * execution to after the tasklet is done and dequeued.
- */
- if (io->in_tasklet) {
- INIT_WORK(&io->work, kcryptd_io_bio_endio);
- queue_work(cc->io_queue, &io->work);
- return;
- }
-
bio_endio(base_bio);
}

@@ -2252,11 +2228,6 @@ static void kcryptd_crypt(struct work_struct *work)
kcryptd_crypt_write_convert(io);
}

-static void kcryptd_crypt_tasklet(unsigned long work)
-{
- kcryptd_crypt((struct work_struct *)work);
-}
-
static void kcryptd_queue_crypt(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -2268,15 +2239,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
* irqs_disabled(): the kernel may run some IO completion from the idle thread, but
* it is being executed with irqs disabled.
*/
- if (in_hardirq() || irqs_disabled()) {
- io->in_tasklet = true;
- tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
- tasklet_schedule(&io->tasklet);
+ if (!(in_hardirq() || irqs_disabled())) {
+ kcryptd_crypt(&io->work);
return;
}
-
- kcryptd_crypt(&io->work);
- return;
}

INIT_WORK(&io->work, kcryptd_crypt);
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 21ebb6c39394..3b8b2e886cf6 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1941,7 +1941,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
minimum_data_size - sizeof(param_kernel->version)))
return -EFAULT;

- if (param_kernel->data_size < minimum_data_size) {
+ if (unlikely(param_kernel->data_size < minimum_data_size) ||
+ unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) {
DMERR("Invalid data size in the ioctl structure: %u",
param_kernel->data_size);
return -EINVAL;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 37b48f63ae6a..fd84e06670e8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -129,7 +129,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
int dm_table_create(struct dm_table **result, blk_mode_t mode,
unsigned int num_targets, struct mapped_device *md)
{
- struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
+ struct dm_table *t;
+
+ if (num_targets > DM_MAX_TARGETS)
+ return -EOVERFLOW;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);

if (!t)
return -ENOMEM;
@@ -144,7 +149,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode,

if (!num_targets) {
kfree(t);
- return -ENOMEM;
+ return -EOVERFLOW;
}

if (alloc_targets(t, num_targets)) {
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 14e58ae70521..82662f5769c4 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -645,23 +645,6 @@ static void verity_work(struct work_struct *w)
verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}

-static void verity_tasklet(unsigned long data)
-{
- struct dm_verity_io *io = (struct dm_verity_io *)data;
- int err;
-
- io->in_tasklet = true;
- err = verity_verify_io(io);
- if (err == -EAGAIN || err == -ENOMEM) {
- /* fallback to retrying with work-queue */
- INIT_WORK(&io->work, verity_work);
- queue_work(io->v->verify_wq, &io->work);
- return;
- }
-
- verity_finish_io(io, errno_to_blk_status(err));
-}
-
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
@@ -674,13 +657,8 @@ static void verity_end_io(struct bio *bio)
return;
}

- if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) {
- tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io);
- tasklet_schedule(&io->tasklet);
- } else {
- INIT_WORK(&io->work, verity_work);
- queue_work(io->v->verify_wq, &io->work);
- }
+ INIT_WORK(&io->work, verity_work);
+ queue_work(io->v->verify_wq, &io->work);
}

/*
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index f9d522c870e6..f3f607008419 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -83,7 +83,6 @@ struct dm_verity_io {
struct bvec_iter iter;

struct work_struct work;
- struct tasklet_struct tasklet;

/*
* Three variably-size fields follow this struct:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dccf270aa1b4..108590041db6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -940,9 +940,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
return;

bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
- 1,
- REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA,
- GFP_NOIO, &mddev->sync_set);
+ 1,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META
+ | REQ_PREFLUSH | REQ_FUA,
+ GFP_NOIO, &mddev->sync_set);

atomic_inc(&rdev->nr_pending);

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
index f96f821a7b50..acc559652d6e 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -559,7 +559,7 @@ static int rkisp1_probe(struct platform_device *pdev)
rkisp1->irqs[il] = irq;
}

- ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0,
+ ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED,
dev_driver_string(dev), dev);
if (ret) {
dev_err(dev, "request irq failed: %d\n", ret);
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fe17c7f98e81..52d82cbe7685 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (attr->attach_flags)
return -EINVAL;

- rcdev = rc_dev_get_from_fd(attr->target_fd);
+ rcdev = rc_dev_get_from_fd(attr->target_fd, true);
if (IS_ERR(rcdev))
return PTR_ERR(rcdev);

@@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);

- rcdev = rc_dev_get_from_fd(attr->target_fd);
+ rcdev = rc_dev_get_from_fd(attr->target_fd, true);
if (IS_ERR(rcdev)) {
bpf_prog_put(prog);
return PTR_ERR(rcdev);
@@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->query.query_flags)
return -EINVAL;

- rcdev = rc_dev_get_from_fd(attr->query.target_fd);
+ rcdev = rc_dev_get_from_fd(attr->query.target_fd, false);
if (IS_ERR(rcdev))
return PTR_ERR(rcdev);

diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c
index 196806709259..69e630d85262 100644
--- a/drivers/media/rc/ir_toy.c
+++ b/drivers/media/rc/ir_toy.c
@@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP);
if (err) {
dev_err(irtoy->dev, "exit sample mode: %d\n", err);
+ kfree(buf);
return err;
}

@@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND);
if (err) {
dev_err(irtoy->dev, "enter sample mode: %d\n", err);
+ kfree(buf);
return err;
}

diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index a537734832c5..caad59f76793 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void)
unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX);
}

-struct rc_dev *rc_dev_get_from_fd(int fd)
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write)
{
struct fd f = fdget(fd);
struct lirc_fh *fh;
@@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd)
return ERR_PTR(-EINVAL);
}

+ if (write && !(f.file->f_mode & FMODE_WRITE))
+ return ERR_PTR(-EPERM);
+
fh = f.file->private_data;
dev = fh->rc;

diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h
index ef1e95e1af7f..7df949fc65e2 100644
--- a/drivers/media/rc/rc-core-priv.h
+++ b/drivers/media/rc/rc-core-priv.h
@@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev);
void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc);
int lirc_register(struct rc_dev *dev);
void lirc_unregister(struct rc_dev *dev);
-struct rc_dev *rc_dev_get_from_fd(int fd);
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write);
#else
static inline int lirc_dev_init(void) { return 0; }
static inline void lirc_dev_exit(void) {}
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 1c6c62a7f7f5..03319a1fa97f 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -2191,7 +2191,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev)
int i;

spin_lock_irqsave(&cctx->lock, flags);
- for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) {
+ for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) {
if (cctx->session[i].sid == sess->sid) {
cctx->session[i].valid = false;
cctx->sesscount--;
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 2a2d949a9344..39f45c2b6de8 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -75,11 +75,15 @@ EXPORT_SYMBOL(mmc_gpio_set_cd_irq);
int mmc_gpio_get_ro(struct mmc_host *host)
{
struct mmc_gpio *ctx = host->slot.handler_priv;
+ int cansleep;

if (!ctx || !ctx->ro_gpio)
return -ENOSYS;

- return gpiod_get_value_cansleep(ctx->ro_gpio);
+ cansleep = gpiod_cansleep(ctx->ro_gpio);
+ return cansleep ?
+ gpiod_get_value_cansleep(ctx->ro_gpio) :
+ gpiod_get_value(ctx->ro_gpio);
}
EXPORT_SYMBOL(mmc_gpio_get_ro);

diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index 7bfee28116af..d4a02184784a 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -693,6 +693,35 @@ static int sdhci_pci_o2_init_sd_express(struct mmc_host *mmc, struct mmc_ios *io
return 0;
}

+static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd)
+{
+ struct sdhci_pci_chip *chip;
+ struct sdhci_pci_slot *slot = sdhci_priv(host);
+ u32 scratch_32 = 0;
+ u8 scratch_8 = 0;
+
+ chip = slot->chip;
+
+ if (mode == MMC_POWER_OFF) {
+ /* UnLock WP */
+ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+ scratch_8 &= 0x7f;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+
+ /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */
+ pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32);
+ scratch_32 &= ~(O2_SD_SEL_DLL);
+ pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32);
+
+ /* Lock WP */
+ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+ scratch_8 |= 0x80;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+ }
+
+ sdhci_set_power(host, mode, vdd);
+}
+
static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
{
struct sdhci_pci_chip *chip;
@@ -1051,6 +1080,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = {
.set_bus_width = sdhci_set_bus_width,
.reset = sdhci_reset,
.set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_power = sdhci_pci_o2_set_power,
};

const struct sdhci_pci_fixes sdhci_o2 = {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e6cc0e133b7..6cf7f364704e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1819,6 +1819,8 @@ void bond_xdp_set_features(struct net_device *bond_dev)
bond_for_each_slave(bond, slave, iter)
val &= slave->dev->xdp_features;

+ val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
xdp_set_features_flag(bond_dev, val);
}

@@ -5934,9 +5936,6 @@ void bond_setup(struct net_device *bond_dev)
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
-
- if (bond_xdp_check(bond))
- bond_dev->xdp_features = NETDEV_XDP_ACT_MASK;
}

/* Destroy a bonding device.
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 036d85ef07f5..dfdc039d92a6 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -346,7 +346,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
/* Neither of TDC parameters nor TDC flags are
* provided: do calculation
*/
- can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming,
+ can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt,
&priv->ctrlmode, priv->ctrlmode_supported);
} /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly
* turned off. TDC is disabled: do nothing
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index dc7f9b99f409..5ad51271a534 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3545,7 +3545,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad,
int err;

if (!chip->info->ops->phy_read_c45)
- return -EOPNOTSUPP;
+ return 0xffff;

mv88e6xxx_reg_lock(chip);
err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index aad39ebff4ab..9d37c0374c75 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5351,7 +5351,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
{
int v, ret = 0;

- for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+ for (v = 0; v < pf->num_alloc_vsi; v++) {
if (pf->vsi[v]) {
ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
if (ret)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index cc4c53470db2..082c09920999 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2848,6 +2848,24 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
(u8 *)&stats, sizeof(stats));
}

+/**
+ * i40e_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
+{
+ /* If the VF MAC address has been set administratively (via the
+ * ndo_set_vf_mac command), then deny permission to the VF to
+ * add/delete unicast MAC addresses, unless the VF is trusted
+ */
+ if (vf->pf_set_mac && !vf->trusted)
+ return false;
+
+ return true;
+}
+
#define I40E_MAX_MACVLAN_PER_HW 3072
#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
(num_ports))
@@ -2907,8 +2925,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
- !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+ if (!i40e_can_vf_change_mac(vf) &&
+ !is_multicast_ether_addr(addr) &&
!ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
@@ -3114,19 +3132,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
ret = -EINVAL;
goto error_param;
}
- if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
- was_unimac_deleted = true;
}
vsi = pf->vsi[vf->lan_vsi_idx];

spin_lock_bh(&vsi->mac_filter_hash_lock);
/* delete addresses from the list */
- for (i = 0; i < al->num_elements; i++)
+ for (i = 0; i < al->num_elements; i++) {
+ const u8 *addr = al->list[i].addr;
+
+ /* Allow to delete VF primary MAC only if it was not set
+ * administratively by PF or if VF is trusted.
+ */
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
+ i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+
if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
spin_unlock_bh(&vsi->mac_filter_hash_lock);
goto error_param;
}
+ }

spin_unlock_bh(&vsi->mac_filter_hash_lock);

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
index 41fa2523d91d..5f2cd9a8cf8f 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)

/* Now, set PGIDs for each active LAG */
for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
- struct net_device *bond = lan966x->ports[lag]->bond;
+ struct lan966x_port *port = lan966x->ports[lag];
int num_active_ports = 0;
+ struct net_device *bond;
unsigned long bond_mask;
u8 aggr_idx[16];

- if (!bond || (visited & BIT(lag)))
+ if (!port || !port->bond || (visited & BIT(lag)))
continue;

+ bond = port->bond;
bond_mask = lan966x_lag_get_mask(lan966x, bond);

for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
struct lan966x_port *port = lan966x->ports[p];

+ if (!port)
+ continue;
+
lan_wr(ANA_PGID_PGID_SET(bond_mask),
lan966x, ANA_PGID(p));
if (port->lag_tx_active)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 2967bab72505..15180538b80a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_
mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
return;

+ /* Both struct tcphdr and struct udphdr start with
+ * __be16 source;
+ * __be16 dest;
+ * so we can use the same code for both.
+ */
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
- mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val);
- mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask);
+ if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) {
+ mangle_action->mangle.val =
+ (__force u32)cpu_to_be32(mangle_action->mangle.val << 16);
+ /* The mask of mangle action is inverse mask,
+ * so clear the dest tp port with 0xFFFF to
+ * instead of rotate-left operation.
+ */
+ mangle_action->mangle.mask =
+ (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF);
+ }
+ if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) {
+ mangle_action->mangle.offset = 0;
+ mangle_action->mangle.val =
+ (__force u32)cpu_to_be32(mangle_action->mangle.val);
+ mangle_action->mangle.mask =
+ (__force u32)cpu_to_be32(mangle_action->mangle.mask);
+ }
return;

default:
@@ -1864,10 +1884,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
{
struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct nfp_fl_ct_flow_entry *ct_entry;
+ struct flow_action_entry *ct_goto;
struct nfp_fl_ct_zone_entry *zt;
+ struct flow_action_entry *act;
bool wildcarded = false;
struct flow_match_ct ct;
- struct flow_action_entry *ct_goto;
+ int i;
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_REDIRECT_INGRESS:
+ case FLOW_ACTION_MIRRED:
+ case FLOW_ACTION_MIRRED_INGRESS:
+ if (act->dev->rtnl_link_ops &&
+ !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "unsupported offload: out port is openvswitch internal port");
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ break;
+ }
+ }

flow_rule_match_ct(rule, &ct);
if (!ct.mask->ct_zone) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index e522845c7c21..0d7d138d6e0d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -1084,7 +1084,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
u16 nfp_mac_idx = 0;

entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
- if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) {
+ if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) {
if (entry->bridge_count ||
!nfp_flower_is_supported_bridge(netdev)) {
nfp_tunnel_offloaded_macs_inc_ref_and_link(entry,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index de0a5d5ded30..f2085340a1cf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2588,6 +2588,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
case NFP_NFD_VER_NFD3:
netdev->netdev_ops = &nfp_nfd3_netdev_ops;
netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
break;
case NFP_NFD_VER_NFDK:
netdev->netdev_ops = &nfp_nfdk_netdev_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index 33b4c2856316..3f10c5365c80 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
const u32 barcfg_msix_general =
NFP_PCIE_BAR_PCIE2CPP_MapType(
NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) |
- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT;
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT);
const u32 barcfg_msix_xpb =
NFP_PCIE_BAR_PCIE2CPP_MapType(
NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) |
- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT |
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) |
NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(
NFP_CPP_TARGET_ISLAND_XPB);
const u32 barcfg_explicit[4] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 3d4f34e178a8..b0dd8adce356 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,28 +59,51 @@
#undef FRAME_FILTER_DEBUG
/* #define FRAME_FILTER_DEBUG */

+struct stmmac_q_tx_stats {
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_set_ic_bit;
+ u64_stats_t tx_tso_frames;
+ u64_stats_t tx_tso_nfrags;
+};
+
+struct stmmac_napi_tx_stats {
+ u64_stats_t tx_packets;
+ u64_stats_t tx_pkt_n;
+ u64_stats_t poll;
+ u64_stats_t tx_clean;
+ u64_stats_t tx_set_ic_bit;
+};
+
struct stmmac_txq_stats {
- u64 tx_bytes;
- u64 tx_packets;
- u64 tx_pkt_n;
- u64 tx_normal_irq_n;
- u64 napi_poll;
- u64 tx_clean;
- u64 tx_set_ic_bit;
- u64 tx_tso_frames;
- u64 tx_tso_nfrags;
- struct u64_stats_sync syncp;
+ /* Updates protected by tx queue lock. */
+ struct u64_stats_sync q_syncp;
+ struct stmmac_q_tx_stats q;
+
+ /* Updates protected by NAPI poll logic. */
+ struct u64_stats_sync napi_syncp;
+ struct stmmac_napi_tx_stats napi;
} ____cacheline_aligned_in_smp;

+struct stmmac_napi_rx_stats {
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
+ u64_stats_t rx_pkt_n;
+ u64_stats_t poll;
+};
+
struct stmmac_rxq_stats {
- u64 rx_bytes;
- u64 rx_packets;
- u64 rx_pkt_n;
- u64 rx_normal_irq_n;
- u64 napi_poll;
- struct u64_stats_sync syncp;
+ /* Updates protected by NAPI poll logic. */
+ struct u64_stats_sync napi_syncp;
+ struct stmmac_napi_rx_stats napi;
} ____cacheline_aligned_in_smp;

+/* Updates on each CPU protected by not allowing nested irqs. */
+struct stmmac_pcpu_stats {
+ struct u64_stats_sync syncp;
+ u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES];
+ u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES];
+};
+
/* Extra statistic and debug information exposed by ethtool */
struct stmmac_extra_stats {
/* Transmit errors */
@@ -205,6 +228,7 @@ struct stmmac_extra_stats {
/* per queue statistics */
struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+ struct stmmac_pcpu_stats __percpu *pcpu_stats;
unsigned long rx_dropped;
unsigned long rx_errors;
unsigned long tx_dropped;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 465ff1fd4785..51f121f86745 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;
u32 v;

@@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,

if (v & EMAC_TX_INT) {
ret |= handle_tx;
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
}

if (v & EMAC_TX_DMA_STOP_INT)
@@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,

if (v & EMAC_RX_INT) {
ret |= handle_rx;
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
}

if (v & EMAC_RX_BUF_UA_INT)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 9470d3fd2ded..0d185e54eb7e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;

if (dir == DMA_DIR_RX)
@@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
}
/* TX/RX NORMAL interrupts */
if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 7907d62d3437..85e18f9a22f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -162,8 +162,7 @@ static void show_rx_process_state(unsigned int status)
int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;
/* read the status register (CSR5) */
u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
u32 value = readl(ioaddr + DMA_INTR_ENA);
/* to schedule NAPI on real RIE event. */
if (likely(value & DMA_INTR_ENA_RIE)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
}
if (likely(intr_status & DMA_STATUS_TI)) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}
if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 3cde695fec91..dd2ab6185c40 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
int ret = 0;
@@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
/* TX/RX NORMAL interrupts */
if (likely(intr_status & XGMAC_NIS)) {
if (likely(intr_status & XGMAC_RI)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 69c8c2528524..521b1b5ffebb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -539,44 +539,79 @@ stmmac_set_pauseparam(struct net_device *netdev,
}
}

+static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+ u64 total;
+ int cpu;
+
+ total = 0;
+ for_each_possible_cpu(cpu) {
+ struct stmmac_pcpu_stats *pcpu;
+ unsigned int start;
+ u64 irq_n;
+
+ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&pcpu->syncp);
+ irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]);
+ } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+ total += irq_n;
+ }
+ return total;
+}
+
+static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+ u64 total;
+ int cpu;
+
+ total = 0;
+ for_each_possible_cpu(cpu) {
+ struct stmmac_pcpu_stats *pcpu;
+ unsigned int start;
+ u64 irq_n;
+
+ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&pcpu->syncp);
+ irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]);
+ } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+ total += irq_n;
+ }
+ return total;
+}
+
static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
u32 rx_cnt = priv->plat->rx_queues_to_use;
unsigned int start;
- int q, stat;
- char *p;
+ int q;

for (q = 0; q < tx_cnt; q++) {
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
- struct stmmac_txq_stats snapshot;
+ u64 pkt_n;

do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- snapshot = *txq_stats;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n);
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));

- p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
- for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64);
- }
+ *data++ = pkt_n;
+ *data++ = stmmac_get_tx_normal_irq_n(priv, q);
}

for (q = 0; q < rx_cnt; q++) {
struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
- struct stmmac_rxq_stats snapshot;
+ u64 pkt_n;

do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- snapshot = *rxq_stats;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n);
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));

- p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
- for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64);
- }
+ *data++ = pkt_n;
+ *data++ = stmmac_get_rx_normal_irq_n(priv, q);
}
}

@@ -635,39 +670,49 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
pos = j;
for (i = 0; i < rx_queues_count; i++) {
struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i];
- struct stmmac_rxq_stats snapshot;
+ struct stmmac_napi_rx_stats snapshot;
+ u64 n_irq;

j = pos;
do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- snapshot = *rxq_stats;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
-
- data[j++] += snapshot.rx_pkt_n;
- data[j++] += snapshot.rx_normal_irq_n;
- normal_irq_n += snapshot.rx_normal_irq_n;
- napi_poll += snapshot.napi_poll;
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ snapshot = rxq_stats->napi;
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
+
+ data[j++] += u64_stats_read(&snapshot.rx_pkt_n);
+ n_irq = stmmac_get_rx_normal_irq_n(priv, i);
+ data[j++] += n_irq;
+ normal_irq_n += n_irq;
+ napi_poll += u64_stats_read(&snapshot.poll);
}

pos = j;
for (i = 0; i < tx_queues_count; i++) {
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i];
- struct stmmac_txq_stats snapshot;
+ struct stmmac_napi_tx_stats napi_snapshot;
+ struct stmmac_q_tx_stats q_snapshot;
+ u64 n_irq;

j = pos;
do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- snapshot = *txq_stats;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
-
- data[j++] += snapshot.tx_pkt_n;
- data[j++] += snapshot.tx_normal_irq_n;
- normal_irq_n += snapshot.tx_normal_irq_n;
- data[j++] += snapshot.tx_clean;
- data[j++] += snapshot.tx_set_ic_bit;
- data[j++] += snapshot.tx_tso_frames;
- data[j++] += snapshot.tx_tso_nfrags;
- napi_poll += snapshot.napi_poll;
+ start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+ q_snapshot = txq_stats->q;
+ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ napi_snapshot = txq_stats->napi;
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
+
+ data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n);
+ n_irq = stmmac_get_tx_normal_irq_n(priv, i);
+ data[j++] += n_irq;
+ normal_irq_n += n_irq;
+ data[j++] += u64_stats_read(&napi_snapshot.tx_clean);
+ data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) +
+ u64_stats_read(&napi_snapshot.tx_set_ic_bit);
+ data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames);
+ data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags);
+ napi_poll += u64_stats_read(&napi_snapshot.poll);
}
normal_irq_n += priv->xstats.rx_early_irq;
data[j++] = normal_irq_n;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 292857c0e601..f1614ad2daaa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2442,7 +2442,6 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
struct xdp_desc xdp_desc;
bool work_done = true;
u32 tx_set_ic_bit = 0;
- unsigned long flags;

/* Avoids TX time-out as we are sharing with slow path */
txq_trans_cond_update(nq);
@@ -2515,9 +2514,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
entry = tx_q->cur_tx;
}
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_set_ic_bit += tx_set_ic_bit;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_add(&txq_stats->napi.tx_set_ic_bit, tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->napi_syncp);

if (tx_desc) {
stmmac_flush_tx_descriptors(priv, queue);
@@ -2561,7 +2560,6 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
unsigned int bytes_compl = 0, pkts_compl = 0;
unsigned int entry, xmits = 0, count = 0;
u32 tx_packets = 0, tx_errors = 0;
- unsigned long flags;

__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));

@@ -2717,11 +2715,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
if (tx_q->dirty_tx != tx_q->cur_tx)
stmmac_tx_timer_arm(priv, queue);

- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_packets += tx_packets;
- txq_stats->tx_pkt_n += tx_packets;
- txq_stats->tx_clean++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_add(&txq_stats->napi.tx_packets, tx_packets);
+ u64_stats_add(&txq_stats->napi.tx_pkt_n, tx_packets);
+ u64_stats_inc(&txq_stats->napi.tx_clean);
+ u64_stats_update_end(&txq_stats->napi_syncp);

priv->xstats.tx_errors += tx_errors;

@@ -3853,6 +3851,9 @@ static int __stmmac_open(struct net_device *dev,
priv->rx_copybreak = STMMAC_RX_COPYBREAK;

buf_sz = dma_conf->dma_buf_sz;
+ for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+ dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));

stmmac_reset_queues_param(priv);
@@ -4131,7 +4132,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
u8 proto_hdr_len, hdr;
- unsigned long flags;
u32 pay_len, mss;
dma_addr_t des;
int i;
@@ -4296,13 +4296,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}

- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_bytes += skb->len;
- txq_stats->tx_tso_frames++;
- txq_stats->tx_tso_nfrags += nfrags;
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
+ u64_stats_inc(&txq_stats->q.tx_tso_frames);
+ u64_stats_add(&txq_stats->q.tx_tso_nfrags, nfrags);
if (set_ic)
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);

if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4401,7 +4401,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
int entry, first_tx;
- unsigned long flags;
dma_addr_t des;

tx_q = &priv->dma_conf.tx_queue[queue];
@@ -4571,11 +4570,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}

- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_bytes += skb->len;
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
if (set_ic)
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);

if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4839,12 +4838,11 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
set_ic = false;

if (set_ic) {
- unsigned long flags;
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, tx_desc);
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);
}

stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -4994,7 +4992,6 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
unsigned int len = xdp->data_end - xdp->data;
enum pkt_hash_types hash_type;
int coe = priv->hw->rx_csum;
- unsigned long flags;
struct sk_buff *skb;
u32 hash;

@@ -5019,10 +5016,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
skb_record_rx_queue(skb, queue);
napi_gro_receive(&ch->rxtx_napi, skb);

- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_pkt_n++;
- rxq_stats->rx_bytes += len;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.rx_pkt_n);
+ u64_stats_add(&rxq_stats->napi.rx_bytes, len);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
}

static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5104,7 +5101,6 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int desc_size;
struct bpf_prog *prog;
bool failure = false;
- unsigned long flags;
int xdp_status = 0;
int status = 0;

@@ -5259,9 +5255,9 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)

stmmac_finalize_xdp_rx(priv, xdp_status);

- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_pkt_n += count;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+ u64_stats_update_end(&rxq_stats->napi_syncp);

priv->xstats.rx_dropped += rx_dropped;
priv->xstats.rx_errors += rx_errors;
@@ -5299,7 +5295,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int desc_size;
struct sk_buff *skb = NULL;
struct stmmac_xdp_buff ctx;
- unsigned long flags;
int xdp_status = 0;
int buf_sz;

@@ -5552,11 +5547,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)

stmmac_rx_refill(priv, queue);

- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_packets += rx_packets;
- rxq_stats->rx_bytes += rx_bytes;
- rxq_stats->rx_pkt_n += count;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_add(&rxq_stats->napi.rx_packets, rx_packets);
+ u64_stats_add(&rxq_stats->napi.rx_bytes, rx_bytes);
+ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+ u64_stats_update_end(&rxq_stats->napi_syncp);

priv->xstats.rx_dropped += rx_dropped;
priv->xstats.rx_errors += rx_errors;
@@ -5571,13 +5566,12 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
struct stmmac_priv *priv = ch->priv_data;
struct stmmac_rxq_stats *rxq_stats;
u32 chan = ch->index;
- unsigned long flags;
int work_done;

rxq_stats = &priv->xstats.rxq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.poll);
+ u64_stats_update_end(&rxq_stats->napi_syncp);

work_done = stmmac_rx(priv, budget, chan);
if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5598,13 +5592,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
struct stmmac_priv *priv = ch->priv_data;
struct stmmac_txq_stats *txq_stats;
u32 chan = ch->index;
- unsigned long flags;
int work_done;

txq_stats = &priv->xstats.txq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_inc(&txq_stats->napi.poll);
+ u64_stats_update_end(&txq_stats->napi_syncp);

work_done = stmmac_tx_clean(priv, budget, chan);
work_done = min(work_done, budget);
@@ -5629,17 +5622,16 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
struct stmmac_rxq_stats *rxq_stats;
struct stmmac_txq_stats *txq_stats;
u32 chan = ch->index;
- unsigned long flags;

rxq_stats = &priv->xstats.rxq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.poll);
+ u64_stats_update_end(&rxq_stats->napi_syncp);

txq_stats = &priv->xstats.txq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_inc(&txq_stats->napi.poll);
+ u64_stats_update_end(&txq_stats->napi_syncp);

tx_done = stmmac_tx_clean(priv, budget, chan);
tx_done = min(tx_done, budget);
@@ -6961,10 +6953,13 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
u64 tx_bytes;

do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- tx_packets = txq_stats->tx_packets;
- tx_bytes = txq_stats->tx_bytes;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+ tx_bytes = u64_stats_read(&txq_stats->q.tx_bytes);
+ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ tx_packets = u64_stats_read(&txq_stats->napi.tx_packets);
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));

stats->tx_packets += tx_packets;
stats->tx_bytes += tx_bytes;
@@ -6976,10 +6971,10 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
u64 rx_bytes;

do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- rx_packets = rxq_stats->rx_packets;
- rx_bytes = rxq_stats->rx_bytes;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ rx_packets = u64_stats_read(&rxq_stats->napi.rx_packets);
+ rx_bytes = u64_stats_read(&rxq_stats->napi.rx_bytes);
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));

stats->rx_packets += rx_packets;
stats->rx_bytes += rx_bytes;
@@ -7373,9 +7368,16 @@ int stmmac_dvr_probe(struct device *device,
priv->dev = ndev;

for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
- u64_stats_init(&priv->xstats.rxq_stats[i].syncp);
- for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
- u64_stats_init(&priv->xstats.txq_stats[i].syncp);
+ u64_stats_init(&priv->xstats.rxq_stats[i].napi_syncp);
+ for (i = 0; i < MTL_MAX_TX_QUEUES; i++) {
+ u64_stats_init(&priv->xstats.txq_stats[i].q_syncp);
+ u64_stats_init(&priv->xstats.txq_stats[i].napi_syncp);
+ }
+
+ priv->xstats.pcpu_stats =
+ devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats);
+ if (!priv->xstats.pcpu_stats)
+ return -ENOMEM;

stmmac_set_ethtool_ops(ndev);
priv->pause = pause;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ca4d4548f85e..2ed165dcdbdc 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
}
}

+ phy->mac_managed_pm = true;
+
slave->phy = phy;

phy_attached_info(slave->phy);
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 0e4f526b1753..9061dca97fcb 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
slave->slave_num);
return;
}
+
+ phy->mac_managed_pm = true;
+
slave->phy = phy;

phy_attached_info(slave->phy);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 82e9796c8f5e..4f9658a74102 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device)
/* Disable NAPI and disassociate its context from the device. */
for (i = 0; i < net_device->num_chn; i++) {
/* See also vmbus_reset_channel_cb(). */
- napi_disable(&net_device->chan_table[i].napi);
+ /* only disable enabled NAPI channel */
+ if (i < ndev->real_num_rx_queues)
+ napi_disable(&net_device->chan_table[i].napi);
+
netif_napi_del(&net_device->chan_table[i].napi);
}

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index cd15d7b380ab..9d2d66a4aafd 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -42,6 +42,10 @@
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)

+/* Macros to define the context of vf registration */
+#define VF_REG_IN_PROBE 1
+#define VF_REG_IN_NOTIFIER 2
+
static unsigned int ring_size __ro_after_init = 128;
module_param(ring_size, uint, 0444);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
@@ -2183,7 +2187,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
}

static int netvsc_vf_join(struct net_device *vf_netdev,
- struct net_device *ndev)
+ struct net_device *ndev, int context)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
int ret;
@@ -2206,7 +2210,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto upper_link_failed;
}

- schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+ /* If this registration is called from probe context vf_takeover
+ * is taken care of later in probe itself.
+ */
+ if (context == VF_REG_IN_NOTIFIER)
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);

call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);

@@ -2344,7 +2352,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
return NOTIFY_DONE;
}

-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
@@ -2384,7 +2392,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)

netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);

- if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
return NOTIFY_DONE;

dev_hold(vf_netdev);
@@ -2482,10 +2490,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_OK;
}

+static int check_dev_is_matching_vf(struct net_device *event_ndev)
+{
+ /* Skip NetVSC interfaces */
+ if (event_ndev->netdev_ops == &device_ops)
+ return -ENODEV;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_ndev->type != ARPHRD_ETHER)
+ return -ENODEV;
+
+ /* Avoid Vlan dev with same MAC registering as VF */
+ if (is_vlan_dev(event_ndev))
+ return -ENODEV;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+ if (netif_is_bond_master(event_ndev))
+ return -ENODEV;
+
+ return 0;
+}
+
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
- struct net_device *net = NULL;
+ struct net_device *net = NULL, *vf_netdev;
struct net_device_context *net_device_ctx;
struct netvsc_device_info *device_info = NULL;
struct netvsc_device *nvdev;
@@ -2597,6 +2626,30 @@ static int netvsc_probe(struct hv_device *dev,
}

list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+ /* When the hv_netvsc driver is unloaded and reloaded, the
+ * NET_DEVICE_REGISTER for the vf device is replayed before probe
+ * is complete. This is because register_netdevice_notifier() gets
+ * registered before vmbus_driver_register() so that callback func
+ * is set before probe and we don't miss events like NETDEV_POST_INIT
+ * So, in this section we try to register the matching vf device that
+ * is present as a netdevice, knowing that its register call is not
+ * processed in the netvsc_netdev_notifier(as probing is progress and
+ * get_netvsc_byslot fails).
+ */
+ for_each_netdev(dev_net(net), vf_netdev) {
+ ret = check_dev_is_matching_vf(vf_netdev);
+ if (ret != 0)
+ continue;
+
+ if (net != get_netvsc_byslot(vf_netdev))
+ continue;
+
+ netvsc_prepare_bonding(vf_netdev);
+ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+ __netvsc_vf_setup(net, vf_netdev);
+ break;
+ }
rtnl_unlock();

netvsc_devinfo_put(device_info);
@@ -2752,28 +2805,17 @@ static int netvsc_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ int ret = 0;

- /* Skip our own events */
- if (event_dev->netdev_ops == &device_ops)
- return NOTIFY_DONE;
-
- /* Avoid non-Ethernet type devices */
- if (event_dev->type != ARPHRD_ETHER)
- return NOTIFY_DONE;
-
- /* Avoid Vlan dev with same MAC registering as VF */
- if (is_vlan_dev(event_dev))
- return NOTIFY_DONE;
-
- /* Avoid Bonding master dev with same MAC registering as VF */
- if (netif_is_bond_master(event_dev))
+ ret = check_dev_is_matching_vf(event_dev);
+ if (ret != 0)
return NOTIFY_DONE;

switch (event) {
case NETDEV_POST_INIT:
return netvsc_prepare_bonding(event_dev);
case NETDEV_REGISTER:
- return netvsc_register_vf(event_dev);
+ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index b26f90e52256..359397a61715 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -618,7 +618,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 2) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -634,7 +634,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 1) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -650,7 +650,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 0) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -707,7 +707,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 2) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -723,7 +723,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 1) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -739,7 +739,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 0) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}

@@ -1088,6 +1088,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt)
goto read_table;
}

+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+
read_table:
fwrt->ppag_ver = tbl_rev;
flags = &wifi_pkg->package.elements[1];
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index fb5e254757e7..8faf4e7872bb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv)
kfree(drv->fw.ucode_capa.cmd_versions);
kfree(drv->fw.phy_integration_ver);
kfree(drv->trans->dbg.pc_data);
+ drv->trans->dbg.pc_data = NULL;

for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
iwl_free_fw_img(drv, drv->fw.img + i);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index a25ea638229b..0aeca64725da 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3666,6 +3666,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
NL80211_TDLS_SETUP);
}

+ if (ret)
+ return ret;
+
for_each_sta_active_link(vif, sta, link_sta, i)
link_sta->agg.max_rc_amsdu_len = 1;

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 8d1e44fd9de7..82b4d4d01097 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -503,6 +503,10 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
return false;

mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+
+ if (WARN_ON_ONCE(!mvm_sta->dup_data))
+ return false;
+
dup_data = &mvm_sta->dup_data[queue];

/*
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index d7503aef599f..fab361a250d6 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -104,13 +104,12 @@ bool provides_xdp_headroom = true;
module_param(provides_xdp_headroom, bool, 0644);

static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
- u8 status);
+ s8 status);

static void make_tx_response(struct xenvif_queue *queue,
- struct xen_netif_tx_request *txp,
+ const struct xen_netif_tx_request *txp,
unsigned int extra_count,
- s8 st);
-static void push_tx_responses(struct xenvif_queue *queue);
+ s8 status);

static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);

@@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
unsigned int extra_count, RING_IDX end)
{
RING_IDX cons = queue->tx.req_cons;
- unsigned long flags;

do {
- spin_lock_irqsave(&queue->response_lock, flags);
make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
- push_tx_responses(queue);
- spin_unlock_irqrestore(&queue->response_lock, flags);
if (cons == end)
break;
RING_COPY_REQUEST(&queue->tx, cons++, txp);
@@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
nr_slots--) {
if (unlikely(!txp->size)) {
- unsigned long flags;
-
- spin_lock_irqsave(&queue->response_lock, flags);
make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
- push_tx_responses(queue);
- spin_unlock_irqrestore(&queue->response_lock, flags);
++txp;
continue;
}
@@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,

for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
if (unlikely(!txp->size)) {
- unsigned long flags;
-
- spin_lock_irqsave(&queue->response_lock, flags);
make_tx_response(queue, txp, 0,
XEN_NETIF_RSP_OKAY);
- push_tx_responses(queue);
- spin_unlock_irqrestore(&queue->response_lock,
- flags);
continue;
}

@@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
(ret == 0) ?
XEN_NETIF_RSP_OKAY :
XEN_NETIF_RSP_ERROR);
- push_tx_responses(queue);
continue;
}

@@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,

make_tx_response(queue, &txreq, extra_count,
XEN_NETIF_RSP_OKAY);
- push_tx_responses(queue);
continue;
}

@@ -1433,8 +1415,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return work_done;
}

+static void _make_tx_response(struct xenvif_queue *queue,
+ const struct xen_netif_tx_request *txp,
+ unsigned int extra_count,
+ s8 status)
+{
+ RING_IDX i = queue->tx.rsp_prod_pvt;
+ struct xen_netif_tx_response *resp;
+
+ resp = RING_GET_RESPONSE(&queue->tx, i);
+ resp->id = txp->id;
+ resp->status = status;
+
+ while (extra_count-- != 0)
+ RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+
+ queue->tx.rsp_prod_pvt = ++i;
+}
+
+static void push_tx_responses(struct xenvif_queue *queue)
+{
+ int notify;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+ if (notify)
+ notify_remote_via_irq(queue->tx_irq);
+}
+
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
- u8 status)
+ s8 status)
{
struct pending_tx_info *pending_tx_info;
pending_ring_idx_t index;
@@ -1444,8 +1453,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,

spin_lock_irqsave(&queue->response_lock, flags);

- make_tx_response(queue, &pending_tx_info->req,
- pending_tx_info->extra_count, status);
+ _make_tx_response(queue, &pending_tx_info->req,
+ pending_tx_info->extra_count, status);

/* Release the pending index before pusing the Tx response so
* its available before a new Tx request is pushed by the
@@ -1459,32 +1468,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
spin_unlock_irqrestore(&queue->response_lock, flags);
}

-
static void make_tx_response(struct xenvif_queue *queue,
- struct xen_netif_tx_request *txp,
+ const struct xen_netif_tx_request *txp,
unsigned int extra_count,
- s8 st)
+ s8 status)
{
- RING_IDX i = queue->tx.rsp_prod_pvt;
- struct xen_netif_tx_response *resp;
-
- resp = RING_GET_RESPONSE(&queue->tx, i);
- resp->id = txp->id;
- resp->status = st;
-
- while (extra_count-- != 0)
- RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+ unsigned long flags;

- queue->tx.rsp_prod_pvt = ++i;
-}
+ spin_lock_irqsave(&queue->response_lock, flags);

-static void push_tx_responses(struct xenvif_queue *queue)
-{
- int notify;
+ _make_tx_response(queue, txp, extra_count, status);
+ push_tx_responses(queue);

- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
- if (notify)
- notify_remote_via_irq(queue->tx_irq);
+ spin_unlock_irqrestore(&queue->response_lock, flags);
}

static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
diff --git a/drivers/of/property.c b/drivers/of/property.c
index cf8dacf3e3b8..e1a2cb5ef401 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -762,7 +762,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node)
/* Walk 3 levels up only if there is 'ports' node. */
for (depth = 3; depth && node; depth--) {
node = of_get_next_parent(node);
- if (depth == 2 && !of_node_name_eq(node, "ports"))
+ if (depth == 2 && !of_node_name_eq(node, "ports") &&
+ !of_node_name_eq(node, "in-ports") &&
+ !of_node_name_eq(node, "out-ports"))
break;
}
return node;
@@ -1062,36 +1064,6 @@ of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
return of_device_get_match_data(dev);
}

-static struct device_node *of_get_compat_node(struct device_node *np)
-{
- of_node_get(np);
-
- while (np) {
- if (!of_device_is_available(np)) {
- of_node_put(np);
- np = NULL;
- }
-
- if (of_property_present(np, "compatible"))
- break;
-
- np = of_get_next_parent(np);
- }
-
- return np;
-}
-
-static struct device_node *of_get_compat_node_parent(struct device_node *np)
-{
- struct device_node *parent, *node;
-
- parent = of_get_parent(np);
- node = of_get_compat_node(parent);
- of_node_put(parent);
-
- return node;
-}
-
static void of_link_to_phandle(struct device_node *con_np,
struct device_node *sup_np)
{
@@ -1221,10 +1193,10 @@ static struct device_node *parse_##fname(struct device_node *np, \
* @parse_prop.prop_name: Name of property holding a phandle value
* @parse_prop.index: For properties holding a list of phandles, this is the
* index into the list
+ * @get_con_dev: If the consumer node containing the property is never converted
+ * to a struct device, implement this ops so fw_devlink can use it
+ * to find the true consumer.
* @optional: Describes whether a supplier is mandatory or not
- * @node_not_dev: The consumer node containing the property is never converted
- * to a struct device. Instead, parse ancestor nodes for the
- * compatible property to find a node corresponding to a device.
*
* Returns:
* parse_prop() return values are
@@ -1235,15 +1207,15 @@ static struct device_node *parse_##fname(struct device_node *np, \
struct supplier_bindings {
struct device_node *(*parse_prop)(struct device_node *np,
const char *prop_name, int index);
+ struct device_node *(*get_con_dev)(struct device_node *np);
bool optional;
- bool node_not_dev;
};

DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells")
DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells")
DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
-DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells")
+DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
@@ -1261,7 +1233,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL)
DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL)
DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL)
DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL)
-DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL)
DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells")
DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells")
DEFINE_SIMPLE_PROP(leds, "leds", NULL)
@@ -1326,6 +1297,17 @@ static struct device_node *parse_interrupts(struct device_node *np,
return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np;
}

+static struct device_node *parse_remote_endpoint(struct device_node *np,
+ const char *prop_name,
+ int index)
+{
+ /* Return NULL for index > 0 to signify end of remote-endpoints. */
+ if (!index || strcmp(prop_name, "remote-endpoint"))
+ return NULL;
+
+ return of_graph_get_remote_port_parent(np);
+}
+
static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_clocks, },
{ .parse_prop = parse_interconnects, },
@@ -1350,7 +1332,10 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_pinctrl6, },
{ .parse_prop = parse_pinctrl7, },
{ .parse_prop = parse_pinctrl8, },
- { .parse_prop = parse_remote_endpoint, .node_not_dev = true, },
+ {
+ .parse_prop = parse_remote_endpoint,
+ .get_con_dev = of_graph_get_port_parent,
+ },
{ .parse_prop = parse_pwms, },
{ .parse_prop = parse_resets, },
{ .parse_prop = parse_leds, },
@@ -1400,8 +1385,8 @@ static int of_link_property(struct device_node *con_np, const char *prop_name)
while ((phandle = s->parse_prop(con_np, prop_name, i))) {
struct device_node *con_dev_np;

- con_dev_np = s->node_not_dev
- ? of_get_compat_node_parent(con_np)
+ con_dev_np = s->get_con_dev
+ ? s->get_con_dev(con_np)
: of_node_get(con_np);
matched = true;
i++;
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index f278def7ef03..4f58345b5c68 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -50,6 +50,12 @@ static struct unittest_results {
failed; \
})

+#ifdef CONFIG_OF_KOBJ
+#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref)
+#else
+#define OF_KREF_READ(NODE) 1
+#endif
+
/*
* Expected message may have a message level other than KERN_INFO.
* Print the expected message only if the current loglevel will allow
@@ -570,7 +576,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
pr_err("missing testcase data\n");
return;
}
- prefs[i] = kref_read(&p[i]->kobj.kref);
+ prefs[i] = OF_KREF_READ(p[i]);
}

rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells");
@@ -693,9 +699,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);

for (i = 0; i < ARRAY_SIZE(p); ++i) {
- unittest(prefs[i] == kref_read(&p[i]->kobj.kref),
+ unittest(prefs[i] == OF_KREF_READ(p[i]),
"provider%d: expected:%d got:%d\n",
- i, prefs[i], kref_read(&p[i]->kobj.kref));
+ i, prefs[i], OF_KREF_READ(p[i]));
of_node_put(p[i]);
}
}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 59d6cb1a3a9d..06fc6f532d6c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2434,29 +2434,36 @@ static void pci_pme_list_scan(struct work_struct *work)
if (pdev->pme_poll) {
struct pci_dev *bridge = pdev->bus->self;
struct device *dev = &pdev->dev;
- int pm_status;
+ struct device *bdev = bridge ? &bridge->dev : NULL;
+ int bref = 0;

/*
- * If bridge is in low power state, the
- * configuration space of subordinate devices
- * may be not accessible
+ * If we have a bridge, it should be in an active/D0
+ * state or the configuration space of subordinate
+ * devices may not be accessible or stable over the
+ * course of the call.
*/
- if (bridge && bridge->current_state != PCI_D0)
- continue;
+ if (bdev) {
+ bref = pm_runtime_get_if_active(bdev, true);
+ if (!bref)
+ continue;
+
+ if (bridge->current_state != PCI_D0)
+ goto put_bridge;
+ }

/*
- * If the device is in a low power state it
- * should not be polled either.
+ * The device itself should be suspended but config
+ * space must be accessible, therefore it cannot be in
+ * D3cold.
*/
- pm_status = pm_runtime_get_if_active(dev, true);
- if (!pm_status)
- continue;
-
- if (pdev->current_state != PCI_D3cold)
+ if (pm_runtime_suspended(dev) &&
+ pdev->current_state != PCI_D3cold)
pci_pme_wakeup(pdev, NULL);

- if (pm_status > 0)
- pm_runtime_put(dev);
+put_bridge:
+ if (bref > 0)
+ pm_runtime_put(bdev);
} else {
list_del(&pme_dev->list);
kfree(pme_dev);
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
index 365d964b0f6a..bc0d414a6aff 100644
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -419,7 +419,7 @@ static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)),
- CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)),
/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)),
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c
index ee962804b830..edded392950c 100644
--- a/drivers/pmdomain/mediatek/mtk-pm-domains.c
+++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -508,6 +508,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
goto err_put_node;
}

+ /* recursive call to add all subdomains */
+ ret = scpsys_add_subdomain(scpsys, child);
+ if (ret)
+ goto err_put_node;
+
ret = pm_genpd_add_subdomain(parent_pd, child_pd);
if (ret) {
dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n",
@@ -517,11 +522,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name,
child_pd->name);
}
-
- /* recursive call to add all subdomains */
- ret = scpsys_add_subdomain(scpsys, child);
- if (ret)
- goto err_put_node;
}

return 0;
@@ -535,9 +535,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
{
int ret;

- if (scpsys_domain_is_on(pd))
- scpsys_power_off(&pd->genpd);
-
/*
* We're in the error cleanup already, so we only complain,
* but won't emit another error on top of the original one.
@@ -547,6 +544,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
dev_err(pd->scpsys->dev,
"failed to remove domain '%s' : %d - state may be inconsistent\n",
pd->genpd.name, ret);
+ if (scpsys_domain_is_on(pd))
+ scpsys_power_off(&pd->genpd);

clk_bulk_put(pd->num_clks, pd->clks);
clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks);
diff --git a/drivers/pmdomain/renesas/r8a77980-sysc.c b/drivers/pmdomain/renesas/r8a77980-sysc.c
index 39ca84a67daa..621e411fc999 100644
--- a/drivers/pmdomain/renesas/r8a77980-sysc.c
+++ b/drivers/pmdomain/renesas/r8a77980-sysc.c
@@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = {
PD_CPU_NOCR },
{ "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU,
PD_CPU_NOCR },
- { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON },
+ { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON,
+ PD_CPU_NOCR },
{ "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON },
{ "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR },
{ "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR },
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b92a32b4b114..04c64ce0a1ca 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
if (!recover) {
hash_del(&addr->hnode);
kfree(addr);
- continue;
+ } else {
+ /* prepare for recovery */
+ addr->disp_flag = QETH_DISP_ADDR_ADD;
}
- addr->disp_flag = QETH_DISP_ADDR_ADD;
}

mutex_unlock(&card->ip_lock);
@@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
rc = qeth_l3_register_addr_entry(card, addr);

- if (!rc) {
+ if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
+ /* keep it in the records */
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
} else {
+ /* bad address */
hash_del(&addr->hnode);
kfree(addr);
}
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 19eee108db02..5c8d1ba3f8f3 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *sel;
struct fcoe_fcf *fcf;
- unsigned long flags;

mutex_lock(&fip->ctlr_mutex);
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);

kfree_skb(fip->flogi_req);
fip->flogi_req = NULL;
list_for_each_entry(fcf, &fip->fcfs, list)
fcf->flogi_sent = 0;

- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
sel = fip->sel_fcf;

if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr))
@@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
{
struct fc_frame *fp;
struct fc_frame_header *fh;
- unsigned long flags;
u16 old_xid;
u8 op;
u8 mac[ETH_ALEN];
@@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
op = FIP_DT_FLOGI;
if (fip->mode == FIP_MODE_VN2VN)
break;
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
kfree_skb(fip->flogi_req);
fip->flogi_req = skb;
fip->flogi_req_send = 1;
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
schedule_work(&fip->timer_work);
return -EINPROGRESS;
case ELS_FDISC:
@@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip)
static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *fcf;
- unsigned long flags;
int error;

mutex_lock(&fip->ctlr_mutex);
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n");
fcf = fcoe_ctlr_select(fip);
if (!fcf || fcf->flogi_sent) {
@@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
fcoe_ctlr_solicit(fip, NULL);
error = fcoe_ctlr_flogi_send_locked(fip);
}
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
mutex_unlock(&fip->ctlr_mutex);
return error;
}
@@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *fcf;
- unsigned long flags;

- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
fcf = fip->sel_fcf;
if (!fcf || !fip->flogi_req_send)
goto unlock;
@@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
} else /* XXX */
LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n");
unlock:
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
}

/**
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a95936b18f69..7ceb982040a5 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -330,6 +330,7 @@ enum storvsc_request_type {
*/

static int storvsc_ringbuffer_size = (128 * 1024);
+static int aligned_ringbuffer_size;
static u32 max_outstanding_req_per_channel;
static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);

@@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
new_sc->next_request_id_callback = storvsc_next_request_id;

ret = vmbus_open(new_sc,
- storvsc_ringbuffer_size,
- storvsc_ringbuffer_size,
+ aligned_ringbuffer_size,
+ aligned_ringbuffer_size,
(void *)&props,
sizeof(struct vmstorage_channel_properties),
storvsc_on_channel_callback, new_sc);
@@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device,
dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);

stor_device->port_number = host->host_no;
- ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
+ ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc);
if (ret)
goto err_out1;

@@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev)
{
int ret;

- ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+ ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size,
hv_dev_is_fc(hv_dev));
return ret;
}
@@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void)
* the ring buffer indices) by the max request size (which is
* vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
*/
+ aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size);
max_outstanding_req_per_channel =
- ((storvsc_ringbuffer_size - PAGE_SIZE) /
+ ((aligned_ringbuffer_size - PAGE_SIZE) /
ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
sizeof(struct vstor_packet) + sizeof(u64),
sizeof(u64)));
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 272bc871a848..e2d3e3ec1378 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -2,6 +2,7 @@
// Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
// Copyright (C) 2008 Juergen Beisert

+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/completion.h>
#include <linux/delay.h>
@@ -660,15 +661,15 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
<< MX51_ECSPI_CTRL_BL_OFFSET;
else {
if (spi_imx->usedma) {
- ctrl |= (spi_imx->bits_per_word *
- spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1)
+ ctrl |= (spi_imx->bits_per_word - 1)
<< MX51_ECSPI_CTRL_BL_OFFSET;
} else {
if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST)
- ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1)
+ ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1)
<< MX51_ECSPI_CTRL_BL_OFFSET;
else
- ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1)
+ ctrl |= spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word,
+ BITS_PER_BYTE) * spi_imx->bits_per_word
<< MX51_ECSPI_CTRL_BL_OFFSET;
}
}
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 03aab661be9d..e982d3189fdc 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -166,10 +166,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
int scr;
u8 cdm = 0;
u32 speed;
- u8 bits_per_word;

/* Start with the generic configuration for this device. */
- bits_per_word = spi->bits_per_word;
speed = spi->max_speed_hz;

/*
@@ -177,9 +175,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
* the transfer to overwrite the generic configuration with zeros.
*/
if (t) {
- if (t->bits_per_word)
- bits_per_word = t->bits_per_word;
-
if (t->speed_hz)
speed = min(t->speed_hz, spi->max_speed_hz);
}
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 46db6d91542a..2d0883a64082 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work)
struct ad5933_state, work.work);
struct iio_dev *indio_dev = i2c_get_clientdata(st->client);
__be16 buf[2];
- int val[2];
+ u16 val[2];
unsigned char status;
int ret;

diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
index cf9f2370878a..580277dc9115 100644
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -203,7 +203,7 @@ struct tb_regs_switch_header {
#define ROUTER_CS_5_WOP BIT(1)
#define ROUTER_CS_5_WOU BIT(2)
#define ROUTER_CS_5_WOD BIT(3)
-#define ROUTER_CS_5_C3S BIT(23)
+#define ROUTER_CS_5_CNS BIT(23)
#define ROUTER_CS_5_PTO BIT(24)
#define ROUTER_CS_5_UTO BIT(25)
#define ROUTER_CS_5_HCO BIT(26)
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index 05ddb224c464..13c779e23011 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch *sw)
}

/* TBT3 supported by the CM */
- val |= ROUTER_CS_5_C3S;
+ val &= ~ROUTER_CS_5_CNS;

return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1);
}
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index db3204d2a305..3865c7f6dba8 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -237,6 +237,14 @@
#define MAX310x_REV_MASK (0xf8)
#define MAX310X_WRITE_BIT 0x80

+/* Port startup definitions */
+#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */
+#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */
+
+/* Crystal-related definitions */
+#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */
+#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */
+
/* MAX3107 specific */
#define MAX3107_REV_ID (0xa0)

@@ -583,7 +591,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
return 1;
}

-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
unsigned long freq, bool xtal)
{
unsigned int div, clksrc, pllcfg = 0;
@@ -641,12 +649,20 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,

/* Wait for crystal */
if (xtal) {
- unsigned int val;
- msleep(10);
- regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
- if (!(val & MAX310X_STS_CLKREADY_BIT)) {
- dev_warn(dev, "clock is not stable yet\n");
- }
+ bool stable = false;
+ unsigned int try = 0, val = 0;
+
+ do {
+ msleep(MAX310X_XTAL_WAIT_DELAY_MS);
+ regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
+
+ if (val & MAX310X_STS_CLKREADY_BIT)
+ stable = true;
+ } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES));
+
+ if (!stable)
+ return dev_err_probe(dev, -EAGAIN,
+ "clock is not stable\n");
}

return bestfreq;
@@ -1271,7 +1287,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
{
int i, ret, fmin, fmax, freq;
struct max310x_port *s;
- u32 uartclk = 0;
+ s32 uartclk = 0;
bool xtal;

for (i = 0; i < devtype->nr; i++)
@@ -1334,6 +1350,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
goto out_clk;

for (i = 0; i < devtype->nr; i++) {
+ bool started = false;
+ unsigned int try = 0, val = 0;
+
/* Reset port */
regmap_write(regmaps[i], MAX310X_MODE2_REG,
MAX310X_MODE2_RST_BIT);
@@ -1342,13 +1361,27 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty

/* Wait for port startup */
do {
- regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
- } while (ret != 0x01);
+ msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS);
+ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val);
+
+ if (val == 0x01)
+ started = true;
+ } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES));
+
+ if (!started) {
+ ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
+ goto out_uart;
+ }

regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
}

uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
+ if (uartclk < 0) {
+ ret = uartclk;
+ goto out_uart;
+ }
+
dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk);

for (i = 0; i < devtype->nr; i++) {
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 8eeecf8ad359..380a8b0590e3 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -605,13 +605,16 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s)
return;
}

- pending = uart_port_tx(&s->port, ch,
+ pending = uart_port_tx_flags(&s->port, ch, UART_TX_NOSTOP,
!(mxs_read(s, REG_STAT) & AUART_STAT_TXFF),
mxs_write(ch, s, REG_DATA));
if (pending)
mxs_set(AUART_INTR_TXIEN, s, REG_INTR);
else
mxs_clr(AUART_INTR_TXIEN, s, REG_INTR);
+
+ if (uart_tx_stopped(&s->port))
+ mxs_auart_stop_tx(&s->port);
}

static void mxs_auart_rx_char(struct mxs_auart_port *s)
diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h
index d9bb3d3f026e..2a38e1eb6546 100644
--- a/drivers/usb/chipidea/ci.h
+++ b/drivers/usb/chipidea/ci.h
@@ -176,6 +176,7 @@ struct hw_bank {
* @enabled_otg_timer_bits: bits of enabled otg timers
* @next_otg_timer: next nearest enabled timer to be expired
* @work: work for role changing
+ * @power_lost_work: work for power lost handling
* @wq: workqueue thread
* @qh_pool: allocation pool for queue heads
* @td_pool: allocation pool for transfer descriptors
@@ -226,6 +227,7 @@ struct ci_hdrc {
enum otg_fsm_timer next_otg_timer;
struct usb_role_switch *role_switch;
struct work_struct work;
+ struct work_struct power_lost_work;
struct workqueue_struct *wq;

struct dma_pool *qh_pool;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 85e9c3ab66e9..ca71df4f32e4 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -856,6 +856,27 @@ static int ci_extcon_register(struct ci_hdrc *ci)
return 0;
}

+static void ci_power_lost_work(struct work_struct *work)
+{
+ struct ci_hdrc *ci = container_of(work, struct ci_hdrc, power_lost_work);
+ enum ci_role role;
+
+ disable_irq_nosync(ci->irq);
+ pm_runtime_get_sync(ci->dev);
+ if (!ci_otg_is_fsm_mode(ci)) {
+ role = ci_get_role(ci);
+
+ if (ci->role != role) {
+ ci_handle_id_switch(ci);
+ } else if (role == CI_ROLE_GADGET) {
+ if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
+ usb_gadget_vbus_connect(&ci->gadget);
+ }
+ }
+ pm_runtime_put_sync(ci->dev);
+ enable_irq(ci->irq);
+}
+
static DEFINE_IDA(ci_ida);

struct platform_device *ci_hdrc_add_device(struct device *dev,
@@ -1045,6 +1066,8 @@ static int ci_hdrc_probe(struct platform_device *pdev)

spin_lock_init(&ci->lock);
mutex_init(&ci->mutex);
+ INIT_WORK(&ci->power_lost_work, ci_power_lost_work);
+
ci->dev = dev;
ci->platdata = dev_get_platdata(dev);
ci->imx28_write_fix = !!(ci->platdata->flags &
@@ -1396,25 +1419,6 @@ static int ci_suspend(struct device *dev)
return 0;
}

-static void ci_handle_power_lost(struct ci_hdrc *ci)
-{
- enum ci_role role;
-
- disable_irq_nosync(ci->irq);
- if (!ci_otg_is_fsm_mode(ci)) {
- role = ci_get_role(ci);
-
- if (ci->role != role) {
- ci_handle_id_switch(ci);
- } else if (role == CI_ROLE_GADGET) {
- if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
- usb_gadget_vbus_connect(&ci->gadget);
- }
- }
-
- enable_irq(ci->irq);
-}
-
static int ci_resume(struct device *dev)
{
struct ci_hdrc *ci = dev_get_drvdata(dev);
@@ -1446,7 +1450,7 @@ static int ci_resume(struct device *dev)
ci_role(ci)->resume(ci, power_lost);

if (power_lost)
- ci_handle_power_lost(ci);
+ queue_work(system_freezable_wq, &ci->power_lost_work);

if (ci->supports_runtime_pm) {
pm_runtime_disable(dev);
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 84d91b1c1eed..0886b19d2e1c 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi)
return ret;
}

- root = debugfs_create_dir(dev_name(dev), ulpi_root);
+ root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root);
debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops);

dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n",
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bd27741327df..71635dfa741d 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2047,9 +2047,19 @@ static void update_port_device_state(struct usb_device *udev)

if (udev->parent) {
hub = usb_hub_to_struct_hub(udev->parent);
- port_dev = hub->ports[udev->portnum - 1];
- WRITE_ONCE(port_dev->state, udev->state);
- sysfs_notify_dirent(port_dev->state_kn);
+
+ /*
+ * The Link Layer Validation System Driver (lvstest)
+ * has a test step to unbind the hub before running the
+ * rest of the procedure. This triggers hub_disconnect
+ * which will set the hub's maxchild to 0, further
+ * resulting in usb_hub_to_struct_hub returning NULL.
+ */
+ if (hub) {
+ port_dev = hub->ports[udev->portnum - 1];
+ WRITE_ONCE(port_dev->state, udev->state);
+ sysfs_notify_dirent(port_dev->state_kn);
+ }
}
}

@@ -2380,17 +2390,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev)
}
} else if (desc->bLength == sizeof
(struct usb_otg_descriptor)) {
- /* Set a_alt_hnp_support for legacy otg device */
- err = usb_control_msg(udev,
- usb_sndctrlpipe(udev, 0),
- USB_REQ_SET_FEATURE, 0,
- USB_DEVICE_A_ALT_HNP_SUPPORT,
- 0, NULL, 0,
- USB_CTRL_SET_TIMEOUT);
- if (err < 0)
- dev_err(&udev->dev,
- "set a_alt_hnp_support failed: %d\n",
- err);
+ /*
+ * We are operating on a legacy OTP device
+ * These should be told that they are operating
+ * on the wrong port if we have another port that does
+ * support HNP
+ */
+ if (bus->otg_port != 0) {
+ /* Set a_alt_hnp_support for legacy otg device */
+ err = usb_control_msg(udev,
+ usb_sndctrlpipe(udev, 0),
+ USB_REQ_SET_FEATURE, 0,
+ USB_DEVICE_A_ALT_HNP_SUPPORT,
+ 0, NULL, 0,
+ USB_CTRL_SET_TIMEOUT);
+ if (err < 0)
+ dev_err(&udev->dev,
+ "set a_alt_hnp_support failed: %d\n",
+ err);
+ }
}
}
#endif
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89de363ecf8b..4c8dd6724678 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4703,15 +4703,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
unsigned long flags;
int ret;

- if (!dwc->gadget_driver)
- return 0;
-
ret = dwc3_gadget_soft_disconnect(dwc);
if (ret)
goto err;

spin_lock_irqsave(&dwc->lock, flags);
- dwc3_disconnect_gadget(dwc);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);

return 0;
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 722a3ab2b337..c265a1f62fc1 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -545,21 +545,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,

static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
{
+ int rc;
+
if (!fsg_is_set(common))
return false;
bh->state = BUF_STATE_SENDING;
- if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq))
+ rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq);
+ if (rc) {
bh->state = BUF_STATE_EMPTY;
+ if (rc == -ESHUTDOWN) {
+ common->running = 0;
+ return false;
+ }
+ }
return true;
}

static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
{
+ int rc;
+
if (!fsg_is_set(common))
return false;
bh->state = BUF_STATE_RECEIVING;
- if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq))
+ rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq);
+ if (rc) {
bh->state = BUF_STATE_FULL;
+ if (rc == -ESHUTDOWN) {
+ common->running = 0;
+ return false;
+ }
+ }
return true;
}

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47ae2d520fda..6d455ca76125 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4862,7 +4862,8 @@ static void run_state_machine(struct tcpm_port *port)
break;
case PORT_RESET:
tcpm_reset_port(port);
- tcpm_set_cc(port, TYPEC_CC_OPEN);
+ tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ?
+ TYPEC_CC_RD : tcpm_rp_cc(port));
tcpm_set_state(port, PORT_RESET_WAIT_OFF,
PD_T_ERROR_RECOVERY);
break;
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 61b64558f96c..8f9dff993b3d 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -935,7 +935,9 @@ static void ucsi_handle_connector_change(struct work_struct *work)

clear_bit(EVENT_PENDING, &con->ucsi->flags);

+ mutex_lock(&ucsi->ppm_lock);
ret = ucsi_acknowledge_connector_change(ucsi);
+ mutex_unlock(&ucsi->ppm_lock);
if (ret)
dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);

diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 6bbf490ac401..fa222080887d 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -73,9 +73,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
const void *val, size_t val_len)
{
struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI;
int ret;

- set_bit(COMMAND_PENDING, &ua->flags);
+ if (ack)
+ set_bit(ACK_PENDING, &ua->flags);
+ else
+ set_bit(COMMAND_PENDING, &ua->flags);

ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
if (ret)
@@ -85,7 +89,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
ret = -ETIMEDOUT;

out_clear_bit:
- clear_bit(COMMAND_PENDING, &ua->flags);
+ if (ack)
+ clear_bit(ACK_PENDING, &ua->flags);
+ else
+ clear_bit(COMMAND_PENDING, &ua->flags);

return ret;
}
@@ -142,8 +149,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
if (UCSI_CCI_CONNECTOR(cci))
ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));

- if (test_bit(COMMAND_PENDING, &ua->flags) &&
- cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))
+ if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
+ complete(&ua->complete);
+ if (cci & UCSI_CCI_COMMAND_COMPLETE &&
+ test_bit(COMMAND_PENDING, &ua->flags))
complete(&ua->complete);
}

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5a97db988810..3afd435dc2c7 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1467,6 +1467,7 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
*/
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
+ LIST_HEAD(retry_list);
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
@@ -1488,6 +1489,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)

spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
+ u64 used;
int trimming;

block_group = list_first_entry(&fs_info->unused_bgs,
@@ -1523,9 +1525,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
goto next;
}

+ spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- if (block_group->reserved || block_group->pinned ||
- block_group->used || block_group->ro ||
+ if (btrfs_is_block_group_used(block_group) || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
@@ -1535,10 +1537,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
*/
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+
+ /*
+ * The block group may be unused but there may be space reserved
+ * accounting with the existence of that block group, that is,
+ * space_info->bytes_may_use was incremented by a task but no
+ * space was yet allocated from the block group by the task.
+ * That space may or may not be allocated, as we are generally
+ * pessimistic about space reservation for metadata as well as
+ * for data when using compression (as we reserve space based on
+ * the worst case, when data can't be compressed, and before
+ * actually attempting compression, before starting writeback).
+ *
+ * So check if the total space of the space_info minus the size
+ * of this block group is less than the used space of the
+ * space_info - if that's the case, then it means we have tasks
+ * that might be relying on the block group in order to allocate
+ * extents, and add back the block group to the unused list when
+ * we finish, so that we retry later in case no tasks ended up
+ * needing to allocate extents from the block group.
+ */
+ used = btrfs_space_info_used(space_info, true);
+ if (space_info->total_bytes - block_group->length < used) {
+ /*
+ * Add a reference for the list, compensate for the ref
+ * drop under the "next" label for the
+ * fs_info->unused_bgs list.
+ */
+ btrfs_get_block_group(block_group);
+ list_add_tail(&block_group->bg_list, &retry_list);
+
+ trace_btrfs_skip_unused_block_group(block_group);
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem);
goto next;
}
+
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);

/* We don't want to force the issue, only flip if it's ok. */
ret = inc_block_group_ro(block_group, 0);
@@ -1662,12 +1703,16 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
return;

flip_async:
btrfs_end_transaction(trans);
+ spin_lock(&fs_info->unused_bgs_lock);
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_put_block_group(block_group);
btrfs_discard_punt_unused_bgs_list(fs_info);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 2bdbcb834f95..089979981e4a 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -255,6 +255,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
return (block_group->start + block_group->length);
}

+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index eef341bbcc60..4a7aefa5f9cf 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -245,7 +245,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 reserve_size = 0;
u64 qgroup_rsv_size = 0;
- u64 csum_leaves;
unsigned outstanding_extents;

lockdep_assert_held(&inode->lock);
@@ -260,10 +259,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
outstanding_extents);
reserve_size += btrfs_calc_metadata_size(fs_info, 1);
}
- csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
- inode->csum_bytes);
- reserve_size += btrfs_calc_insert_metadata_size(fs_info,
- csum_leaves);
+ if (!(inode->flags & BTRFS_INODE_NODATASUM)) {
+ u64 csum_leaves;
+
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
+ reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves);
+ }
/*
* For qgroup rsv, the calculation is very simple:
* account one nodesize for each outstanding extent
@@ -278,14 +279,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
}

-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+static void calc_inode_reservations(struct btrfs_inode *inode,
u64 num_bytes, u64 disk_num_bytes,
u64 *meta_reserve, u64 *qgroup_reserve)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 nr_extents = count_max_extents(fs_info, num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+ u64 csum_leaves;
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);

+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ csum_leaves = 0;
+ else
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
nr_extents + csum_leaves);

@@ -337,7 +344,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
* everything out and try again, which is bad. This way we just
* over-reserve slightly, and clean up the mess when we are done.
*/
- calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+ calc_inode_reservations(inode, num_bytes, disk_num_bytes,
&meta_reserve, &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true,
noflush);
@@ -358,7 +365,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
nr_extents = count_max_extents(fs_info, num_bytes);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += disk_num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);

@@ -392,7 +400,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,

num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&inode->lock);
- inode->csum_bytes -= num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes -= num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b79781df7071..ffb9ae303f2a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1311,8 +1311,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
- /* Shouldn't get preallocated anon_dev for cached roots */
- ASSERT(!anon_dev);
+ /*
+ * Some other caller may have read out the newly inserted
+ * subvolume already (for things like backref walk etc). Not
+ * that common but still possible. In that case, we just need
+ * to free the anon_dev.
+ */
+ if (unlikely(anon_dev)) {
+ free_anon_bdev(anon_dev);
+ anon_dev = 0;
+ }
+
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f250e2083c7e..ca79c2b8adc4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3168,8 +3168,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);

- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);

/*
* If the ordered extent had an IOERR or something else went
@@ -10223,6 +10238,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
return -EINVAL;

+ /*
+ * Compressed extents should always have checksums, so error out if we
+ * have a NOCOW file or inode was created while mounted with NODATASUM.
+ */
+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ return -EINVAL;
+
orig_count = iov_iter_count(from);

/* The extent size must be sane. */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 908215928d6a..a26a909a5ad1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3810,6 +3810,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}

+ if (sa->create && is_fstree(sa->qgroupid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a006f5160e6b..9acdd0f91a5a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1659,6 +1659,15 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
return ret;
}

+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+ return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+ qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1678,6 +1687,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}

+ if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* Check if there are no children of this qgroup */
if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index db94eefda27e..994c0be8055c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8111,7 +8111,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
}

if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
- ret = -EINVAL;
+ ret = -EOPNOTSUPP;
goto out;
}

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7f7e5f5d643c..fc9f8f1a9036 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1415,7 +1415,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
if (flushing & CEPH_CAP_XATTR_EXCL) {
arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
arg->xattr_version = ci->i_xattrs.version;
- arg->xattr_buf = ci->i_xattrs.blob;
+ arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
} else {
arg->xattr_buf = NULL;
arg->old_xattr_buf = NULL;
@@ -1513,6 +1513,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
encode_cap_msg(msg, arg);
ceph_con_send(&arg->session->s_con, msg);
ceph_buffer_put(arg->old_xattr_buf);
+ ceph_buffer_put(arg->xattr_buf);
if (arg->wake)
wake_up_all(&ci->i_cap_wq);
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 529ca47da035..7a2d42a84807 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1909,11 +1909,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);

- this_cpu_inc(discard_pa_seq);
- e4b->bd_info->bb_free += count;
- if (first < e4b->bd_info->bb_first_free)
- e4b->bd_info->bb_first_free = first;
-
/* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
@@ -1927,23 +1922,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;

+ /*
+ * Fastcommit replay can free already freed blocks which
+ * corrupts allocation info. Regenerate it.
+ */
+ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+ mb_regenerate_buddy(e4b);
+ goto check;
+ }
+
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
- if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block (bit %u); block bitmap corrupt.",
- block);
- ext4_mark_group_bitmap_corrupted(
- sb, e4b->bd_group,
+ ext4_grp_locked_error(sb, e4b->bd_group,
+ inode ? inode->i_ino : 0, blocknr,
+ "freeing already freed block (bit %u); block bitmap corrupt.",
+ block);
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
- } else {
- mb_regenerate_buddy(e4b);
- }
- goto done;
+ return;
}

+ this_cpu_inc(discard_pa_seq);
+ e4b->bd_info->bb_free += count;
+ if (first < e4b->bd_info->bb_first_free)
+ e4b->bd_info->bb_first_free = first;
+
/* let's maintain fragments counter */
if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
@@ -1968,9 +1971,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first <= last)
mb_buddy_mark_free(e4b, first >> 1, last >> 1);

-done:
mb_set_largest_free_order(sb, e4b->bd_info);
mb_update_avg_fragment_size(sb, e4b->bd_info);
+check:
mb_check_buddy(e4b);
}

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 18a9e7c47975..e6976716e85d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -619,6 +619,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;

+ *moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@@ -673,7 +674,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
- move_extent_per_page(o_filp, donor_inode,
+ *moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@@ -683,9 +684,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
- *moved_len = o_start - orig_blk;
- if (*moved_len > len)
- *moved_len = len;

out:
if (*moved_len) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 60fce26ff937..ac519515ef6c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -123,6 +123,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;

/*
* vma address alignment (but not the pgoff alignment) has
@@ -164,10 +165,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);

ret = -ENOMEM;
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;

ret = 0;
@@ -1390,6 +1401,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1434,11 +1446,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par

case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;

case Opt_min_size:
diff --git a/fs/namespace.c b/fs/namespace.c
index bfc5cff0e196..e6c61d4997cc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4470,10 +4470,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
/*
* If this is an attached mount make sure it's located in the callers
* mount namespace. If it's not don't let the caller interact with it.
- * If this is a detached mount make sure it has an anonymous mount
- * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+ *
+ * If this mount doesn't have a parent it's most often simply a
+ * detached mount with an anonymous mount namespace. IOW, something
+ * that's simply not attached yet. But there are apparently also users
+ * that do change mount properties on the rootfs itself. That obviously
+ * neither has a parent nor is it a detached mount so we cannot
+ * unconditionally check for detached mounts.
*/
- if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+ if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
goto out;

/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f4cccbf664ce..522596060252 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4944,10 +4944,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;

- spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
nfsd_break_one_deleg(dp);
- spin_unlock(&fp->fi_lock);
return false;
}

@@ -5556,12 +5554,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (status)
goto out_unlock;

+ status = -EAGAIN;
+ if (fp->fi_had_conflict)
+ goto out_unlock;
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
- if (fp->fi_had_conflict)
- status = -EAGAIN;
- else
- status = hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);

diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 740ce26d1e76..0505feef79f4 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);

mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ wait_on_page_writeback(page);
out:
sb_end_pagefault(inode->i_sb);
return vmf_fs_error(ret);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0955b657938f..a9b8d77c8c1d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,

static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;

bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;

kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}

- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 7ec16879756e..a03e37207f48 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1704,7 +1704,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)

list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1715,6 +1714,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_begin_page_io(fs_page);
fs_page = bh->b_page;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)

list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
fs_page = bh->b_page;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));

- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, 0);
fs_page = bh->b_page;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2c2efbe685d8..37b8061d84bb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
+ cutime = cstime = 0;
cgtime = gtime = 0;

if (lock_task_sighand(task, &flags)) {
@@ -545,7 +545,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;

if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
@@ -561,10 +560,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
- if (!whole) {
+
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+ task_cputime_adjusted(task, &utime, &stime);
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
}

diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 0ed6eb915c6a..19440255944b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -3426,8 +3426,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
* the user on mount
*/
if ((cifs_sb->ctx->wsize == 0) ||
- (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
- cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+ (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) {
+ cifs_sb->ctx->wsize =
+ round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE);
+ /*
+ * in the very unlikely event that the server sent a max write size under PAGE_SIZE,
+ * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096
+ */
+ if (cifs_sb->ctx->wsize == 0) {
+ cifs_sb->ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n");
+ }
+ }
if ((cifs_sb->ctx->rsize == 0) ||
(cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a3493da12ad1..75f2c8734ff5 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1107,6 +1107,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_wsize:
ctx->wsize = result.uint_32;
ctx->got_wsize = true;
+ if (ctx->wsize % PAGE_SIZE != 0) {
+ ctx->wsize = round_down(ctx->wsize, PAGE_SIZE);
+ if (ctx->wsize == 0) {
+ ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE);
+ } else {
+ cifs_dbg(VFS,
+ "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n",
+ ctx->wsize, PAGE_SIZE);
+ }
+ }
break;
case Opt_acregmax:
ctx->acregmax = HZ * result.uint_32;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index a6968573b775..4a517b280f2b 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page)
return s;
}

+static void fs_context_set_ids(struct smb3_fs_context *ctx)
+{
+ kuid_t uid = current_fsuid();
+ kgid_t gid = current_fsgid();
+
+ if (ctx->multiuser) {
+ if (!ctx->uid_specified)
+ ctx->linux_uid = uid;
+ if (!ctx->gid_specified)
+ ctx->linux_gid = gid;
+ }
+ if (!ctx->cruid_specified)
+ ctx->cred_uid = uid;
+}
+
/*
* Create a vfsmount that we can automount
*/
@@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path)
tmp.leaf_fullpath = NULL;
tmp.UNC = tmp.prepath = NULL;
tmp.dfs_root_ses = NULL;
+ fs_context_set_ids(&tmp);

rc = smb3_fs_context_dup(ctx, &tmp);
if (rc) {
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index e33ed0fbc318..5850f861e7e1 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
goto out;
}

- while (bytes_left >= sizeof(*p)) {
+ while (bytes_left >= (ssize_t)sizeof(*p)) {
memset(&tmp_iface, 0, sizeof(tmp_iface));
tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 6ddfe3fef55f..3e885cdc5ffc 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6173,8 +6173,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
kvfree(rpc_resp);
} else {
err = ksmbd_iov_pin_rsp(work, (void *)rsp,
@@ -6384,8 +6386,10 @@ int smb2_read(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
ksmbd_fd_put(work, fp);
return 0;

diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index efbdc47c74dc..110e8a272189 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -2,8 +2,9 @@
/*
* event_inode.c - part of tracefs, a pseudo file system for activating tracing
*
- * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
+ * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@xxxxxxxxxxx>
* Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@xxxxxxxxxx>
+ * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@xxxxxxxxxxx>
*
* eventfs is used to dynamically create inodes and dentries based on the
* meta data provided by the tracing system.
@@ -23,51 +24,32 @@
#include <linux/delay.h>
#include "internal.h"

-struct eventfs_inode {
- struct list_head e_top_files;
-};
-
/*
- * struct eventfs_file - hold the properties of the eventfs files and
- * directories.
- * @name: the name of the file or directory to create
- * @d_parent: holds parent's dentry
- * @dentry: once accessed holds dentry
- * @list: file or directory to be added to parent directory
- * @ei: list of files and directories within directory
- * @fop: file_operations for file or directory
- * @iop: inode_operations for file or directory
- * @data: something that the caller will want to get to later on
- * @is_freed: Flag set if the eventfs is on its way to be freed
- * @mode: the permission that the file or directory should have
- * @uid: saved uid if changed
- * @gid: saved gid if changed
+ * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
+ * to the ei->dentry must be done under this mutex and after checking
+ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
+ * is on its way to being freed after the last dput() is made on it.
*/
-struct eventfs_file {
- const char *name;
- struct dentry *d_parent;
- struct dentry *dentry;
- struct list_head list;
- struct eventfs_inode *ei;
- const struct file_operations *fop;
- const struct inode_operations *iop;
- /*
- * Union - used for deletion
- * @llist: for calling dput() if needed after RCU
- * @rcu: eventfs_file to delete in RCU
- */
- union {
- struct llist_node llist;
- struct rcu_head rcu;
- };
- void *data;
- unsigned int is_freed:1;
- unsigned int mode:31;
- kuid_t uid;
- kgid_t gid;
-};
-
static DEFINE_MUTEX(eventfs_mutex);
+
+/* Choose something "unique" ;-) */
+#define EVENTFS_FILE_INODE_INO 0x12c4e37
+
+/* Just try to make something consistent and unique */
+static int eventfs_dir_ino(struct eventfs_inode *ei)
+{
+ if (!ei->ino)
+ ei->ino = get_next_ino();
+
+ return ei->ino;
+}
+
+/*
+ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+ * its parent's list and will have is_freed set (under eventfs_mutex).
+ * After the SRCU grace period is over and the last dput() is called
+ * the ei is freed.
+ */
DEFINE_STATIC_SRCU(eventfs_srcu);

/* Mode is unsigned short, use the upper bits for flags */
@@ -75,60 +57,202 @@ enum {
EVENTFS_SAVE_MODE = BIT(16),
EVENTFS_SAVE_UID = BIT(17),
EVENTFS_SAVE_GID = BIT(18),
+ EVENTFS_TOPLEVEL = BIT(19),
};

#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)

+/*
+ * eventfs_inode reference count management.
+ *
+ * NOTE! We count only references from dentries, in the
+ * form 'dentry->d_fsdata'. There are also references from
+ * directory inodes ('ti->private'), but the dentry reference
+ * count is always a superset of the inode reference count.
+ */
+static void release_ei(struct kref *ref)
+{
+ struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
+
+ WARN_ON_ONCE(!ei->is_freed);
+
+ kfree(ei->entry_attrs);
+ kfree_const(ei->name);
+ kfree_rcu(ei, rcu);
+}
+
+static inline void put_ei(struct eventfs_inode *ei)
+{
+ if (ei)
+ kref_put(&ei->kref, release_ei);
+}
+
+static inline void free_ei(struct eventfs_inode *ei)
+{
+ if (ei) {
+ ei->is_freed = 1;
+ put_ei(ei);
+ }
+}
+
+static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
+{
+ if (ei)
+ kref_get(&ei->kref);
+ return ei;
+}
+
static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags);
-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
-static int eventfs_release(struct inode *inode, struct file *file);
+static int eventfs_iterate(struct file *file, struct dir_context *ctx);

-static void update_attr(struct eventfs_file *ef, struct iattr *iattr)
+static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
{
unsigned int ia_valid = iattr->ia_valid;

if (ia_valid & ATTR_MODE) {
- ef->mode = (ef->mode & ~EVENTFS_MODE_MASK) |
+ attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
(iattr->ia_mode & EVENTFS_MODE_MASK) |
EVENTFS_SAVE_MODE;
}
if (ia_valid & ATTR_UID) {
- ef->mode |= EVENTFS_SAVE_UID;
- ef->uid = iattr->ia_uid;
+ attr->mode |= EVENTFS_SAVE_UID;
+ attr->uid = iattr->ia_uid;
}
if (ia_valid & ATTR_GID) {
- ef->mode |= EVENTFS_SAVE_GID;
- ef->gid = iattr->ia_gid;
+ attr->mode |= EVENTFS_SAVE_GID;
+ attr->gid = iattr->ia_gid;
}
}

static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
- struct iattr *iattr)
+ struct iattr *iattr)
{
- struct eventfs_file *ef;
+ const struct eventfs_entry *entry;
+ struct eventfs_inode *ei;
+ const char *name;
int ret;

mutex_lock(&eventfs_mutex);
- ef = dentry->d_fsdata;
- if (ef && ef->is_freed) {
+ ei = dentry->d_fsdata;
+ if (ei->is_freed) {
/* Do not allow changes if the event is about to be removed. */
mutex_unlock(&eventfs_mutex);
return -ENODEV;
}

+ /* Preallocate the children mode array if necessary */
+ if (!(dentry->d_inode->i_mode & S_IFDIR)) {
+ if (!ei->entry_attrs) {
+ ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs),
+ GFP_NOFS);
+ if (!ei->entry_attrs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+
ret = simple_setattr(idmap, dentry, iattr);
- if (!ret && ef)
- update_attr(ef, iattr);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * If this is a dir, then update the ei cache, only the file
+ * mode is saved in the ei->m_children, and the ownership is
+ * determined by the parent directory.
+ */
+ if (dentry->d_inode->i_mode & S_IFDIR) {
+ /*
+ * The events directory dentry is never freed, unless its
+ * part of an instance that is deleted. It's attr is the
+ * default for its child files and directories.
+ * Do not update it. It's not used for its own mode or ownership.
+ */
+ if (ei->is_events) {
+ /* But it still needs to know if it was modified */
+ if (iattr->ia_valid & ATTR_UID)
+ ei->attr.mode |= EVENTFS_SAVE_UID;
+ if (iattr->ia_valid & ATTR_GID)
+ ei->attr.mode |= EVENTFS_SAVE_GID;
+ } else {
+ update_attr(&ei->attr, iattr);
+ }
+
+ } else {
+ name = dentry->d_name.name;
+
+ for (int i = 0; i < ei->nr_entries; i++) {
+ entry = &ei->entries[i];
+ if (strcmp(name, entry->name) == 0) {
+ update_attr(&ei->entry_attrs[i], iattr);
+ break;
+ }
+ }
+ }
+ out:
mutex_unlock(&eventfs_mutex);
return ret;
}

+static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
+{
+ struct inode *root;
+
+ /* Only update if the "events" was on the top level */
+ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+ return;
+
+ /* Get the tracefs root inode. */
+ root = d_inode(sb->s_root);
+ ei->attr.uid = root->i_uid;
+ ei->attr.gid = root->i_gid;
+}
+
+static void set_top_events_ownership(struct inode *inode)
+{
+ struct tracefs_inode *ti = get_tracefs(inode);
+ struct eventfs_inode *ei = ti->private;
+
+ /* The top events directory doesn't get automatically updated */
+ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+ return;
+
+ update_top_events_attr(ei, inode->i_sb);
+
+ if (!(ei->attr.mode & EVENTFS_SAVE_UID))
+ inode->i_uid = ei->attr.uid;
+
+ if (!(ei->attr.mode & EVENTFS_SAVE_GID))
+ inode->i_gid = ei->attr.gid;
+}
+
+static int eventfs_get_attr(struct mnt_idmap *idmap,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
+{
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = d_backing_inode(dentry);
+
+ set_top_events_ownership(inode);
+
+ generic_fillattr(idmap, request_mask, inode, stat);
+ return 0;
+}
+
+static int eventfs_permission(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
+{
+ set_top_events_ownership(inode);
+ return generic_permission(idmap, inode, mask);
+}
+
static const struct inode_operations eventfs_root_dir_inode_operations = {
.lookup = eventfs_root_lookup,
.setattr = eventfs_set_attr,
+ .getattr = eventfs_get_attr,
+ .permission = eventfs_permission,
};

static const struct inode_operations eventfs_file_inode_operations = {
@@ -136,764 +260,548 @@ static const struct inode_operations eventfs_file_inode_operations = {
};

static const struct file_operations eventfs_file_operations = {
- .open = dcache_dir_open_wrapper,
.read = generic_read_dir,
- .iterate_shared = dcache_readdir_wrapper,
+ .iterate_shared = eventfs_iterate,
.llseek = generic_file_llseek,
- .release = eventfs_release,
};

-static void update_inode_attr(struct inode *inode, struct eventfs_file *ef)
+/* Return the evenfs_inode of the "events" directory */
+static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
+{
+ struct eventfs_inode *ei;
+
+ do {
+ // The parent is stable because we do not do renames
+ dentry = dentry->d_parent;
+ // ... and directories always have d_fsdata
+ ei = dentry->d_fsdata;
+
+ /*
+ * If the ei is being freed, the ownership of the children
+ * doesn't matter.
+ */
+ if (ei->is_freed) {
+ ei = NULL;
+ break;
+ }
+ // Walk upwards until you find the events inode
+ } while (!ei->is_events);
+
+ update_top_events_attr(ei, dentry->d_sb);
+
+ return ei;
+}
+
+static void update_inode_attr(struct dentry *dentry, struct inode *inode,
+ struct eventfs_attr *attr, umode_t mode)
{
- inode->i_mode = ef->mode & EVENTFS_MODE_MASK;
+ struct eventfs_inode *events_ei = eventfs_find_events(dentry);
+
+ if (!events_ei)
+ return;
+
+ inode->i_mode = mode;
+ inode->i_uid = events_ei->attr.uid;
+ inode->i_gid = events_ei->attr.gid;
+
+ if (!attr)
+ return;
+
+ if (attr->mode & EVENTFS_SAVE_MODE)
+ inode->i_mode = attr->mode & EVENTFS_MODE_MASK;

- if (ef->mode & EVENTFS_SAVE_UID)
- inode->i_uid = ef->uid;
+ if (attr->mode & EVENTFS_SAVE_UID)
+ inode->i_uid = attr->uid;

- if (ef->mode & EVENTFS_SAVE_GID)
- inode->i_gid = ef->gid;
+ if (attr->mode & EVENTFS_SAVE_GID)
+ inode->i_gid = attr->gid;
}

/**
- * create_file - create a file in the tracefs filesystem
- * @ef: the eventfs_file
- * @parent: parent dentry for this file.
+ * lookup_file - look up a file in the tracefs filesystem
+ * @dentry: the dentry to look up
+ * @mode: the permission that the file should have.
+ * @attr: saved attributes changed by user
* @data: something that the caller will want to get to later on.
* @fop: struct file_operations that should be used for this file.
*
- * This is the basic "create a file" function for tracefs. It allows for a
- * wide range of flexibility in creating a file.
- *
- * This function will return a pointer to a dentry if it succeeds. This
- * pointer must be passed to the tracefs_remove() function when the file is
- * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
- *
- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
- * returned.
+ * This function creates a dentry that represents a file in the eventsfs_inode
+ * directory. The inode.i_private pointer will point to @data in the open()
+ * call.
*/
-static struct dentry *create_file(struct eventfs_file *ef,
- struct dentry *parent, void *data,
+static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
+ struct dentry *dentry,
+ umode_t mode,
+ struct eventfs_attr *attr,
+ void *data,
const struct file_operations *fop)
{
struct tracefs_inode *ti;
- struct dentry *dentry;
struct inode *inode;

- if (!(ef->mode & S_IFMT))
- ef->mode |= S_IFREG;
-
- if (WARN_ON_ONCE(!S_ISREG(ef->mode)))
- return NULL;
-
- dentry = eventfs_start_creating(ef->name, parent);
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;

- if (IS_ERR(dentry))
- return dentry;
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
+ return ERR_PTR(-EIO);

inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return eventfs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);

/* If the user updated the directory's attributes, use them */
- update_inode_attr(inode, ef);
+ update_inode_attr(dentry, inode, attr, mode);

inode->i_op = &eventfs_file_inode_operations;
inode->i_fop = fop;
inode->i_private = data;

+ /* All files will have the same inode number */
+ inode->i_ino = EVENTFS_FILE_INODE_INO;
+
ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
- d_instantiate(dentry, inode);
- fsnotify_create(dentry->d_parent->d_inode, dentry);
- return eventfs_end_creating(dentry);
+
+ // Files have their parent's ei as their fsdata
+ dentry->d_fsdata = get_ei(parent_ei);
+
+ d_add(dentry, inode);
+ return NULL;
};

/**
- * create_dir - create a dir in the tracefs filesystem
+ * lookup_dir_entry - look up a dir in the tracefs filesystem
+ * @dentry: the directory to look up
* @ei: the eventfs_inode that represents the directory to create
- * @parent: parent dentry for this file.
- * @data: something that the caller will want to get to later on.
*
- * This is the basic "create a dir" function for eventfs. It allows for a
- * wide range of flexibility in creating a dir.
- *
- * This function will return a pointer to a dentry if it succeeds. This
- * pointer must be passed to the tracefs_remove() function when the file is
- * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
- *
- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
- * returned.
+ * This function will look up a dentry for a directory represented by
+ * a eventfs_inode.
*/
-static struct dentry *create_dir(struct eventfs_file *ef,
- struct dentry *parent, void *data)
+static struct dentry *lookup_dir_entry(struct dentry *dentry,
+ struct eventfs_inode *pei, struct eventfs_inode *ei)
{
struct tracefs_inode *ti;
- struct dentry *dentry;
struct inode *inode;

- dentry = eventfs_start_creating(ef->name, parent);
- if (IS_ERR(dentry))
- return dentry;
-
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return eventfs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);

- update_inode_attr(inode, ef);
+ /* If the user updated the directory's attributes, use them */
+ update_inode_attr(dentry, inode, &ei->attr,
+ S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);

inode->i_op = &eventfs_root_dir_inode_operations;
inode->i_fop = &eventfs_file_operations;
- inode->i_private = data;
+
+ /* All directories will have the same inode number */
+ inode->i_ino = eventfs_dir_ino(ei);

ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
+ /* Only directories have ti->private set to an ei, not files */
+ ti->private = ei;

- inc_nlink(inode);
- d_instantiate(dentry, inode);
- inc_nlink(dentry->d_parent->d_inode);
- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
- return eventfs_end_creating(dentry);
-}
+ dentry->d_fsdata = get_ei(ei);

-static void free_ef(struct eventfs_file *ef)
-{
- kfree(ef->name);
- kfree(ef->ei);
- kfree(ef);
+ d_add(dentry, inode);
+ return NULL;
}

-/**
- * eventfs_set_ef_status_free - set the ef->status to free
- * @ti: the tracefs_inode of the dentry
- * @dentry: dentry who's status to be freed
- *
- * eventfs_set_ef_status_free will be called if no more
- * references remain
- */
-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+static inline struct eventfs_inode *alloc_ei(const char *name)
{
- struct eventfs_inode *ei;
- struct eventfs_file *ef;
+ struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);

- /* The top level events directory may be freed by this */
- if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
- mutex_lock(&eventfs_mutex);
- ei = ti->private;
-
- /* Nothing should access this, but just in case! */
- ti->private = NULL;
- mutex_unlock(&eventfs_mutex);
-
- ef = dentry->d_fsdata;
- if (ef)
- free_ef(ef);
- return;
- }
-
- mutex_lock(&eventfs_mutex);
-
- ef = dentry->d_fsdata;
- if (!ef)
- goto out;
+ if (!ei)
+ return NULL;

- if (ef->is_freed) {
- free_ef(ef);
- } else {
- ef->dentry = NULL;
+ ei->name = kstrdup_const(name, GFP_KERNEL);
+ if (!ei->name) {
+ kfree(ei);
+ return NULL;
}
-
- dentry->d_fsdata = NULL;
-out:
- mutex_unlock(&eventfs_mutex);
+ kref_init(&ei->kref);
+ return ei;
}

/**
- * eventfs_post_create_dir - post create dir routine
- * @ef: eventfs_file of recently created dir
+ * eventfs_d_release - dentry is going away
+ * @dentry: dentry which has the reference to remove.
*
- * Map the meta-data of files within an eventfs dir to their parent dentry
+ * Remove the association between a dentry from an eventfs_inode.
*/
-static void eventfs_post_create_dir(struct eventfs_file *ef)
+void eventfs_d_release(struct dentry *dentry)
{
- struct eventfs_file *ef_child;
- struct tracefs_inode *ti;
-
- /* srcu lock already held */
- /* fill parent-child relation */
- list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
- srcu_read_lock_held(&eventfs_srcu)) {
- ef_child->d_parent = ef->dentry;
- }
-
- ti = get_tracefs(ef->dentry->d_inode);
- ti->private = ef->ei;
+ put_ei(dentry->d_fsdata);
}

/**
- * create_dentry - helper function to create dentry
- * @ef: eventfs_file of file or directory to create
- * @parent: parent dentry
- * @lookup: true if called from lookup routine
+ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @ei: the eventfs_inode that the file will be created under
+ * @idx: the index into the entry_attrs[] of the @ei
+ * @parent: The parent dentry of the created file.
+ * @name: The name of the file to create
+ * @mode: The mode of the file.
+ * @data: The data to use to set the inode of the file with on open()
+ * @fops: The fops of the file to be created.
*
- * Used to create a dentry for file/dir, executes post dentry creation routine
+ * Create a dentry for a file of an eventfs_inode @ei and place it into the
+ * address located at @e_dentry.
*/
static struct dentry *
-create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
+lookup_file_dentry(struct dentry *dentry,
+ struct eventfs_inode *ei, int idx,
+ umode_t mode, void *data,
+ const struct file_operations *fops)
{
- bool invalidate = false;
- struct dentry *dentry;
+ struct eventfs_attr *attr = NULL;

- mutex_lock(&eventfs_mutex);
- if (ef->is_freed) {
- mutex_unlock(&eventfs_mutex);
- return NULL;
- }
- if (ef->dentry) {
- dentry = ef->dentry;
- /* On dir open, up the ref count */
- if (!lookup)
- dget(dentry);
- mutex_unlock(&eventfs_mutex);
- return dentry;
- }
- mutex_unlock(&eventfs_mutex);
-
- if (!lookup)
- inode_lock(parent->d_inode);
-
- if (ef->ei)
- dentry = create_dir(ef, parent, ef->data);
- else
- dentry = create_file(ef, parent, ef->data, ef->fop);
+ if (ei->entry_attrs)
+ attr = &ei->entry_attrs[idx];

- if (!lookup)
- inode_unlock(parent->d_inode);
-
- mutex_lock(&eventfs_mutex);
- if (IS_ERR_OR_NULL(dentry)) {
- /* If the ef was already updated get it */
- dentry = ef->dentry;
- if (dentry && !lookup)
- dget(dentry);
- mutex_unlock(&eventfs_mutex);
- return dentry;
- }
-
- if (!ef->dentry && !ef->is_freed) {
- ef->dentry = dentry;
- if (ef->ei)
- eventfs_post_create_dir(ef);
- dentry->d_fsdata = ef;
- } else {
- /* A race here, should try again (unless freed) */
- invalidate = true;
-
- /*
- * Should never happen unless we get here due to being freed.
- * Otherwise it means two dentries exist with the same name.
- */
- WARN_ON_ONCE(!ef->is_freed);
- }
- mutex_unlock(&eventfs_mutex);
- if (invalidate)
- d_invalidate(dentry);
-
- if (lookup || invalidate)
- dput(dentry);
-
- return invalidate ? NULL : dentry;
-}
-
-static bool match_event_file(struct eventfs_file *ef, const char *name)
-{
- bool ret;
-
- mutex_lock(&eventfs_mutex);
- ret = !ef->is_freed && strcmp(ef->name, name) == 0;
- mutex_unlock(&eventfs_mutex);
-
- return ret;
+ return lookup_file(ei, dentry, mode, attr, data, fops);
}

/**
* eventfs_root_lookup - lookup routine to create file/dir
* @dir: in which a lookup is being done
* @dentry: file/dir dentry
- * @flags: to pass as flags parameter to simple lookup
+ * @flags: Just passed to simple_lookup()
*
- * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
- * list of meta data to find the information needed to create the file/dir.
+ * Used to create dynamic file/dir with-in @dir, search with-in @ei
+ * list, if @dentry found go ahead and create the file/dir
*/
+
static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags)
{
+ struct eventfs_inode *ei_child;
struct tracefs_inode *ti;
struct eventfs_inode *ei;
- struct eventfs_file *ef;
- struct dentry *ret = NULL;
- int idx;
+ const char *name = dentry->d_name.name;
+ struct dentry *result = NULL;

ti = get_tracefs(dir);
if (!(ti->flags & TRACEFS_EVENT_INODE))
- return NULL;
+ return ERR_PTR(-EIO);
+
+ mutex_lock(&eventfs_mutex);

ei = ti->private;
- idx = srcu_read_lock(&eventfs_srcu);
- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
- srcu_read_lock_held(&eventfs_srcu)) {
- if (!match_event_file(ef, dentry->d_name.name))
+ if (!ei || ei->is_freed)
+ goto out;
+
+ list_for_each_entry(ei_child, &ei->children, list) {
+ if (strcmp(ei_child->name, name) != 0)
continue;
- ret = simple_lookup(dir, dentry, flags);
- create_dentry(ef, ef->d_parent, true);
- break;
+ if (ei_child->is_freed)
+ goto out;
+ result = lookup_dir_entry(dentry, ei, ei_child);
+ goto out;
}
- srcu_read_unlock(&eventfs_srcu, idx);
- return ret;
-}

-struct dentry_list {
- void *cursor;
- struct dentry **dentries;
-};
-
-/**
- * eventfs_release - called to release eventfs file/dir
- * @inode: inode to be released
- * @file: file to be released (not used)
- */
-static int eventfs_release(struct inode *inode, struct file *file)
-{
- struct tracefs_inode *ti;
- struct dentry_list *dlist = file->private_data;
- void *cursor;
- int i;
+ for (int i = 0; i < ei->nr_entries; i++) {
+ void *data;
+ umode_t mode;
+ const struct file_operations *fops;
+ const struct eventfs_entry *entry = &ei->entries[i];

- ti = get_tracefs(inode);
- if (!(ti->flags & TRACEFS_EVENT_INODE))
- return -EINVAL;
+ if (strcmp(name, entry->name) != 0)
+ continue;

- if (WARN_ON_ONCE(!dlist))
- return -EINVAL;
+ data = ei->data;
+ if (entry->callback(name, &mode, &data, &fops) <= 0)
+ goto out;

- for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
- dput(dlist->dentries[i]);
+ result = lookup_file_dentry(dentry, ei, i, mode, data, fops);
+ goto out;
}
-
- cursor = dlist->cursor;
- kfree(dlist->dentries);
- kfree(dlist);
- file->private_data = cursor;
- return dcache_dir_close(inode, file);
+ out:
+ mutex_unlock(&eventfs_mutex);
+ return result;
}

-/**
- * dcache_dir_open_wrapper - eventfs open wrapper
- * @inode: not used
- * @file: dir to be opened (to create its child)
- *
- * Used to dynamically create the file/dir within @file. @file is really a
- * directory and all the files/dirs of the children within @file will be
- * created. If any of the files/dirs have already been created, their
- * reference count will be incremented.
+/*
+ * Walk the children of a eventfs_inode to fill in getdents().
*/
-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
+static int eventfs_iterate(struct file *file, struct dir_context *ctx)
{
+ const struct file_operations *fops;
+ struct inode *f_inode = file_inode(file);
+ const struct eventfs_entry *entry;
+ struct eventfs_inode *ei_child;
struct tracefs_inode *ti;
struct eventfs_inode *ei;
- struct eventfs_file *ef;
- struct dentry_list *dlist;
- struct dentry **dentries = NULL;
- struct dentry *dentry = file_dentry(file);
- struct dentry *d;
- struct inode *f_inode = file_inode(file);
- int cnt = 0;
+ const char *name;
+ umode_t mode;
int idx;
- int ret;
+ int ret = -EINVAL;
+ int ino;
+ int i, r, c;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;

ti = get_tracefs(f_inode);
if (!(ti->flags & TRACEFS_EVENT_INODE))
return -EINVAL;

- if (WARN_ON_ONCE(file->private_data))
- return -EINVAL;
-
- dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
- if (!dlist)
- return -ENOMEM;
+ c = ctx->pos - 2;

- ei = ti->private;
idx = srcu_read_lock(&eventfs_srcu);
- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
- srcu_read_lock_held(&eventfs_srcu)) {
- d = create_dentry(ef, dentry, false);
- if (d) {
- struct dentry **tmp;

+ mutex_lock(&eventfs_mutex);
+ ei = READ_ONCE(ti->private);
+ if (ei && ei->is_freed)
+ ei = NULL;
+ mutex_unlock(&eventfs_mutex);

- tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
- if (!tmp)
- break;
- tmp[cnt] = d;
- tmp[cnt + 1] = NULL;
- cnt++;
- dentries = tmp;
- }
- }
- srcu_read_unlock(&eventfs_srcu, idx);
- ret = dcache_dir_open(inode, file);
+ if (!ei)
+ goto out;

/*
- * dcache_dir_open() sets file->private_data to a dentry cursor.
- * Need to save that but also save all the dentries that were
- * opened by this function.
+ * Need to create the dentries and inodes to have a consistent
+ * inode number.
*/
- dlist->cursor = file->private_data;
- dlist->dentries = dentries;
- file->private_data = dlist;
- return ret;
-}
+ ret = 0;

-/*
- * This just sets the file->private_data back to the cursor and back.
- */
-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
-{
- struct dentry_list *dlist = file->private_data;
- int ret;
+ /* Start at 'c' to jump over already read entries */
+ for (i = c; i < ei->nr_entries; i++, ctx->pos++) {
+ void *cdata = ei->data;

- file->private_data = dlist->cursor;
- ret = dcache_readdir(file, ctx);
- dlist->cursor = file->private_data;
- file->private_data = dlist;
- return ret;
-}
+ entry = &ei->entries[i];
+ name = entry->name;

-/**
- * eventfs_prepare_ef - helper function to prepare eventfs_file
- * @name: the name of the file/directory to create.
- * @mode: the permission that the file should have.
- * @fop: struct file_operations that should be used for this file/directory.
- * @iop: struct inode_operations that should be used for this file/directory.
- * @data: something that the caller will want to get to later on. The
- * inode.i_private pointer will point to this value on the open() call.
- *
- * This function allocates and fills the eventfs_file structure.
- */
-static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode,
- const struct file_operations *fop,
- const struct inode_operations *iop,
- void *data)
-{
- struct eventfs_file *ef;
+ mutex_lock(&eventfs_mutex);
+ /* If ei->is_freed then just bail here, nothing more to do */
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+ goto out;
+ }
+ r = entry->callback(name, &mode, &cdata, &fops);
+ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;

- ef = kzalloc(sizeof(*ef), GFP_KERNEL);
- if (!ef)
- return ERR_PTR(-ENOMEM);
+ ino = EVENTFS_FILE_INODE_INO;

- ef->name = kstrdup(name, GFP_KERNEL);
- if (!ef->name) {
- kfree(ef);
- return ERR_PTR(-ENOMEM);
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+ goto out;
}

- if (S_ISDIR(mode)) {
- ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL);
- if (!ef->ei) {
- kfree(ef->name);
- kfree(ef);
- return ERR_PTR(-ENOMEM);
- }
- INIT_LIST_HEAD(&ef->ei->e_top_files);
- ef->mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
- } else {
- ef->ei = NULL;
- ef->mode = mode;
- }
+ /* Subtract the skipped entries above */
+ c -= min((unsigned int)c, (unsigned int)ei->nr_entries);

- ef->iop = iop;
- ef->fop = fop;
- ef->data = data;
- return ef;
-}
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {

-/**
- * eventfs_create_events_dir - create the trace event structure
- * @name: the name of the directory to create.
- * @parent: parent dentry for this file. This should be a directory dentry
- * if set. If this parameter is NULL, then the directory will be
- * created in the root of the tracefs filesystem.
- *
- * This function creates the top of the trace event directory.
- */
-struct dentry *eventfs_create_events_dir(const char *name,
- struct dentry *parent)
-{
- struct dentry *dentry = tracefs_start_creating(name, parent);
- struct eventfs_inode *ei;
- struct tracefs_inode *ti;
- struct inode *inode;
+ if (c > 0) {
+ c--;
+ continue;
+ }

- if (security_locked_down(LOCKDOWN_TRACEFS))
- return NULL;
+ ctx->pos++;

- if (IS_ERR(dentry))
- return dentry;
+ if (ei_child->is_freed)
+ continue;

- ei = kzalloc(sizeof(*ei), GFP_KERNEL);
- if (!ei)
- return ERR_PTR(-ENOMEM);
- inode = tracefs_get_inode(dentry->d_sb);
- if (unlikely(!inode)) {
- kfree(ei);
- tracefs_failed_creating(dentry);
- return ERR_PTR(-ENOMEM);
- }
+ name = ei_child->name;

- INIT_LIST_HEAD(&ei->e_top_files);
+ ino = eventfs_dir_ino(ei_child);

- ti = get_tracefs(inode);
- ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
- ti->private = ei;
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+ goto out_dec;
+ }
+ ret = 1;
+ out:
+ srcu_read_unlock(&eventfs_srcu, idx);

- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
- inode->i_op = &eventfs_root_dir_inode_operations;
- inode->i_fop = &eventfs_file_operations;
+ return ret;

- /* directory inodes start off with i_nlink == 2 (for "." entry) */
- inc_nlink(inode);
- d_instantiate(dentry, inode);
- inc_nlink(dentry->d_parent->d_inode);
- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
- return tracefs_end_creating(dentry);
+ out_dec:
+ /* Incremented ctx->pos without adding something, reset it */
+ ctx->pos--;
+ goto out;
}

/**
- * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
- * @name: the name of the file to create.
- * @parent: parent dentry for this dir.
+ * eventfs_create_dir - Create the eventfs_inode for this directory
+ * @name: The name of the directory to create.
+ * @parent: The eventfs_inode of the parent directory.
+ * @entries: A list of entries that represent the files under this directory
+ * @size: The number of @entries
+ * @data: The default data to pass to the files (an entry may override it).
+ *
+ * This function creates the descriptor to represent a directory in the
+ * eventfs. This descriptor is an eventfs_inode, and it is returned to be
+ * used to create other children underneath.
+ *
+ * The @entries is an array of eventfs_entry structures which has:
+ * const char *name
+ * eventfs_callback callback;
+ *
+ * The name is the name of the file, and the callback is a pointer to a function
+ * that will be called when the file is reference (either by lookup or by
+ * reading a directory). The callback is of the prototype:
+ *
+ * int callback(const char *name, umode_t *mode, void **data,
+ * const struct file_operations **fops);
+ *
+ * When a file needs to be created, this callback will be called with
+ * name = the name of the file being created (so that the same callback
+ * may be used for multiple files).
+ * mode = a place to set the file's mode
+ * data = A pointer to @data, and the callback may replace it, which will
+ * cause the file created to pass the new data to the open() call.
+ * fops = the fops to use for the created file.
*
- * This function adds eventfs subsystem dir to list.
- * And all these dirs are created on the fly when they are looked up,
- * and the dentry and inodes will be removed when they are done.
+ * NB. @callback is called while holding internal locks of the eventfs
+ * system. The callback must not call any code that might also call into
+ * the tracefs or eventfs system or it will risk creating a deadlock.
*/
-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
- struct dentry *parent)
+struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data)
{
- struct tracefs_inode *ti_parent;
- struct eventfs_inode *ei_parent;
- struct eventfs_file *ef;
-
- if (security_locked_down(LOCKDOWN_TRACEFS))
- return NULL;
+ struct eventfs_inode *ei;

if (!parent)
return ERR_PTR(-EINVAL);

- ti_parent = get_tracefs(parent->d_inode);
- ei_parent = ti_parent->private;
+ ei = alloc_ei(name);
+ if (!ei)
+ return ERR_PTR(-ENOMEM);

- ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
- if (IS_ERR(ef))
- return ef;
+ ei->entries = entries;
+ ei->nr_entries = size;
+ ei->data = data;
+ INIT_LIST_HEAD(&ei->children);
+ INIT_LIST_HEAD(&ei->list);

mutex_lock(&eventfs_mutex);
- list_add_tail(&ef->list, &ei_parent->e_top_files);
- ef->d_parent = parent;
+ if (!parent->is_freed)
+ list_add_tail(&ei->list, &parent->children);
mutex_unlock(&eventfs_mutex);
- return ef;
-}
-
-/**
- * eventfs_add_dir - add eventfs dir to list to create later
- * @name: the name of the file to create.
- * @ef_parent: parent eventfs_file for this dir.
- *
- * This function adds eventfs dir to list.
- * And all these dirs are created on the fly when they are looked up,
- * and the dentry and inodes will be removed when they are done.
- */
-struct eventfs_file *eventfs_add_dir(const char *name,
- struct eventfs_file *ef_parent)
-{
- struct eventfs_file *ef;
-
- if (security_locked_down(LOCKDOWN_TRACEFS))
- return NULL;
-
- if (!ef_parent)
- return ERR_PTR(-EINVAL);
-
- ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
- if (IS_ERR(ef))
- return ef;

- mutex_lock(&eventfs_mutex);
- list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
- ef->d_parent = ef_parent->dentry;
- mutex_unlock(&eventfs_mutex);
- return ef;
+ /* Was the parent freed? */
+ if (list_empty(&ei->list)) {
+ free_ei(ei);
+ ei = NULL;
+ }
+ return ei;
}

/**
- * eventfs_add_events_file - add the data needed to create a file for later reference
- * @name: the name of the file to create.
- * @mode: the permission that the file should have.
- * @parent: parent dentry for this file.
- * @data: something that the caller will want to get to later on.
- * @fop: struct file_operations that should be used for this file.
+ * eventfs_create_events_dir - create the top level events directory
+ * @name: The name of the top level directory to create.
+ * @parent: Parent dentry for this file in the tracefs directory.
+ * @entries: A list of entries that represent the files under this directory
+ * @size: The number of @entries
+ * @data: The default data to pass to the files (an entry may override it).
*
- * This function is used to add the information needed to create a
- * dentry/inode within the top level events directory. The file created
- * will have the @mode permissions. The @data will be used to fill the
- * inode.i_private when the open() call is done. The dentry and inodes are
- * all created when they are referenced, and removed when they are no
- * longer referenced.
+ * This function creates the top of the trace event directory.
+ *
+ * See eventfs_create_dir() for use of @entries.
*/
-int eventfs_add_events_file(const char *name, umode_t mode,
- struct dentry *parent, void *data,
- const struct file_operations *fop)
+struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data)
{
- struct tracefs_inode *ti;
+ struct dentry *dentry = tracefs_start_creating(name, parent);
struct eventfs_inode *ei;
- struct eventfs_file *ef;
+ struct tracefs_inode *ti;
+ struct inode *inode;
+ kuid_t uid;
+ kgid_t gid;

if (security_locked_down(LOCKDOWN_TRACEFS))
- return -ENODEV;
-
- if (!parent)
- return -EINVAL;
-
- if (!(mode & S_IFMT))
- mode |= S_IFREG;
-
- if (!parent->d_inode)
- return -EINVAL;
-
- ti = get_tracefs(parent->d_inode);
- if (!(ti->flags & TRACEFS_EVENT_INODE))
- return -EINVAL;
-
- ei = ti->private;
- ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
-
- if (IS_ERR(ef))
- return -ENOMEM;
-
- mutex_lock(&eventfs_mutex);
- list_add_tail(&ef->list, &ei->e_top_files);
- ef->d_parent = parent;
- mutex_unlock(&eventfs_mutex);
- return 0;
-}
-
-/**
- * eventfs_add_file - add eventfs file to list to create later
- * @name: the name of the file to create.
- * @mode: the permission that the file should have.
- * @ef_parent: parent eventfs_file for this file.
- * @data: something that the caller will want to get to later on.
- * @fop: struct file_operations that should be used for this file.
- *
- * This function is used to add the information needed to create a
- * file within a subdirectory of the events directory. The file created
- * will have the @mode permissions. The @data will be used to fill the
- * inode.i_private when the open() call is done. The dentry and inodes are
- * all created when they are referenced, and removed when they are no
- * longer referenced.
- */
-int eventfs_add_file(const char *name, umode_t mode,
- struct eventfs_file *ef_parent,
- void *data,
- const struct file_operations *fop)
-{
- struct eventfs_file *ef;
+ return NULL;

- if (security_locked_down(LOCKDOWN_TRACEFS))
- return -ENODEV;
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);

- if (!ef_parent)
- return -EINVAL;
+ ei = alloc_ei(name);
+ if (!ei)
+ goto fail;

- if (!(mode & S_IFMT))
- mode |= S_IFREG;
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ goto fail;

- ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
- if (IS_ERR(ef))
- return -ENOMEM;
+ // Note: we have a ref to the dentry from tracefs_start_creating()
+ ei->events_dir = dentry;
+ ei->entries = entries;
+ ei->nr_entries = size;
+ ei->is_events = 1;
+ ei->data = data;

- mutex_lock(&eventfs_mutex);
- list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
- ef->d_parent = ef_parent->dentry;
- mutex_unlock(&eventfs_mutex);
- return 0;
-}
+ /* Save the ownership of this directory */
+ uid = d_inode(dentry->d_parent)->i_uid;
+ gid = d_inode(dentry->d_parent)->i_gid;

-static LLIST_HEAD(free_list);
+ /*
+ * If the events directory is of the top instance, then parent
+ * is NULL. Set the attr.mode to reflect this and its permissions will
+ * default to the tracefs root dentry.
+ */
+ if (!parent)
+ ei->attr.mode = EVENTFS_TOPLEVEL;

-static void eventfs_workfn(struct work_struct *work)
-{
- struct eventfs_file *ef, *tmp;
- struct llist_node *llnode;
-
- llnode = llist_del_all(&free_list);
- llist_for_each_entry_safe(ef, tmp, llnode, llist) {
- /* This should only get here if it had a dentry */
- if (!WARN_ON_ONCE(!ef->dentry))
- dput(ef->dentry);
- }
-}
+ /* This is used as the default ownership of the files and directories */
+ ei->attr.uid = uid;
+ ei->attr.gid = gid;

-static DECLARE_WORK(eventfs_work, eventfs_workfn);
+ INIT_LIST_HEAD(&ei->children);
+ INIT_LIST_HEAD(&ei->list);

-static void free_rcu_ef(struct rcu_head *head)
-{
- struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
+ ti->private = ei;

- if (ef->dentry) {
- /* Do not free the ef until all references of dentry are gone */
- if (llist_add(&ef->llist, &free_list))
- queue_work(system_unbound_wq, &eventfs_work);
- return;
- }
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_uid = uid;
+ inode->i_gid = gid;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;

- free_ef(ef);
-}
+ dentry->d_fsdata = get_ei(ei);

-static void unhook_dentry(struct dentry *dentry)
-{
- if (!dentry)
- return;
/*
- * Need to add a reference to the dentry that is expected by
- * simple_recursive_removal(), which will include a dput().
+ * Keep all eventfs directories with i_nlink == 1.
+ * Due to the dynamic nature of the dentry creations and not
+ * wanting to add a pointer to the parent eventfs_inode in the
+ * eventfs_inode structure, keeping the i_nlink in sync with the
+ * number of directories would cause too much complexity for
+ * something not worth much. Keeping directory links at 1
+ * tells userspace not to trust the link number.
*/
- dget(dentry);
+ d_instantiate(dentry, inode);
+ /* The dentry of the "events" parent does keep track though */
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ tracefs_end_creating(dentry);

- /*
- * Also add a reference for the dput() in eventfs_workfn().
- * That is required as that dput() will free the ei after
- * the SRCU grace period is over.
- */
- dget(dentry);
+ return ei;
+
+ fail:
+ free_ei(ei);
+ tracefs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
}

/**
* eventfs_remove_rec - remove eventfs dir or file from list
- * @ef: eventfs_file to be removed.
- * @level: to check recursion depth
+ * @ei: eventfs_inode to be removed.
+ * @level: prevent recursion from going more than 3 levels deep.
*
- * The helper function eventfs_remove_rec() is used to clean up and free the
- * associated data from eventfs for both of the added functions.
+ * This function recursively removes eventfs_inodes which
+ * contains info of files and/or directories.
*/
-static void eventfs_remove_rec(struct eventfs_file *ef, int level)
+static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
{
- struct eventfs_file *ef_child;
+ struct eventfs_inode *ei_child;

- if (!ef)
- return;
/*
* Check recursion depth. It should never be greater than 3:
* 0 - events/
@@ -904,72 +812,54 @@ static void eventfs_remove_rec(struct eventfs_file *ef, int level)
if (WARN_ON_ONCE(level > 3))
return;

- if (ef->ei) {
- /* search for nested folders or files */
- list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
- lockdep_is_held(&eventfs_mutex)) {
- eventfs_remove_rec(ef_child, level + 1);
- }
- }
-
- ef->is_freed = 1;
+ /* search for nested folders or files */
+ list_for_each_entry(ei_child, &ei->children, list)
+ eventfs_remove_rec(ei_child, level + 1);

- unhook_dentry(ef->dentry);
-
- list_del_rcu(&ef->list);
- call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
+ list_del(&ei->list);
+ free_ei(ei);
}

/**
- * eventfs_remove - remove eventfs dir or file from list
- * @ef: eventfs_file to be removed.
+ * eventfs_remove_dir - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
*
* This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
*/
-void eventfs_remove(struct eventfs_file *ef)
+void eventfs_remove_dir(struct eventfs_inode *ei)
{
- struct dentry *dentry;
-
- if (!ef)
+ if (!ei)
return;

mutex_lock(&eventfs_mutex);
- dentry = ef->dentry;
- eventfs_remove_rec(ef, 0);
+ eventfs_remove_rec(ei, 0);
mutex_unlock(&eventfs_mutex);
-
- /*
- * If any of the ei children has a dentry, then the ei itself
- * must have a dentry.
- */
- if (dentry)
- simple_recursive_removal(dentry, NULL);
}

/**
- * eventfs_remove_events_dir - remove eventfs dir or file from list
- * @dentry: events's dentry to be removed.
+ * eventfs_remove_events_dir - remove the top level eventfs directory
+ * @ei: the event_inode returned by eventfs_create_events_dir().
*
- * This function remove events main directory
+ * This function removes the events main directory
*/
-void eventfs_remove_events_dir(struct dentry *dentry)
+void eventfs_remove_events_dir(struct eventfs_inode *ei)
{
- struct eventfs_file *ef_child;
- struct eventfs_inode *ei;
- struct tracefs_inode *ti;
+ struct dentry *dentry;

- if (!dentry || !dentry->d_inode)
+ dentry = ei->events_dir;
+ if (!dentry)
return;

- ti = get_tracefs(dentry->d_inode);
- if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
- return;
+ ei->events_dir = NULL;
+ eventfs_remove_dir(ei);

- mutex_lock(&eventfs_mutex);
- ei = ti->private;
- list_for_each_entry_srcu(ef_child, &ei->e_top_files, list,
- lockdep_is_held(&eventfs_mutex)) {
- eventfs_remove_rec(ef_child, 0);
- }
- mutex_unlock(&eventfs_mutex);
+ /*
+ * Matches the dget() done by tracefs_start_creating()
+ * in eventfs_create_events_dir() when it the dentry was
+ * created. In other words, it's a normal dentry that
+ * sticks around while the other ei->dentry are created
+ * and destroyed dynamically.
+ */
+ d_invalidate(dentry);
+ dput(dentry);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 0292c6a2bed9..99caf1c3d0c4 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb)
if (!ti)
return NULL;

- ti->flags = 0;
-
return &ti->vfs_inode;
}

@@ -91,6 +89,7 @@ static int tracefs_syscall_mkdir(struct mnt_idmap *idmap,
struct inode *inode, struct dentry *dentry,
umode_t mode)
{
+ struct tracefs_inode *ti;
char *name;
int ret;

@@ -98,6 +97,15 @@ static int tracefs_syscall_mkdir(struct mnt_idmap *idmap,
if (!name)
return -ENOMEM;

+ /*
+ * This is a new directory that does not take the default of
+ * the rootfs. It becomes the default permissions for all the
+ * files and directories underneath it.
+ */
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_INSTANCE_INODE;
+ ti->private = inode;
+
/*
* The mkdir call can call the generic functions that create
* the files within the tracefs system. It is up to the individual
@@ -141,10 +149,76 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
return ret;
}

-static const struct inode_operations tracefs_dir_inode_operations = {
+static void set_tracefs_inode_owner(struct inode *inode)
+{
+ struct tracefs_inode *ti = get_tracefs(inode);
+ struct inode *root_inode = ti->private;
+
+ /*
+ * If this inode has never been referenced, then update
+ * the permissions to the superblock.
+ */
+ if (!(ti->flags & TRACEFS_UID_PERM_SET))
+ inode->i_uid = root_inode->i_uid;
+
+ if (!(ti->flags & TRACEFS_GID_PERM_SET))
+ inode->i_gid = root_inode->i_gid;
+}
+
+static int tracefs_permission(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
+{
+ set_tracefs_inode_owner(inode);
+ return generic_permission(idmap, inode, mask);
+}
+
+static int tracefs_getattr(struct mnt_idmap *idmap,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
+{
+ struct inode *inode = d_backing_inode(path->dentry);
+
+ set_tracefs_inode_owner(inode);
+ generic_fillattr(idmap, request_mask, inode, stat);
+ return 0;
+}
+
+static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ unsigned int ia_valid = attr->ia_valid;
+ struct inode *inode = d_inode(dentry);
+ struct tracefs_inode *ti = get_tracefs(inode);
+
+ if (ia_valid & ATTR_UID)
+ ti->flags |= TRACEFS_UID_PERM_SET;
+
+ if (ia_valid & ATTR_GID)
+ ti->flags |= TRACEFS_GID_PERM_SET;
+
+ return simple_setattr(idmap, dentry, attr);
+}
+
+static const struct inode_operations tracefs_instance_dir_inode_operations = {
.lookup = simple_lookup,
.mkdir = tracefs_syscall_mkdir,
.rmdir = tracefs_syscall_rmdir,
+ .permission = tracefs_permission,
+ .getattr = tracefs_getattr,
+ .setattr = tracefs_setattr,
+};
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .permission = tracefs_permission,
+ .getattr = tracefs_getattr,
+ .setattr = tracefs_setattr,
+};
+
+static const struct inode_operations tracefs_file_inode_operations = {
+ .permission = tracefs_permission,
+ .getattr = tracefs_getattr,
+ .setattr = tracefs_setattr,
};

struct inode *tracefs_get_inode(struct super_block *sb)
@@ -183,77 +257,6 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};

-static void change_gid(struct dentry *dentry, kgid_t gid)
-{
- if (!dentry->d_inode)
- return;
- dentry->d_inode->i_gid = gid;
-}
-
-/*
- * Taken from d_walk, but without he need for handling renames.
- * Nothing can be renamed while walking the list, as tracefs
- * does not support renames. This is only called when mounting
- * or remounting the file system, to set all the files to
- * the given gid.
- */
-static void set_gid(struct dentry *parent, kgid_t gid)
-{
- struct dentry *this_parent;
- struct list_head *next;
-
- this_parent = parent;
- spin_lock(&this_parent->d_lock);
-
- change_gid(this_parent, gid);
-repeat:
- next = this_parent->d_subdirs.next;
-resume:
- while (next != &this_parent->d_subdirs) {
- struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
- next = tmp->next;
-
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-
- change_gid(dentry, gid);
-
- if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
- spin_release(&dentry->d_lock.dep_map, _RET_IP_);
- this_parent = dentry;
- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
- goto repeat;
- }
- spin_unlock(&dentry->d_lock);
- }
- /*
- * All done at this level ... ascend and resume the search.
- */
- rcu_read_lock();
-ascend:
- if (this_parent != parent) {
- struct dentry *child = this_parent;
- this_parent = child->d_parent;
-
- spin_unlock(&child->d_lock);
- spin_lock(&this_parent->d_lock);
-
- /* go into the first sibling still alive */
- do {
- next = child->d_child.next;
- if (next == &this_parent->d_subdirs)
- goto ascend;
- child = list_entry(next, struct dentry, d_child);
- } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
- rcu_read_unlock();
- goto resume;
- }
- rcu_read_unlock();
- spin_unlock(&this_parent->d_lock);
- return;
-}
-
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -326,10 +329,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
if (!remount || opts->opts & BIT(Opt_uid))
inode->i_uid = opts->uid;

- if (!remount || opts->opts & BIT(Opt_gid)) {
- /* Set all the group ids to the mount option */
- set_gid(sb->s_root, opts->gid);
- }
+ if (!remount || opts->opts & BIT(Opt_gid))
+ inode->i_gid = opts->gid;

return 0;
}
@@ -376,21 +377,30 @@ static const struct super_operations tracefs_super_operations = {
.show_options = tracefs_show_options,
};

-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+/*
+ * It would be cleaner if eventfs had its own dentry ops.
+ *
+ * Note that d_revalidate is called potentially under RCU,
+ * so it can't take the eventfs mutex etc. It's fine - if
+ * we open a file just as it's marked dead, things will
+ * still work just fine, and just see the old stale case.
+ */
+static void tracefs_d_release(struct dentry *dentry)
{
- struct tracefs_inode *ti;
+ if (dentry->d_fsdata)
+ eventfs_d_release(dentry);
+}

- if (!dentry || !inode)
- return;
+static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct eventfs_inode *ei = dentry->d_fsdata;

- ti = get_tracefs(inode);
- if (ti && ti->flags & TRACEFS_EVENT_INODE)
- eventfs_set_ef_status_free(ti, dentry);
- iput(inode);
+ return !(ei && ei->is_freed);
}

static const struct dentry_operations tracefs_dentry_operations = {
- .d_iput = tracefs_dentry_iput,
+ .d_revalidate = tracefs_d_revalidate,
+ .d_release = tracefs_d_release,
};

static int trace_fill_super(struct super_block *sb, void *data, int silent)
@@ -494,73 +504,24 @@ struct dentry *tracefs_end_creating(struct dentry *dentry)
return dentry;
}

-/**
- * eventfs_start_creating - start the process of creating a dentry
- * @name: Name of the file created for the dentry
- * @parent: The parent dentry where this dentry will be created
- *
- * This is a simple helper function for the dynamically created eventfs
- * files. When the directory of the eventfs files are accessed, their
- * dentries are created on the fly. This function is used to start that
- * process.
- */
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
+/* Find the inode that this will use for default */
+static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
{
- struct dentry *dentry;
- int error;
-
- /* Must always have a parent. */
- if (WARN_ON_ONCE(!parent))
- return ERR_PTR(-EINVAL);
-
- error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
- &tracefs_mount_count);
- if (error)
- return ERR_PTR(error);
+ struct tracefs_inode *ti;

- if (unlikely(IS_DEADDIR(parent->d_inode)))
- dentry = ERR_PTR(-ENOENT);
- else
- dentry = lookup_one_len(name, parent, strlen(name));
+ /* If parent is NULL then use root inode */
+ if (!parent)
+ return d_inode(inode->i_sb->s_root);

- if (!IS_ERR(dentry) && dentry->d_inode) {
- dput(dentry);
- dentry = ERR_PTR(-EEXIST);
+ /* Find the inode that is flagged as an instance or the root inode */
+ while (!IS_ROOT(parent)) {
+ ti = get_tracefs(d_inode(parent));
+ if (ti->flags & TRACEFS_INSTANCE_INODE)
+ break;
+ parent = parent->d_parent;
}

- if (IS_ERR(dentry))
- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-
- return dentry;
-}
-
-/**
- * eventfs_failed_creating - clean up a failed eventfs dentry creation
- * @dentry: The dentry to clean up
- *
- * If after calling eventfs_start_creating(), a failure is detected, the
- * resources created by eventfs_start_creating() needs to be cleaned up. In
- * that case, this function should be called to perform that clean up.
- */
-struct dentry *eventfs_failed_creating(struct dentry *dentry)
-{
- dput(dentry);
- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- return NULL;
-}
-
-/**
- * eventfs_end_creating - Finish the process of creating a eventfs dentry
- * @dentry: The dentry that has successfully been created.
- *
- * This function is currently just a place holder to match
- * eventfs_start_creating(). In case any synchronization needs to be added,
- * this function will be used to implement that without having to modify
- * the callers of eventfs_start_creating().
- */
-struct dentry *eventfs_end_creating(struct dentry *dentry)
-{
- return dentry;
+ return d_inode(parent);
}

/**
@@ -593,6 +554,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops)
{
+ struct tracefs_inode *ti;
struct dentry *dentry;
struct inode *inode;

@@ -611,7 +573,11 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
if (unlikely(!inode))
return tracefs_failed_creating(dentry);

+ ti = get_tracefs(inode);
+ ti->private = instance_inode(parent, inode);
+
inode->i_mode = mode;
+ inode->i_op = &tracefs_file_inode_operations;
inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
inode->i_uid = d_inode(dentry->d_parent)->i_uid;
@@ -624,6 +590,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
static struct dentry *__create_dir(const char *name, struct dentry *parent,
const struct inode_operations *ops)
{
+ struct tracefs_inode *ti;
struct dentry *dentry = tracefs_start_creating(name, parent);
struct inode *inode;

@@ -641,6 +608,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
inode->i_uid = d_inode(dentry->d_parent)->i_uid;
inode->i_gid = d_inode(dentry->d_parent)->i_gid;

+ ti = get_tracefs(inode);
+ ti->private = instance_inode(parent, inode);
+
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_instantiate(dentry, inode);
@@ -671,7 +641,7 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
if (security_locked_down(LOCKDOWN_TRACEFS))
return NULL;

- return __create_dir(name, parent, &simple_dir_inode_operations);
+ return __create_dir(name, parent, &tracefs_dir_inode_operations);
}

/**
@@ -702,7 +672,7 @@ __init struct dentry *tracefs_create_instance_dir(const char *name,
if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
return NULL;

- dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+ dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations);
if (!dentry)
return NULL;

@@ -747,7 +717,11 @@ static void init_once(void *foo)
{
struct tracefs_inode *ti = (struct tracefs_inode *) foo;

+ /* inode_init_once() calls memset() on the vfs_inode portion */
inode_init_once(&ti->vfs_inode);
+
+ /* Zero out the rest */
+ memset_after(ti, 0, vfs_inode);
}

static int __init tracefs_init(void)
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 4f2e49e2197b..beb3dcd0e434 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -5,12 +5,64 @@
enum {
TRACEFS_EVENT_INODE = BIT(1),
TRACEFS_EVENT_TOP_INODE = BIT(2),
+ TRACEFS_GID_PERM_SET = BIT(3),
+ TRACEFS_UID_PERM_SET = BIT(4),
+ TRACEFS_INSTANCE_INODE = BIT(5),
};

struct tracefs_inode {
+ struct inode vfs_inode;
+ /* The below gets initialized with memset_after(ti, 0, vfs_inode) */
unsigned long flags;
void *private;
- struct inode vfs_inode;
+};
+
+/*
+ * struct eventfs_attr - cache the mode and ownership of a eventfs entry
+ * @mode: saved mode plus flags of what is saved
+ * @uid: saved uid if changed
+ * @gid: saved gid if changed
+ */
+struct eventfs_attr {
+ int mode;
+ kuid_t uid;
+ kgid_t gid;
+};
+
+/*
+ * struct eventfs_inode - hold the properties of the eventfs directories.
+ * @list: link list into the parent directory
+ * @rcu: Union with @list for freeing
+ * @children: link list into the child eventfs_inode
+ * @entries: the array of entries representing the files in the directory
+ * @name: the name of the directory to create
+ * @events_dir: the dentry of the events directory
+ * @entry_attrs: Saved mode and ownership of the @d_children
+ * @data: The private data to pass to the callbacks
+ * @attr: Saved mode and ownership of eventfs_inode itself
+ * @is_freed: Flag set if the eventfs is on its way to be freed
+ * Note if is_freed is set, then dentry is corrupted.
+ * @is_events: Flag set for only the top level "events" directory
+ * @nr_entries: The number of items in @entries
+ * @ino: The saved inode number
+ */
+struct eventfs_inode {
+ union {
+ struct list_head list;
+ struct rcu_head rcu;
+ };
+ struct list_head children;
+ const struct eventfs_entry *entries;
+ const char *name;
+ struct dentry *events_dir;
+ struct eventfs_attr *entry_attrs;
+ void *data;
+ struct eventfs_attr attr;
+ struct kref kref;
+ unsigned int is_freed:1;
+ unsigned int is_events:1;
+ unsigned int nr_entries:30;
+ unsigned int ino;
};

static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
@@ -22,9 +74,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
struct dentry *tracefs_end_creating(struct dentry *dentry);
struct dentry *tracefs_failed_creating(struct dentry *dentry);
struct inode *tracefs_get_inode(struct super_block *sb);
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
-struct dentry *eventfs_failed_creating(struct dentry *dentry);
-struct dentry *eventfs_end_creating(struct dentry *dentry);
-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+void eventfs_d_release(struct dentry *dentry);

#endif /* _TRACEFS_INTERNAL_H */
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index b2c9b35df8f7..897b12ec61e2 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
struct zonefs_inode_info *zi = ZONEFS_I(inode);

if (error) {
- zonefs_io_error(inode, true);
+ /*
+ * For Sync IOs, error recovery is called from
+ * zonefs_file_dio_write().
+ */
+ if (!is_sync_kiocb(iocb))
+ zonefs_io_error(inode, true);
return error;
}

@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ret = -EINVAL;
goto inode_unlock;
}
+ /*
+ * Advance the zone write pointer offset. This assumes that the
+ * IO will succeed, which is OK to do because we do not allow
+ * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
+ * fails, the error path will correct the write pointer offset.
+ */
+ z->z_wpoffset += count;
+ zonefs_inode_account_active(inode);
mutex_unlock(&zi->i_truncate_mutex);
}

@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
if (ret == -ENOTBLK)
ret = -EBUSY;

- if (zonefs_zone_is_seq(z) &&
- (ret > 0 || ret == -EIOCBQUEUED)) {
- if (ret > 0)
- count = ret;
-
- /*
- * Update the zone write pointer offset assuming the write
- * operation succeeded. If it did not, the error recovery path
- * will correct it. Also do active seq file accounting.
- */
- mutex_lock(&zi->i_truncate_mutex);
- z->z_wpoffset += count;
- zonefs_inode_account_active(inode);
- mutex_unlock(&zi->i_truncate_mutex);
+ /*
+ * For a failed IO or partial completion, trigger error recovery
+ * to update the zone write pointer offset to a correct value.
+ * For asynchronous IOs, zonefs_file_write_dio_end_io() may already
+ * have executed error recovery if the IO already completed when we
+ * reach here. However, we cannot know that and execute error recovery
+ * again (that will not change anything).
+ */
+ if (zonefs_zone_is_seq(z)) {
+ if (ret > 0 && ret != count)
+ ret = -EIO;
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ zonefs_io_error(inode, true);
}

inode_unlock:
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 9d1a9808fbbb..cc364669d723 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
z->z_mode = inode->i_mode;
}

-struct zonefs_ioerr_data {
- struct inode *inode;
- bool write;
-};
-
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
- struct zonefs_ioerr_data *err = data;
- struct inode *inode = err->inode;
+ struct blk_zone *z = data;
+
+ *z = *zone;
+ return 0;
+}
+
+static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
+ bool write)
+{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(sb, z, zone);
isize = i_size_read(inode);
if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
- !err->write && isize == data_size)
- return 0;
+ !write && isize == data_size)
+ return;

/*
* At this point, we detected either a bad zone or an inconsistency
@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* In all cases, warn about inode size inconsistency and handle the
* IO error according to the zone condition and to the mount options.
*/
- if (zonefs_zone_is_seq(z) && isize != data_size)
+ if (isize != data_size)
zonefs_warn(sb,
"inode %lu: invalid size %lld (should be %lld)\n",
inode->i_ino, isize, data_size);
@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_i_size_write(inode, data_size);
z->z_wpoffset = data_size;
zonefs_inode_account_active(inode);
-
- return 0;
}

/*
@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones = 1;
- struct zonefs_ioerr_data err = {
- .inode = inode,
- .write = write,
- };
+ struct blk_zone zone;
int ret;

/*
- * The only files that have more than one zone are conventional zone
- * files with aggregated conventional zones, for which the inode zone
- * size is always larger than the device zone size.
+ * Conventional zone have no write pointer and cannot become read-only
+ * or offline. So simply fake a report for a single or aggregated zone
+ * and let zonefs_handle_io_error() correct the zone inode information
+ * according to the mount options.
*/
- if (z->z_size > bdev_zone_sectors(sb->s_bdev))
- nr_zones = z->z_size >>
- (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ if (!zonefs_zone_is_seq(z)) {
+ zone.start = z->z_sector;
+ zone.len = z->z_size >> SECTOR_SHIFT;
+ zone.wp = zone.start + zone.len;
+ zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone.cond = BLK_ZONE_COND_NOT_WP;
+ zone.capacity = zone.len;
+ goto handle_io_error;
+ }

/*
* Memory allocations in blkdev_report_zones() can trigger a memory
@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
* the GFP_NOIO context avoids both problems.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
- zonefs_io_error_cb, &err);
- if (ret != nr_zones)
+ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
+ zonefs_io_error_cb, &zone);
+ memalloc_noio_restore(noio_flag);
+
+ if (ret != 1) {
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret);
- memalloc_noio_restore(noio_flag);
+ zonefs_warn(sb, "remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ return;
+ }
+
+handle_io_error:
+ zonefs_handle_io_error(inode, &zone, write);
}

static struct kmem_cache *zonefs_inode_cachep;
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 67d8dd2f1bde..bae0fe4d499b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -356,7 +356,6 @@
*(.ref.data) \
*(.data..shared_aligned) /* percpu related */ \
MEM_KEEP(init.data*) \
- MEM_KEEP(exit.data*) \
*(.data.unlikely) \
__start_once = .; \
*(.data.once) \
@@ -521,7 +520,6 @@
__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \
*(.ref.rodata) \
MEM_KEEP(init.rodata) \
- MEM_KEEP(exit.rodata) \
} \
\
/* Built-in module parameters. */ \
@@ -574,7 +572,6 @@
*(.ref.text) \
*(.text.asan.* .text.tsan.*) \
MEM_KEEP(init.text*) \
- MEM_KEEP(exit.text*) \


/* sched.text is aling to function alignment to secure we have same
@@ -714,13 +711,10 @@
*(.exit.data .exit.data.*) \
*(.fini_array .fini_array.*) \
*(.dtors .dtors.*) \
- MEM_DISCARD(exit.data*) \
- MEM_DISCARD(exit.rodata*)

#define EXIT_TEXT \
*(.exit.text) \
*(.text.exit) \
- MEM_DISCARD(exit.text)

#define EXIT_CALL \
*(.exitcall.exit)
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ae12696ec492..2ad261082bba 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
struct delayed_work dwork; /* work item used for writeback */
struct delayed_work bw_dwork; /* work item used for bandwidth estimate */

- unsigned long dirty_sleep; /* last wait */
-
struct list_head bdi_node; /* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
* any dirty wbs, which is depended upon by bdi_has_dirty().
*/
atomic_long_t tot_write_bandwidth;
+ /*
+ * Jiffies when last process was dirty throttled on this bdi. Used by
+ * blk-wbt.
+ */
+ unsigned long last_bdp_sleep;

struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7af9e34ec261..8c9a095c1757 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,6 +66,26 @@
__builtin_unreachable(); \
} while (0)

+/*
+ * GCC 'asm goto' with outputs miscompiles certain code sequences:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ *
+ * Work around it via the same compiler barrier quirk that we used
+ * to use for the old 'asm goto' workaround.
+ *
+ * Also, always mark such 'asm goto' statements as volatile: all
+ * asm goto statements are supposed to be volatile as per the
+ * documentation, but some versions of gcc didn't actually do
+ * that for asms with outputs:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
+ */
+#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
+#define asm_goto_output(x...) \
+ do { asm volatile goto(x); asm (""); } while (0)
+#endif
+
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index c523c6683789..b2f9e2c409cf 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -352,8 +352,15 @@ struct ftrace_likely_data {
# define __realloc_size(x, ...)
#endif

-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+/*
+ * Some versions of gcc do not mark 'asm goto' volatile:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979
+ *
+ * We do it here by hand, because it doesn't hurt.
+ */
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm volatile goto(x)
#endif

#ifdef CONFIG_CC_HAS_ASM_INLINE
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 7852f6c9a714..719cf9cc6e1a 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -8,6 +8,8 @@
#ifndef __AD_SIGMA_DELTA_H__
#define __AD_SIGMA_DELTA_H__

+#include <linux/iio/iio.h>
+
enum ad_sigma_delta_mode {
AD_SD_MODE_CONTINUOUS = 0,
AD_SD_MODE_SINGLE = 1,
@@ -99,7 +101,7 @@ struct ad_sigma_delta {
* 'rx_buf' is up to 32 bits per sample + 64 bit timestamp,
* rounded to 16 bytes to take into account padding.
*/
- uint8_t tx_buf[4] ____cacheline_aligned;
+ uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN);
uint8_t rx_buf[16] __aligned(8);
};

diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 607c3a89a647..f9ae5cdd884f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -258,9 +258,9 @@ struct st_sensor_data {
bool hw_irq_trigger;
s64 hw_timestamp;

- char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
-
struct mutex odr_lock;
+
+ char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN);
};

#ifdef CONFIG_IIO_BUFFER
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index dc9ea299e088..8898966bc0f0 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -11,6 +11,7 @@

#include <linux/spi/spi.h>
#include <linux/interrupt.h>
+#include <linux/iio/iio.h>
#include <linux/iio/types.h>

#define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
@@ -131,7 +132,7 @@ struct adis {
unsigned long irq_flag;
void *buffer;

- u8 tx[10] ____cacheline_aligned;
+ u8 tx[10] __aligned(IIO_DMA_MINALIGN);
u8 rx[4];
};

diff --git a/include/linux/init.h b/include/linux/init.h
index 266c3e1640d4..01b52c9c7526 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -89,9 +89,6 @@
__latent_entropy
#define __meminitdata __section(".meminit.data")
#define __meminitconst __section(".meminit.rodata")
-#define __memexit __section(".memexit.text") __exitused __cold notrace
-#define __memexitdata __section(".memexit.data")
-#define __memexitconst __section(".memexit.rodata")

/* For assembly routines */
#define __HEAD .section ".head.text","ax"
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 74c60f9446f8..489362b0cd85 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -311,9 +311,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
sockptr_t optval, sockptr_t optlen, unsigned int len)
-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
struct sk_buff *skb, u32 *secid)
LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index e8c350a3ade1..e9f4f845d760 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -186,6 +186,8 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Cancel ongoing garbage collectors before destroying the set*/
+ void (*cancel_gc)(struct ip_set *set);
/* Region-locking is used */
bool region_lock;
};
@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);

/* A generic IP set */
struct ip_set {
+ /* For call_cru in destroy */
+ struct rcu_head rcu;
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..90507d4afcd6 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif

+#ifndef exception_ip
+#define exception_ip(x) instruction_pointer(x)
+#endif
+
extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);

extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index bbbafc0feb5b..27a26092493a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -748,8 +748,17 @@ struct uart_driver {

void uart_write_wakeup(struct uart_port *port);

-#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \
- for_post) \
+/**
+ * enum UART_TX_FLAGS -- flags for uart_port_tx_flags()
+ *
+ * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer
+ */
+enum UART_TX_FLAGS {
+ UART_TX_NOSTOP = BIT(0),
+};
+
+#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \
+ for_test, for_post) \
({ \
struct uart_port *__port = (uport); \
struct circ_buf *xmit = &__port->state->xmit; \
@@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port);
if (pending < WAKEUP_CHARS) { \
uart_write_wakeup(__port); \
\
- if (pending == 0) \
+ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \
__port->ops->stop_tx(__port); \
} \
\
@@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port);
*/
#define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \
unsigned int __count = (count); \
- __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \
__count--); \
})

@@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port);
* See uart_port_tx_limited() for more details.
*/
#define uart_port_tx(port, ch, tx_ready, put_char) \
- __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({}))
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({}))
+

+/**
+ * uart_port_tx_flags -- transmit helper for uart_port with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \
+ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({}))
/*
* Baud rate helpers.
*/
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index cf9f0c61796e..696f8dc4aa53 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -652,7 +652,7 @@ struct trace_event_file {
struct list_head list;
struct trace_event_call *event_call;
struct event_filter __rcu *filter;
- struct eventfs_file *ef;
+ struct eventfs_inode *ei;
struct trace_array *tr;
struct trace_subsystem_dir *system;
struct list_head triggers;
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
index 009072792fa3..7a5fe17b6bf9 100644
--- a/include/linux/tracefs.h
+++ b/include/linux/tracefs.h
@@ -23,26 +23,69 @@ struct file_operations;

struct eventfs_file;

-struct dentry *eventfs_create_events_dir(const char *name,
- struct dentry *parent);
-
-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
- struct dentry *parent);
+/**
+ * eventfs_callback - A callback function to create dynamic files in eventfs
+ * @name: The name of the file that is to be created
+ * @mode: return the file mode for the file (RW access, etc)
+ * @data: data to pass to the created file ops
+ * @fops: the file operations of the created file
+ *
+ * The evetnfs files are dynamically created. The struct eventfs_entry array
+ * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will
+ * be used to create the files within those directories. When a lookup
+ * or access to a file within the directory is made, the struct eventfs_entry
+ * array is used to find a callback() with the matching name that is being
+ * referenced (for lookups, the entire array is iterated and each callback
+ * will be called).
+ *
+ * The callback will be called with @name for the name of the file to create.
+ * The callback can return less than 1 to indicate that no file should be
+ * created.
+ *
+ * If a file is to be created, then @mode should be populated with the file
+ * mode (permissions) for which the file is created for. This would be
+ * used to set the created inode i_mode field.
+ *
+ * The @data should be set to the data passed to the other file operations
+ * (read, write, etc). Note, @data will also point to the data passed in
+ * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback
+ * can replace the data if it chooses to. Otherwise, the original data
+ * will be used for the file operation functions.
+ *
+ * The @fops should be set to the file operations that will be used to create
+ * the inode.
+ *
+ * NB. This callback is called while holding internal locks of the eventfs
+ * system. The callback must not call any code that might also call into
+ * the tracefs or eventfs system or it will risk creating a deadlock.
+ */
+typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops);

-struct eventfs_file *eventfs_add_dir(const char *name,
- struct eventfs_file *ef_parent);
+/**
+ * struct eventfs_entry - dynamically created eventfs file call back handler
+ * @name: Then name of the dynamic file in an eventfs directory
+ * @callback: The callback to get the fops of the file when it is created
+ *
+ * See evenfs_callback() typedef for how to set up @callback.
+ */
+struct eventfs_entry {
+ const char *name;
+ eventfs_callback callback;
+};

-int eventfs_add_file(const char *name, umode_t mode,
- struct eventfs_file *ef_parent, void *data,
- const struct file_operations *fops);
+struct eventfs_inode;

-int eventfs_add_events_file(const char *name, umode_t mode,
- struct dentry *parent, void *data,
- const struct file_operations *fops);
+struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);

-void eventfs_remove(struct eventfs_file *ef);
+struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);

-void eventfs_remove_events_dir(struct dentry *dentry);
+void eventfs_remove_events_dir(struct eventfs_inode *ei);
+void eventfs_remove_dir(struct eventfs_inode *ei);

struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
diff --git a/include/net/tls.h b/include/net/tls.h
index a2b44578dcb7..5fdd5dd251df 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -96,9 +96,6 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
- /* protect crypto_wait with encrypt_pending */
- spinlock_t encrypt_compl_lock;
- int async_notify;
u8 async_capable:1;

#define BIT_TX_SCHEDULED 0
@@ -135,8 +132,6 @@ struct tls_sw_context_rx {
struct tls_strparser strp;

atomic_t decrypt_pending;
- /* protect crypto_wait with decrypt_pending*/
- spinlock_t decrypt_compl_lock;
struct sk_buff_head async_hold;
struct wait_queue_head wq;
};
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index a6c808b22318..475294c853aa 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -135,6 +135,7 @@ struct tasdevice_priv {

void tas2781_reset(struct tasdevice_priv *tas_dev);
int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
void (*cont)(const struct firmware *fw, void *context));
struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c);
int tasdevice_init(struct tasdevice_priv *tas_priv);
diff --git a/init/Kconfig b/init/Kconfig
index 6d35728b94b2..18fece8fe085 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT
# Detect buggy gcc and clang, fixed in gcc-11 clang-14.
def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)

+config GCC_ASM_GOTO_OUTPUT_WORKAROUND
+ bool
+ depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
+ # Fixed in GCC 14, 13.3, 12.4 and 11.5
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ default y if GCC_VERSION < 110500
+ default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
+ default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
+
config TOOLS_SUPPORT_RELR
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)

diff --git a/io_uring/net.c b/io_uring/net.c
index 43bc9a5f96f9..161622029147 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1372,7 +1372,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
* has already been done
*/
if (issue_flags & IO_URING_F_MULTISHOT)
- ret = IOU_ISSUE_SKIP_COMPLETE;
+ return IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
if (ret == -ERESTARTSYS)
@@ -1397,7 +1397,8 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
ret, IORING_CQE_F_MORE))
goto retry;

- return -ECANCELED;
+ io_req_set_res(req, ret, 0);
+ return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 2ad881d07752..4e715b9b278e 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -162,6 +162,9 @@
| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
| MEMBARRIER_CMD_GET_REGISTRATIONS)

+static DEFINE_MUTEX(membarrier_ipi_mutex);
+#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex)
+
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
@@ -259,6 +262,7 @@ static int membarrier_global_expedited(void)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;

+ SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -347,6 +351,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;

+ SERIALIZE_IPI();
cpus_read_lock();

if (cpu_id >= 0) {
@@ -460,6 +465,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
* between threads which are users of @mm has its membarrier state
* updated.
*/
+ SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b01ae7d36021..83ba342aef31 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs);

static int register_ftrace_function_nolock(struct ftrace_ops *ops);

+/*
+ * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct
+ * call will be jumped from ftrace_regs_caller. Only if the architecture does
+ * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it
+ * jumps from ftrace_caller for multiple ftrace_ops.
+ */
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS
#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS)
+#else
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
+#endif

static int check_direct_multi(struct ftrace_ops *ops)
{
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f232cf56fa05..1ac6637895a4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1091,7 +1091,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -EINVAL;
+ return EPOLLERR;

cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fc00356a5a0a..82e28777cacf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
+#include <linux/kmemleak.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
@@ -2311,7 +2312,7 @@ struct saved_cmdlines_buffer {
unsigned *map_cmdline_to_pid;
unsigned cmdline_num;
int cmdline_idx;
- char *saved_cmdlines;
+ char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;

@@ -2325,47 +2326,60 @@ static inline void set_cmdline(int idx, const char *cmdline)
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

-static int allocate_cmdlines_buffer(unsigned int val,
- struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kfree(s->map_cmdline_to_pid);
+ kmemleak_free(s);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ kmemleak_alloc(s, size, 1, GFP_KERNEL);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / TASK_COMM_LEN;
+ s->cmdline_num = val;
+
s->map_cmdline_to_pid = kmalloc_array(val,
sizeof(*s->map_cmdline_to_pid),
GFP_KERNEL);
- if (!s->map_cmdline_to_pid)
- return -ENOMEM;
-
- s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
- if (!s->saved_cmdlines) {
- kfree(s->map_cmdline_to_pid);
- return -ENOMEM;
+ if (!s->map_cmdline_to_pid) {
+ free_saved_cmdlines_buffer(s);
+ return NULL;
}

s->cmdline_idx = 0;
- s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));

- return 0;
+ return s;
}

static int trace_create_savedcmd(void)
{
- int ret;
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);

- savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
- if (!savedcmd)
- return -ENOMEM;
-
- ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
- if (ret < 0) {
- kfree(savedcmd);
- savedcmd = NULL;
- return -ENOMEM;
- }
-
- return 0;
+ return savedcmd ? 0 : -ENOMEM;
}

int is_tracing_stopped(void)
@@ -6056,26 +6070,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- kfree(s->saved_cmdlines);
- kfree(s->map_cmdline_to_pid);
- kfree(s);
-}
-
static int tracing_resize_saved_cmdlines(unsigned int val)
{
struct saved_cmdlines_buffer *s, *savedcmd_temp;

- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = allocate_cmdlines_buffer(val);
if (!s)
return -ENOMEM;

- if (allocate_cmdlines_buffer(val, s) < 0) {
- kfree(s);
- return -ENOMEM;
- }
-
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
@@ -9758,7 +9760,6 @@ static __init void create_trace_instances(struct dentry *d_tracer)
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
- struct trace_event_file *file;
int cpu;

trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
@@ -9791,11 +9792,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);

- file = __find_event_file(tr, "ftrace", "print");
- if (file && file->ef)
- eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
- file, &event_trigger_fops);
- tr->trace_marker_file = file;
+ tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

trace_create_file("trace_marker_raw", 0220, d_tracer,
tr, &tracing_mark_raw_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 51c0a970339e..02b727a54648 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -381,7 +381,7 @@ struct trace_array {
struct dentry *dir;
struct dentry *options;
struct dentry *percpu_dir;
- struct dentry *event_dir;
+ struct eventfs_inode *event_dir;
struct trace_options *topts;
struct list_head systems;
struct list_head events;
@@ -1345,7 +1345,7 @@ struct trace_subsystem_dir {
struct list_head list;
struct event_subsystem *subsystem;
struct trace_array *tr;
- struct eventfs_file *ef;
+ struct eventfs_inode *ei;
int ref_count;
int nr_events;
};
diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c
index ca224d53bfdc..5bbdbcbbde3c 100644
--- a/kernel/trace/trace_btf.c
+++ b/kernel/trace/trace_btf.c
@@ -91,8 +91,8 @@ const struct btf_member *btf_find_struct_member(struct btf *btf,
for_each_member(i, type, member) {
if (!member->name_off) {
/* Anonymous union/struct: push it for later use */
- type = btf_type_skip_modifiers(btf, member->type, &tid);
- if (type && top < BTF_ANON_STACK_MAX) {
+ if (btf_type_skip_modifiers(btf, member->type, &tid) &&
+ top < BTF_ANON_STACK_MAX) {
anon_stack[top].tid = tid;
anon_stack[top++].offset =
cur_offset + member->offset;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 82cb22ad6d61..941a394d3911 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
return;

if (!--dir->nr_events) {
- eventfs_remove(dir->ef);
+ eventfs_remove_dir(dir->ei);
list_del(&dir->list);
__put_system_dir(dir);
}
@@ -1013,7 +1013,7 @@ void event_file_put(struct trace_event_file *file)

static void remove_event_file_dir(struct trace_event_file *file)
{
- eventfs_remove(file->ef);
+ eventfs_remove_dir(file->ei);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
@@ -2302,14 +2302,40 @@ create_new_subsystem(const char *name)
return NULL;
}

-static struct eventfs_file *
+static int system_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ if (strcmp(name, "filter") == 0)
+ *fops = &ftrace_subsystem_filter_fops;
+
+ else if (strcmp(name, "enable") == 0)
+ *fops = &ftrace_system_enable_fops;
+
+ else
+ return 0;
+
+ *mode = TRACE_MODE_WRITE;
+ return 1;
+}
+
+static struct eventfs_inode *
event_subsystem_dir(struct trace_array *tr, const char *name,
- struct trace_event_file *file, struct dentry *parent)
+ struct trace_event_file *file, struct eventfs_inode *parent)
{
struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
- struct eventfs_file *ef;
- int res;
+ struct eventfs_inode *ei;
+ int nr_entries;
+ static struct eventfs_entry system_entries[] = {
+ {
+ .name = "filter",
+ .callback = system_callback,
+ },
+ {
+ .name = "enable",
+ .callback = system_callback,
+ }
+ };

/* First see if we did not already create this dir */
list_for_each_entry(dir, &tr->systems, list) {
@@ -2317,7 +2343,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
if (strcmp(system->name, name) == 0) {
dir->nr_events++;
file->system = dir;
- return dir->ef;
+ return dir->ei;
}
}

@@ -2341,39 +2367,29 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);

- ef = eventfs_add_subsystem_dir(name, parent);
- if (IS_ERR(ef)) {
+ /* ftrace only has directories no files */
+ if (strcmp(name, "ftrace") == 0)
+ nr_entries = 0;
+ else
+ nr_entries = ARRAY_SIZE(system_entries);
+
+ ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
+ if (IS_ERR(ei)) {
pr_warn("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
}

- dir->ef = ef;
+ dir->ei = ei;
dir->tr = tr;
dir->ref_count = 1;
dir->nr_events = 1;
dir->subsystem = system;
file->system = dir;

- /* the ftrace system is special, do not create enable or filter files */
- if (strcmp(name, "ftrace") != 0) {
-
- res = eventfs_add_file("filter", TRACE_MODE_WRITE,
- dir->ef, dir,
- &ftrace_subsystem_filter_fops);
- if (res) {
- kfree(system->filter);
- system->filter = NULL;
- pr_warn("Could not create tracefs '%s/filter' entry\n", name);
- }
-
- eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
- &ftrace_system_enable_fops);
- }
-
list_add(&dir->list, &tr->systems);

- return dir->ef;
+ return dir->ei;

out_free:
kfree(dir);
@@ -2422,15 +2438,134 @@ event_define_fields(struct trace_event_call *call)
return ret;
}

+static int event_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ struct trace_event_file *file = *data;
+ struct trace_event_call *call = file->event_call;
+
+ if (strcmp(name, "format") == 0) {
+ *mode = TRACE_MODE_READ;
+ *fops = &ftrace_event_format_fops;
+ *data = call;
+ return 1;
+ }
+
+ /*
+ * Only event directories that can be enabled should have
+ * triggers or filters, with the exception of the "print"
+ * event that can have a "trigger" file.
+ */
+ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+ if (call->class->reg && strcmp(name, "enable") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &ftrace_enable_fops;
+ return 1;
+ }
+
+ if (strcmp(name, "filter") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &ftrace_event_filter_fops;
+ return 1;
+ }
+ }
+
+ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ strcmp(trace_event_name(call), "print") == 0) {
+ if (strcmp(name, "trigger") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &event_trigger_fops;
+ return 1;
+ }
+ }
+
+#ifdef CONFIG_PERF_EVENTS
+ if (call->event.type && call->class->reg &&
+ strcmp(name, "id") == 0) {
+ *mode = TRACE_MODE_READ;
+ *data = (void *)(long)call->event.type;
+ *fops = &ftrace_event_id_fops;
+ return 1;
+ }
+#endif
+
+#ifdef CONFIG_HIST_TRIGGERS
+ if (strcmp(name, "hist") == 0) {
+ *mode = TRACE_MODE_READ;
+ *fops = &event_hist_fops;
+ return 1;
+ }
+#endif
+#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+ if (strcmp(name, "hist_debug") == 0) {
+ *mode = TRACE_MODE_READ;
+ *fops = &event_hist_debug_fops;
+ return 1;
+ }
+#endif
+#ifdef CONFIG_TRACE_EVENT_INJECT
+ if (call->event.type && call->class->reg &&
+ strcmp(name, "inject") == 0) {
+ *mode = 0200;
+ *fops = &event_inject_fops;
+ return 1;
+ }
+#endif
+ return 0;
+}
+
static int
-event_create_dir(struct dentry *parent, struct trace_event_file *file)
+event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
- struct eventfs_file *ef_subsystem = NULL;
struct trace_array *tr = file->tr;
- struct eventfs_file *ef;
+ struct eventfs_inode *e_events;
+ struct eventfs_inode *ei;
const char *name;
+ int nr_entries;
int ret;
+ static struct eventfs_entry event_entries[] = {
+ {
+ .name = "enable",
+ .callback = event_callback,
+ },
+ {
+ .name = "filter",
+ .callback = event_callback,
+ },
+ {
+ .name = "trigger",
+ .callback = event_callback,
+ },
+ {
+ .name = "format",
+ .callback = event_callback,
+ },
+#ifdef CONFIG_PERF_EVENTS
+ {
+ .name = "id",
+ .callback = event_callback,
+ },
+#endif
+#ifdef CONFIG_HIST_TRIGGERS
+ {
+ .name = "hist",
+ .callback = event_callback,
+ },
+#endif
+#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+ {
+ .name = "hist_debug",
+ .callback = event_callback,
+ },
+#endif
+#ifdef CONFIG_TRACE_EVENT_INJECT
+ {
+ .name = "inject",
+ .callback = event_callback,
+ },
+#endif
+ };

/*
* If the trace point header did not define TRACE_SYSTEM
@@ -2440,29 +2575,20 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
return -ENODEV;

- ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
- if (!ef_subsystem)
+ e_events = event_subsystem_dir(tr, call->class->system, file, parent);
+ if (!e_events)
return -ENOMEM;

+ nr_entries = ARRAY_SIZE(event_entries);
+
name = trace_event_name(call);
- ef = eventfs_add_dir(name, ef_subsystem);
- if (IS_ERR(ef)) {
+ ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
+ if (IS_ERR(ei)) {
pr_warn("Could not create tracefs '%s' directory\n", name);
return -1;
}

- file->ef = ef;
-
- if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
- eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
- &ftrace_enable_fops);
-
-#ifdef CONFIG_PERF_EVENTS
- if (call->event.type && call->class->reg)
- eventfs_add_file("id", TRACE_MODE_READ, file->ef,
- (void *)(long)call->event.type,
- &ftrace_event_id_fops);
-#endif
+ file->ei = ei;

ret = event_define_fields(call);
if (ret < 0) {
@@ -2470,35 +2596,6 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
return ret;
}

- /*
- * Only event directories that can be enabled should have
- * triggers or filters.
- */
- if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
- eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
- file, &ftrace_event_filter_fops);
-
- eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
- file, &event_trigger_fops);
- }
-
-#ifdef CONFIG_HIST_TRIGGERS
- eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
- &event_hist_fops);
-#endif
-#ifdef CONFIG_HIST_TRIGGERS_DEBUG
- eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
- &event_hist_debug_fops);
-#endif
- eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
- &ftrace_event_format_fops);
-
-#ifdef CONFIG_TRACE_EVENT_INJECT
- if (call->event.type && call->class->reg)
- eventfs_add_file("inject", 0200, file->ef, file,
- &event_inject_fops);
-#endif
-
return 0;
}

@@ -3644,30 +3741,65 @@ static __init int setup_trace_event(char *str)
}
__setup("trace_event=", setup_trace_event);

+static int events_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ if (strcmp(name, "enable") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &ftrace_tr_enable_fops;
+ return 1;
+ }
+
+ if (strcmp(name, "header_page") == 0)
+ *data = ring_buffer_print_page_header;
+
+ else if (strcmp(name, "header_event") == 0)
+ *data = ring_buffer_print_entry_header;
+
+ else
+ return 0;
+
+ *mode = TRACE_MODE_READ;
+ *fops = &ftrace_show_header_fops;
+ return 1;
+}
+
/* Expects to have event_mutex held when called */
static int
create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
- struct dentry *d_events;
+ struct eventfs_inode *e_events;
struct dentry *entry;
- int error = 0;
+ int nr_entries;
+ static struct eventfs_entry events_entries[] = {
+ {
+ .name = "enable",
+ .callback = events_callback,
+ },
+ {
+ .name = "header_page",
+ .callback = events_callback,
+ },
+ {
+ .name = "header_event",
+ .callback = events_callback,
+ },
+ };

entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
tr, &ftrace_set_event_fops);
if (!entry)
return -ENOMEM;

- d_events = eventfs_create_events_dir("events", parent);
- if (IS_ERR(d_events)) {
+ nr_entries = ARRAY_SIZE(events_entries);
+
+ e_events = eventfs_create_events_dir("events", parent, events_entries,
+ nr_entries, tr);
+ if (IS_ERR(e_events)) {
pr_warn("Could not create tracefs 'events' directory\n");
return -ENOMEM;
}

- error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
- tr, &ftrace_tr_enable_fops);
- if (error)
- return -ENOMEM;
-
/* There are not as crucial, just warn if they are not created */

trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
@@ -3677,16 +3809,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
TRACE_MODE_WRITE, parent, tr,
&ftrace_set_event_notrace_pid_fops);

- /* ring buffer internal formats */
- eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
- ring_buffer_print_page_header,
- &ftrace_show_header_fops);
-
- eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
- ring_buffer_print_entry_header,
- &ftrace_show_header_fops);
-
- tr->event_dir = d_events;
+ tr->event_dir = e_events;

return 0;
}
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 846e02c0fb59..624e0867316d 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry,
if (is_dynamic) {
union trace_synth_field *data = &entry->fields[*n_u64];

+ len = fetch_store_strlen((unsigned long)str_val);
data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
- data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
+ data->as_dynamic.len = len;

ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 46439e3bcec4..b33c3861fbbb 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
- if (tracing_alloc_snapshot_instance(file->tr) != 0)
- return 0;
+ int ret = tracing_alloc_snapshot_instance(file->tr);
+
+ if (ret < 0)
+ return ret;

return register_trigger(glob, data, file);
}
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index bd0d01d00fb9..a8e28f9b9271 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file)
tlat = this_cpu_tmr_var();
tlat->count = 0;

+ hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
+ tlat->timer.function = timerlat_irq;
+
migrate_enable();
return 0;
};
@@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
tlat->tracing_thread = false;
tlat->kthread = current;

- hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
- tlat->timer.function = timerlat_irq;
-
/* Annotate now to drift new period */
tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 4dc74d73fc1d..34289f9c6707 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (!(ctx->flags & TPARG_FL_TEVENT) &&
(strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
strncmp(arg, "\\\"", 2) == 0)) {
- /* The type of $comm must be "string", and not an array. */
- if (parg->count || (t && strcmp(t, "string")))
+ /* The type of $comm must be "string", and not an array type. */
+ if (parg->count || (t && strcmp(t, "string"))) {
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
+ NEED_STRING_TYPE);
goto out;
+ }
parg->type = find_fetch_type("string", ctx->flags);
} else
parg->type = find_fetch_type(t, ctx->flags);
@@ -1169,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE);
goto out;
}
- parg->offset = *size;
- *size += parg->type->size * (parg->count ?: 1);
-
- ret = -ENOMEM;
- if (parg->count) {
- len = strlen(parg->type->fmttype) + 6;
- parg->fmt = kmalloc(len, GFP_KERNEL);
- if (!parg->fmt)
- goto out;
- snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
- parg->count);
- }

code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
if (!code)
@@ -1204,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
goto fail;
}
}
+ parg->offset = *size;
+ *size += parg->type->size * (parg->count ?: 1);
+
+ if (parg->count) {
+ len = strlen(parg->type->fmttype) + 6;
+ parg->fmt = kmalloc(len, GFP_KERNEL);
+ if (!parg->fmt) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+ parg->count);
+ }

ret = -EINVAL;
/* Store operation */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 850d9ecb6765..c1877d018269 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \
C(NO_BTF_FIELD, "This field is not found."), \
C(BAD_BTF_TID, "Failed to get BTF type info."),\
- C(BAD_TYPE4STR, "This type does not fit for string."),
+ C(BAD_TYPE4STR, "This type does not fit for string."),\
+ C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),

#undef C
#define C(a, b) TP_ERR_##a
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e6a95bb74e22..fd7b84b06d92 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5793,13 +5793,9 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
list_for_each_entry(wq, &workqueues, list) {
if (!(wq->flags & WQ_UNBOUND))
continue;
-
/* creating multiple pwqs breaks ordering guarantee */
- if (!list_empty(&wq->pwqs)) {
- if (wq->flags & __WQ_ORDERED_EXPLICIT)
- continue;
- wq->flags &= ~__WQ_ORDERED;
- }
+ if (wq->flags & __WQ_ORDERED)
+ continue;

ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
if (IS_ERR(ctx)) {
diff --git a/lib/kobject.c b/lib/kobject.c
index 59dbcbdb1c91..72fa20f405f1 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj)
if (error)
return error;

- error = sysfs_create_groups(kobj, ktype->default_groups);
- if (error) {
- sysfs_remove_dir(kobj);
- return error;
+ if (ktype) {
+ error = sysfs_create_groups(kobj, ktype->default_groups);
+ if (error) {
+ sysfs_remove_dir(kobj);
+ return error;
+ }
}

/*
@@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj)
sd = kobj->sd;
ktype = get_ktype(kobj);

- sysfs_remove_groups(kobj, ktype->default_groups);
+ if (ktype)
+ sysfs_remove_groups(kobj, ktype->default_groups);

/* send "remove" if the caller did not do it but sent "add" */
if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
@@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj)
pr_debug("'%s' (%p): %s, parent %p\n",
kobject_name(kobj), kobj, __func__, kobj->parent);

+ if (t && !t->release)
+ pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
+ kobject_name(kobj), kobj);
+
/* remove from sysfs if the caller did not do it */
if (kobj->state_in_sysfs) {
pr_debug("'%s' (%p): auto cleanup kobject_del\n",
@@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj)
parent = NULL;
}

- if (t->release) {
+ if (t && t->release) {
pr_debug("'%s' (%p): calling ktype release\n",
kobject_name(kobj), kobj);
t->release(kobj);
- } else {
- pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
- kobject_name(kobj), kobj);
}

/* free name if we allocated it */
@@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa
{
const struct kobj_ns_type_operations *ops = NULL;

- if (parent && parent->ktype->child_ns_type)
+ if (parent && parent->ktype && parent->ktype->child_ns_type)
ops = parent->ktype->child_ns_type(parent);

return ops;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1e3447bccdb1..e039d05304dd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
- wb->dirty_sleep = jiffies;

err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
@@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
+ bdi->last_bdp_sleep = jiffies;

return cgwb_bdi_init(bdi);
}
diff --git a/mm/memory.c b/mm/memory.c
index dccf9203dd53..b3be18f1f120 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5315,7 +5315,7 @@ static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs
return true;

if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
@@ -5340,7 +5340,7 @@ static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_r
{
mmap_read_unlock(mm);
if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4656534b8f5c..a9303f886639 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
*/
dtc->wb_thresh = __wb_calc_thresh(dtc);
dtc->wb_bg_thresh = dtc->thresh ?
- div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+ div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

/*
* In order to avoid the stacked BDI deadlock we need
@@ -1921,7 +1921,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
break;
}
__set_current_state(TASK_KILLABLE);
- wb->dirty_sleep = now;
+ bdi->last_bdp_sleep = jiffies;
io_schedule_timeout(pause);

current->dirty_paused_when = now + pause;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 96d9eae5c7cc..cd5ad448ac2f 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags)
{
struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -472,6 +473,15 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
goto out;
}
mmap_read_lock(dst_mm);
+ /*
+ * If memory mappings are changing because of non-cooperative
+ * operation (e.g. mremap) running in parallel, bail out and
+ * request the user to retry later
+ */
+ if (mmap_changing && atomic_read(mmap_changing)) {
+ err = -EAGAIN;
+ break;
+ }

dst_vma = NULL;
goto retry;
@@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */

@@ -622,8 +633,8 @@ static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
* If this is a HUGETLB vma, pass off to appropriate routine
*/
if (is_vm_hugetlb_page(dst_vma))
- return mfill_atomic_hugetlb(dst_vma, dst_start,
- src_start, len, flags);
+ return mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
+ len, mmap_changing, flags);

if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..31a93cae5111 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
unsigned int tp_max_packet_size;

/* lock for j1939_socks list */
- spinlock_t j1939_socks_lock;
+ rwlock_t j1939_socks_lock;
struct list_head j1939_socks;

struct kref rx_kref;
@@ -301,6 +301,7 @@ struct j1939_sock {

int ifindex;
struct j1939_addr addr;
+ spinlock_t filters_lock;
struct j1939_filter *filters;
int nfilters;
pgn_t pgn_rx_filter;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
return ERR_PTR(-ENOMEM);

j1939_tp_init(priv);
- spin_lock_init(&priv->j1939_socks_lock);
+ rwlock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);

mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index b28c976f52a0..1f49d6164ea1 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);

- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
}

static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
{
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_del_init(&jsk->list);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);

j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk,
static bool j1939_sk_match_filter(struct j1939_sock *jsk,
const struct j1939_sk_buff_cb *skcb)
{
- const struct j1939_filter *f = jsk->filters;
- int nfilter = jsk->nfilters;
+ const struct j1939_filter *f;
+ int nfilter;
+
+ spin_lock_bh(&jsk->filters_lock);
+
+ f = jsk->filters;
+ nfilter = jsk->nfilters;

if (!nfilter)
/* receive all when no filters are assigned */
- return true;
+ goto filter_match_found;

for (; nfilter; ++f, --nfilter) {
if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk,
continue;
if ((skcb->addr.src_name & f->name_mask) != f->name)
continue;
- return true;
+ goto filter_match_found;
}
+
+ spin_unlock_bh(&jsk->filters_lock);
return false;
+
+filter_match_found:
+ spin_unlock_bh(&jsk->filters_lock);
+ return true;
}

static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
@@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
struct j1939_sock *jsk;
bool match = false;

- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
match = j1939_sk_recv_match_one(jsk, skcb);
if (match)
break;
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);

return match;
}
@@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
struct j1939_sock *jsk;

- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
j1939_sk_recv_one(jsk, skb);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}

static void j1939_sk_sock_destruct(struct sock *sk)
@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ spin_lock_init(&jsk->filters_lock);

/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
}

lock_sock(&jsk->sk);
+ spin_lock_bh(&jsk->filters_lock);
ofilters = jsk->filters;
jsk->filters = filters;
jsk->nfilters = count;
+ spin_unlock_bh(&jsk->filters_lock);
release_sock(&jsk->sk);
kfree(ofilters);
return 0;
@@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
}

/* spread RX notifications to all sockets subscribed to this session */
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
if (j1939_sk_recv_match_one(jsk, &session->skcb))
__j1939_sk_errqueue(session, &jsk->sk, type);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
};

void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
struct j1939_sock *jsk;
int error_code = ENETDOWN;

- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
jsk->sk.sk_err = error_code;
if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)

j1939_sk_queue_drop_all(priv, jsk, error_code);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}

static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 16ed7bfd29e4..34fd1d9b2db8 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);

/* Act */
- fput(filp);
+ /* Ensure the close/release/put process has run to
+ * completion before checking the result.
+ */
+ __fput_sync(filp);

/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 306f942c3b28..dd4b5f0aa131 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,

skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
return;
}

@@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,

skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
}

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5481acbfc1d4..5ab9594ae119 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@xxxxxxx>
* Copyright 2007 Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -3913,6 +3913,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
goto begin;

skb = __skb_dequeue(&tx.skbs);
+ info = IEEE80211_SKB_CB(skb);

if (!skb_queue_empty(&tx.skbs)) {
spin_lock_bh(&fq->lock);
@@ -3957,7 +3958,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
}

encap_out:
- IEEE80211_SKB_CB(skb)->control.vif = vif;
+ info->control.vif = vif;

if (tx.sta &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index d042d32beb4d..c1717322c892 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -130,10 +130,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;

+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5c003a0f0fe5..9d4d5dbdbb53 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1522,8 +1522,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,

void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}

static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1964,6 +1967,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;

+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;

mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2318,9 +2324,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;

- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -3137,7 +3140,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3152,6 +3154,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;

WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3239,6 +3242,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);

+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;

@@ -3249,8 +3253,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}

static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
@@ -3362,8 +3364,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);

for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;

if (!flags)
@@ -3379,7 +3380,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);

@@ -3510,10 +3510,9 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);

mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}

void mptcp_sock_graft(struct sock *sk, struct socket *parent)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 07c5ac37d092..094d3fd47a92 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -283,7 +283,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -302,7 +301,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -1104,7 +1104,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d3c5ecf8ddf5..8c7e22a9a37b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -424,6 +424,8 @@ void __mptcp_sync_state(struct sock *sk, int state)
struct mptcp_sock *msk = mptcp_sk(sk);

__mptcp_propagate_sndbuf(sk, msk->first);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
inet_sk_state_store(sk, state);
sk->sk_state_change(sk);
@@ -545,7 +547,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
mptcp_propagate_state(parent, sk);
}
return;
@@ -1736,7 +1737,6 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e9612d8..9523104a90da 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -28,6 +28,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE

#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;

- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}

+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};

#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4c133e06be1d..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;

cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}

+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;

- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();

/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);

- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}

@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7c2399541771..20aad81fcad7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -221,6 +221,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match

@@ -265,6 +266,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)

@@ -429,7 +431,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;

for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -449,10 +451,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;

- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -598,6 +597,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}

+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1440,6 +1448,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e162636525cf..6c3f28bc59b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;

- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}

+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};

static void
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 90e275bb3e5d..a3a8ddca9918 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@

/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)

/* Store 256 bits from YMM register into memory. Contrary to bucket load
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6c9592d05120..12684d835cb5 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
nci_hci_deallocate(ndev);
+
+ /* drop partial rx data packet if present */
+ if (ndev->rx_data_reassembly)
+ kfree_skb(ndev->rx_data_reassembly);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 88965e2068ac..ebc5728aab4e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@ struct ovs_len_tbl {

#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16

static bool actions_may_change_flow(const struct nlattr *actions)
{
@@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
+ u32 mpls_label_count, bool log,
+ u32 depth);

static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;

err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);

if (err)
return err;
@@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
int start, action_start, err, rem;
@@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
return action_start;

err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
- vlan_tci, mpls_label_count, log);
+ vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;

@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
int start, err;
u32 exec;
@@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net,
return err;

err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;

@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
u32 mpls_label_count,
- bool log, bool last)
+ bool log, bool last, u32 depth)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;

err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);

if (err)
return err;
@@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;

err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);

if (err)
return err;
@@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;

+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+ return -EOVERFLOW;
+
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
eth_type,
vlan_tci,
mpls_label_count,
- log, last);
+ log, last,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_DEC_TTL:
err = validate_and_copy_dec_ttl(net, a, key, sfa,
eth_type, vlan_tci,
- mpls_label_count, log);
+ mpls_label_count, log,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,

(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
+ key->eth.vlan.tci, mpls_label_count, log,
+ 0);
if (err)
ovs_nla_free_flow_actions(*sfa);

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index dba523cdc73d..e1f8ff6e9a73 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -63,6 +63,7 @@ struct tls_decrypt_ctx {
u8 iv[MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
+ bool free_sgout;
struct scatterlist sg[];
};

@@ -187,7 +188,6 @@ static void tls_decrypt_done(void *data, int err)
struct aead_request *aead_req = data;
struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
struct scatterlist *sgout = aead_req->dst;
- struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_decrypt_ctx *dctx;
struct tls_context *tls_ctx;
@@ -196,6 +196,17 @@ static void tls_decrypt_done(void *data, int err)
struct sock *sk;
int aead_size;

+ /* If requests get too backlogged crypto API returns -EBUSY and calls
+ * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+ * to make waiting for backlog to flush with crypto_wait_req() easier.
+ * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+ * -EINPROGRESS -> 0.
+ * We have a single struct crypto_async_request per direction, this
+ * scheme doesn't help us, so just ignore the first ->complete().
+ */
+ if (err == -EINPROGRESS)
+ return;
+
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
dctx = (void *)((u8 *)aead_req + aead_size);
@@ -213,7 +224,7 @@ static void tls_decrypt_done(void *data, int err)
}

/* Free the destination pages if skb was not decrypted inplace */
- if (sgout != sgin) {
+ if (dctx->free_sgout) {
/* Skip the first S/G entry as it points to AAD */
for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
if (!sg)
@@ -224,10 +235,17 @@ static void tls_decrypt_done(void *data, int err)

kfree(aead_req);

- spin_lock_bh(&ctx->decrypt_compl_lock);
- if (!atomic_dec_return(&ctx->decrypt_pending))
+ if (atomic_dec_and_test(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->decrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->decrypt_pending);
+
+ return ctx->async_wait.err;
}

static int tls_do_decryption(struct sock *sk,
@@ -253,6 +271,7 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
tls_decrypt_done, aead_req);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
aead_request_set_callback(aead_req,
@@ -261,6 +280,10 @@ static int tls_do_decryption(struct sock *sk,
}

ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EBUSY) {
+ ret = tls_decrypt_async_wait(ctx);
+ ret = ret ?: -EINPROGRESS;
+ }
if (ret == -EINPROGRESS) {
if (darg->async)
return 0;
@@ -439,9 +462,10 @@ static void tls_encrypt_done(void *data, int err)
struct tls_rec *rec = data;
struct scatterlist *sge;
struct sk_msg *msg_en;
- bool ready = false;
struct sock *sk;
- int pending;
+
+ if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+ return;

msg_en = &rec->msg_encrypted;

@@ -476,23 +500,25 @@ static void tls_encrypt_done(void *data, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}

- spin_lock_bh(&ctx->encrypt_compl_lock);
- pending = atomic_dec_return(&ctx->encrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->encrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
+}

- if (!ready)
- return;
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->encrypt_pending);

- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
+ return ctx->async_wait.err;
}

static int tls_do_encryption(struct sock *sk,
@@ -541,9 +567,14 @@ static int tls_do_encryption(struct sock *sk,

/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1);
atomic_inc(&ctx->encrypt_pending);

rc = crypto_aead_encrypt(aead_req);
+ if (rc == -EBUSY) {
+ rc = tls_encrypt_async_wait(ctx);
+ rc = rc ?: -EINPROGRESS;
+ }
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
sge->offset -= prot->prepend_size;
@@ -984,7 +1015,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int num_zc = 0;
int orig_size;
int ret = 0;
- int pending;

if (!eor && (msg->msg_flags & MSG_EOR))
return -EINVAL;
@@ -1163,24 +1193,12 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
if (!num_async) {
goto send_end;
} else if (num_zc) {
- /* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no
- * pending encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ int err;

- if (ctx->async_wait.err) {
- ret = ctx->async_wait.err;
+ /* Wait for pending encryptions to get completed */
+ err = tls_encrypt_async_wait(ctx);
+ if (err) {
+ ret = err;
copied = 0;
}
}
@@ -1229,7 +1247,6 @@ void tls_sw_splice_eof(struct socket *sock)
ssize_t copied = 0;
bool retrying = false;
int ret = 0;
- int pending;

if (!ctx->open_rec)
return;
@@ -1264,22 +1281,7 @@ void tls_sw_splice_eof(struct socket *sock)
}

/* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no pending
- * encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
- if (ctx->async_wait.err)
+ if (tls_encrypt_async_wait(ctx))
goto unlock;

/* Transmit if any encryptions have completed */
@@ -1581,6 +1583,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
} else if (out_sg) {
memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
+ dctx->free_sgout = !!pages;

/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
@@ -2109,16 +2112,10 @@ int tls_sw_recvmsg(struct sock *sk,

recv_end:
if (async) {
- int ret, pending;
+ int ret;

/* Wait for all previously submitted records to be decrypted */
- spin_lock_bh(&ctx->decrypt_compl_lock);
- reinit_completion(&ctx->async_wait.completion);
- pending = atomic_read(&ctx->decrypt_pending);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
- ret = 0;
- if (pending)
- ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ ret = tls_decrypt_async_wait(ctx);
__skb_queue_purge(&ctx->async_hold);

if (ret) {
@@ -2135,7 +2132,6 @@ int tls_sw_recvmsg(struct sock *sk,
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek);
- decrypted += max(err, 0);
}

copied += decrypted;
@@ -2435,16 +2431,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
- int pending;

/* Wait for any pending async encryptions to complete */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ tls_encrypt_async_wait(ctx);

tls_tx_records(sk, -1);

@@ -2597,6 +2586,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
tls_ctx->prot_info.version != TLS_1_3_VERSION;
}

+static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk)
+{
+ struct tls_sw_context_tx *sw_ctx_tx;
+
+ if (!ctx->priv_ctx_tx) {
+ sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+ if (!sw_ctx_tx)
+ return NULL;
+ } else {
+ sw_ctx_tx = ctx->priv_ctx_tx;
+ }
+
+ crypto_init_wait(&sw_ctx_tx->async_wait);
+ atomic_set(&sw_ctx_tx->encrypt_pending, 1);
+ INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
+ INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
+ sw_ctx_tx->tx_work.sk = sk;
+
+ return sw_ctx_tx;
+}
+
+static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
+{
+ struct tls_sw_context_rx *sw_ctx_rx;
+
+ if (!ctx->priv_ctx_rx) {
+ sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+ if (!sw_ctx_rx)
+ return NULL;
+ } else {
+ sw_ctx_rx = ctx->priv_ctx_rx;
+ }
+
+ crypto_init_wait(&sw_ctx_rx->async_wait);
+ atomic_set(&sw_ctx_rx->decrypt_pending, 1);
+ init_waitqueue_head(&sw_ctx_rx->wq);
+ skb_queue_head_init(&sw_ctx_rx->rx_list);
+ skb_queue_head_init(&sw_ctx_rx->async_hold);
+
+ return sw_ctx_rx;
+}
+
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2618,48 +2649,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
}

if (tx) {
- if (!ctx->priv_ctx_tx) {
- sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
- if (!sw_ctx_tx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_tx = sw_ctx_tx;
- } else {
- sw_ctx_tx =
- (struct tls_sw_context_tx *)ctx->priv_ctx_tx;
- }
- } else {
- if (!ctx->priv_ctx_rx) {
- sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
- if (!sw_ctx_rx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_rx = sw_ctx_rx;
- } else {
- sw_ctx_rx =
- (struct tls_sw_context_rx *)ctx->priv_ctx_rx;
- }
- }
+ ctx->priv_ctx_tx = init_ctx_tx(ctx, sk);
+ if (!ctx->priv_ctx_tx)
+ return -ENOMEM;

- if (tx) {
- crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ sw_ctx_tx = ctx->priv_ctx_tx;
crypto_info = &ctx->crypto_send.info;
cctx = &ctx->tx;
aead = &sw_ctx_tx->aead_send;
- INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
- INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
- sw_ctx_tx->tx_work.sk = sk;
} else {
- crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
- init_waitqueue_head(&sw_ctx_rx->wq);
+ ctx->priv_ctx_rx = init_ctx_rx(ctx);
+ if (!ctx->priv_ctx_rx)
+ return -ENOMEM;
+
+ sw_ctx_rx = ctx->priv_ctx_rx;
crypto_info = &ctx->crypto_recv.info;
cctx = &ctx->rx;
- skb_queue_head_init(&sw_ctx_rx->rx_list);
- skb_queue_head_init(&sw_ctx_rx->async_hold);
aead = &sw_ctx_rx->aead_recv;
}

diff --git a/net/wireless/core.c b/net/wireless/core.c
index f6ada0a72977..ff743e1f2e2c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1675,6 +1675,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
index 7048bb3594d6..634e81d83efd 100644
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -4,14 +4,14 @@
#define __ASM_GOTO_WORKAROUND_H

/*
- * This will bring in asm_volatile_goto and asm_inline macro definitions
+ * This will bring in asm_goto_output and asm_inline macro definitions
* if enabled by compiler and config options.
*/
#include <linux/types.h>

-#ifdef asm_volatile_goto
-#undef asm_volatile_goto
-#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#ifdef asm_goto_output
+#undef asm_goto_output
+#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output")
#endif

/*
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a432b171be82..7862a8101747 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -135,8 +135,13 @@ gen_btf()
${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
--strip-all ${1} ${2} 2>/dev/null
# Change e_type to ET_REL so that it can be used to link final vmlinux.
- # Unlike GNU ld, lld does not allow an ET_EXEC input.
- printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none
+ # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+ if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+ et_rel='\0\1'
+ else
+ et_rel='\1\0'
+ fi
+ printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none
}

# Create ${2} .S file with all symbols from the ${1} object file
diff --git a/scripts/mksysmap b/scripts/mksysmap
index 9ba1c9da0a40..57ff5656d566 100755
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -48,17 +48,8 @@ ${NM} -n ${1} | sed >${2} -e "
/ __kvm_nvhe_\\$/d
/ __kvm_nvhe_\.L/d

-# arm64 lld
-/ __AArch64ADRPThunk_/d
-
-# arm lld
-/ __ARMV5PILongThunk_/d
-/ __ARMV7PILongThunk_/d
-/ __ThumbV7PILongThunk_/d
-
-# mips lld
-/ __LA25Thunk_/d
-/ __microLA25Thunk_/d
+# lld arm/aarch64/mips thunks
+/ __[[:alnum:]]*Thunk_/d

# CFI type identifiers
/ __kcfi_typeid_/d
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index ac4ef3e206bb..5191fdbd3fa2 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -798,7 +798,7 @@ static void check_section(const char *modname, struct elf_info *elf,
#define ALL_INIT_TEXT_SECTIONS \
".init.text", ".meminit.text"
#define ALL_EXIT_TEXT_SECTIONS \
- ".exit.text", ".memexit.text"
+ ".exit.text"

#define ALL_PCI_INIT_SECTIONS \
".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \
@@ -806,14 +806,14 @@ static void check_section(const char *modname, struct elf_info *elf,
".pci_fixup_resume_early", ".pci_fixup_suspend"

#define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS
-#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS

#define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS
-#define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS
+#define ALL_EXIT_SECTIONS EXIT_SECTIONS

#define DATA_SECTIONS ".data", ".data.rel"
#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \
- ".kprobes.text", ".cpuidle.text", ".noinstr.text"
+ ".kprobes.text", ".cpuidle.text", ".noinstr.text", \
+ ".ltext", ".ltext.*"
#define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
".fixup", ".entry.text", ".exception.text", \
".coldtext", ".softirqentry.text"
@@ -822,7 +822,6 @@ static void check_section(const char *modname, struct elf_info *elf,
#define MEM_INIT_SECTIONS ".meminit.*"

#define EXIT_SECTIONS ".exit.*"
-#define MEM_EXIT_SECTIONS ".memexit.*"

#define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \
TEXT_SECTIONS, OTHER_TEXT_SECTIONS
@@ -832,7 +831,6 @@ enum mismatch {
DATA_TO_ANY_INIT,
TEXTDATA_TO_ANY_EXIT,
XXXINIT_TO_SOME_INIT,
- XXXEXIT_TO_SOME_EXIT,
ANY_INIT_TO_ANY_EXIT,
ANY_EXIT_TO_ANY_INIT,
EXTABLE_TO_NON_TEXT,
@@ -883,12 +881,6 @@ static const struct sectioncheck sectioncheck[] = {
.bad_tosec = { INIT_SECTIONS, NULL },
.mismatch = XXXINIT_TO_SOME_INIT,
},
-/* Do not reference exit code/data from memexit code/data */
-{
- .fromsec = { ALL_XXXEXIT_SECTIONS, NULL },
- .bad_tosec = { EXIT_SECTIONS, NULL },
- .mismatch = XXXEXIT_TO_SOME_EXIT,
-},
/* Do not use exit code/data from init code */
{
.fromsec = { ALL_INIT_SECTIONS, NULL },
@@ -1017,7 +1009,7 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,

/* symbols in data sections that may refer to meminit sections */
if (match(fromsec, PATTERNS(DATA_SECTIONS)) &&
- match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) &&
+ match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) &&
match(fromsym, PATTERNS("*driver")))
return 0;

diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 31066bfdba04..dc4878502276 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)

/* Sum all files in the same dir or subdirs. */
while ((line = get_line(&pos))) {
- char* p = line;
+ char* p;
+
+ /* trim the leading spaces away */
+ while (isspace(*line))
+ line++;
+ p = line;

if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
p = strrchr(line, ' ');
diff --git a/security/security.c b/security/security.c
index 840a3d58a290..407b51719f79 100644
--- a/security/security.c
+++ b/security/security.c
@@ -4030,7 +4030,19 @@ EXPORT_SYMBOL(security_inode_setsecctx);
*/
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
{
- return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen);
+ struct security_hook_list *hp;
+ int rc;
+
+ /*
+ * Only one module will provide a security context.
+ */
+ hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) {
+ rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen);
+ if (rc != LSM_RET_DEFAULT(inode_getsecctx))
+ return rc;
+ }
+
+ return LSM_RET_DEFAULT(inode_getsecctx);
}
EXPORT_SYMBOL(security_inode_getsecctx);

@@ -4387,8 +4399,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb);
int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
sockptr_t optlen, unsigned int len)
{
- return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
- optval, optlen, len);
+ struct security_hook_list *hp;
+ int rc;
+
+ /*
+ * Only one module will provide a security context.
+ */
+ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream,
+ list) {
+ rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen,
+ len);
+ if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream))
+ return rc;
+ }
+ return LSM_RET_DEFAULT(socket_getpeersec_stream);
}

/**
@@ -4408,8 +4432,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
int security_socket_getpeersec_dgram(struct socket *sock,
struct sk_buff *skb, u32 *secid)
{
- return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock,
- skb, secid);
+ struct security_hook_list *hp;
+ int rc;
+
+ /*
+ * Only one module will provide a security context.
+ */
+ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram,
+ list) {
+ rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid);
+ if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram))
+ return rc;
+ }
+ return LSM_RET_DEFAULT(socket_getpeersec_dgram);
}
EXPORT_SYMBOL(security_socket_getpeersec_dgram);

diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 0d7502d6e060..21046f72cdca 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -140,7 +140,7 @@ config SND_HDA_SCODEC_CS35L56_I2C
depends on I2C
depends on ACPI || COMPILE_TEST
depends on SND_SOC
- select CS_DSP
+ select FW_CS_DSP
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
@@ -154,7 +154,7 @@ config SND_HDA_SCODEC_CS35L56_SPI
depends on SPI_MASTER
depends on ACPI || COMPILE_TEST
depends on SND_SOC
- select CS_DSP
+ select FW_CS_DSP
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e8819e8a9876..e8209178d87b 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -344,6 +344,7 @@ enum {
CXT_FIXUP_HP_ZBOOK_MUTE_LED,
CXT_FIXUP_HEADSET_MIC,
CXT_FIXUP_HP_MIC_NO_PRESENCE,
+ CXT_PINCFG_SWS_JS201D,
};

/* for hda_fixup_thinkpad_acpi() */
@@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = {
{}
};

+/* SuoWoSi/South-holding JS201D with sn6140 */
+static const struct hda_pintbl cxt_pincfg_sws_js201d[] = {
+ { 0x16, 0x03211040 }, /* hp out */
+ { 0x17, 0x91170110 }, /* SPK/Class_D */
+ { 0x18, 0x95a70130 }, /* Internal mic */
+ { 0x19, 0x03a11020 }, /* Headset Mic */
+ { 0x1a, 0x40f001f0 }, /* Not used */
+ { 0x21, 0x40f001f0 }, /* Not used */
+ {}
+};
+
static const struct hda_fixup cxt_fixups[] = {
[CXT_PINCFG_LENOVO_X200] = {
.type = HDA_FIXUP_PINS,
@@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = {
.chained = true,
.chain_id = CXT_FIXUP_HEADSET_MIC,
},
+ [CXT_PINCFG_SWS_JS201D] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = cxt_pincfg_sws_js201d,
+ },
};

static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
+ SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D),
SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410),
@@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
{ .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
{ .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
{ .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" },
+ { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" },
{}
};

diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c
index 627899959ffe..e41316e2e983 100644
--- a/sound/pci/hda/patch_cs8409.c
+++ b/sound/pci/hda/patch_cs8409.c
@@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac
spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1;
spec->scodecs[CS8409_CODEC1]->codec = codec;
spec->num_scodecs = 2;
+ spec->gen.suppress_vmaster = 1;

codec->patch_ops = cs8409_dolphin_patch_ops;

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 375569d0864b..0cb8ccdabc09 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -438,6 +438,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
fallthrough;
case 0x10ec0215:
+ case 0x10ec0285:
+ case 0x10ec0289:
+ alc_update_coef_idx(codec, 0x36, 1<<13, 0);
+ fallthrough;
case 0x10ec0230:
case 0x10ec0233:
case 0x10ec0235:
@@ -451,9 +455,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0283:
case 0x10ec0286:
case 0x10ec0288:
- case 0x10ec0285:
case 0x10ec0298:
- case 0x10ec0289:
case 0x10ec0300:
alc_update_coef_idx(codec, 0x10, 1<<9, 0);
break;
@@ -9479,7 +9481,7 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
.chained = true,
- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
},
[ALC287_FIXUP_TAS2781_I2C] = {
.type = HDA_FIXUP_FUNC,
@@ -9500,6 +9502,8 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC287_FIXUP_THINKPAD_I2S_SPK] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc287_fixup_bind_dacs,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
},
[ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = {
.type = HDA_FIXUP_FUNC,
@@ -9549,6 +9553,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
@@ -9626,6 +9631,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x0c0b, "Dell Oasis 14 RPL-P", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x0c0e, "Dell Oasis 16", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
@@ -9745,6 +9753,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -9814,6 +9823,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9821,6 +9831,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9850,6 +9861,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -10200,6 +10213,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
+ SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 731664fc8b21..26928d5ae5f7 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -627,7 +627,7 @@ static int tas2781_hda_bind(struct device *dev, struct device *master,

strscpy(comps->name, dev_name(dev), sizeof(comps->name));

- ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready);
+ ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready);
if (!ret)
comps->playback_hook = tas2781_hda_playback_hook;

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index d83cb6e4c62a..80ad60d485ea 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "82YM"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "83AS"),
+ }
+ },
{
.driver_data = &acp6x_card,
.matches = {
@@ -297,6 +304,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"),
+ }
+ },
{
.driver_data = &acp6x_card,
.matches = {
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index edcb85bd8ea7..ea08b7cfc31d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3314,6 +3314,7 @@ static void rt5645_jack_detect_work(struct work_struct *work)
report, SND_JACK_HEADPHONE);
snd_soc_jack_report(rt5645->mic_jack,
report, SND_JACK_MICROPHONE);
+ mutex_unlock(&rt5645->jd_mutex);
return;
case 4:
val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020;
diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c
index 00e35169ae49..add16302f711 100644
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv *tas_dev)
EXPORT_SYMBOL_GPL(tas2781_reset);

int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
void (*cont)(const struct firmware *fw, void *context))
{
int ret = 0;
@@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
tas_priv->dev_name, tas_priv->ndev);
crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
tas_priv->codec = codec;
- ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT,
+ ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv,
cont);
if (ret)
diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c
index 917b1c15f71d..2f7f8b18c36f 100644
--- a/sound/soc/codecs/tas2781-i2c.c
+++ b/sound/soc/codecs/tas2781-i2c.c
@@ -564,7 +564,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec)
{
struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec);

- return tascodec_init(tas_priv, codec, tasdevice_fw_ready);
+ return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready);
}

static void tasdevice_deinit(void *context)
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index 23d06338f716..7df1719e0723 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -3589,7 +3589,7 @@ static int wcd938x_probe(struct platform_device *pdev)
ret = wcd938x_populate_dt_data(wcd938x, dev);
if (ret) {
dev_err(dev, "%s: Fail to obtain platform data\n", __func__);
- return -EINVAL;
+ return ret;
}

ret = wcd938x_add_slave_components(wcd938x, dev, &match);
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index 2c7a5e7a364c..d96555438c6b 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2309,27 +2309,16 @@ static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev)
return 0;
}

-/*
- * For older firmware, this function doesn't free widgets for static pipelines during suspend.
- * It only resets use_count for all widgets.
- */
-static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+static int sof_ipc3_free_widgets_in_list(struct snd_sof_dev *sdev, bool include_scheduler,
+ bool *dyn_widgets, bool verify)
{
struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
struct snd_sof_widget *swidget;
- struct snd_sof_route *sroute;
- bool dyn_widgets = false;
int ret;

- /*
- * This function is called during suspend and for one-time topology verification during
- * first boot. In both cases, there is no need to protect swidget->use_count and
- * sroute->setup because during suspend all running streams are suspended and during
- * topology loading the sound card unavailable to open PCMs.
- */
list_for_each_entry(swidget, &sdev->widget_list, list) {
if (swidget->dynamic_pipeline_widget) {
- dyn_widgets = true;
+ *dyn_widgets = true;
continue;
}

@@ -2344,11 +2333,49 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif
continue;
}

+ if (include_scheduler && swidget->id != snd_soc_dapm_scheduler)
+ continue;
+
+ if (!include_scheduler && swidget->id == snd_soc_dapm_scheduler)
+ continue;
+
ret = sof_widget_free(sdev, swidget);
if (ret < 0)
return ret;
}

+ return 0;
+}
+
+/*
+ * For older firmware, this function doesn't free widgets for static pipelines during suspend.
+ * It only resets use_count for all widgets.
+ */
+static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+{
+ struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
+ struct snd_sof_widget *swidget;
+ struct snd_sof_route *sroute;
+ bool dyn_widgets = false;
+ int ret;
+
+ /*
+ * This function is called during suspend and for one-time topology verification during
+ * first boot. In both cases, there is no need to protect swidget->use_count and
+ * sroute->setup because during suspend all running streams are suspended and during
+ * topology loading the sound card unavailable to open PCMs. Do not free the scheduler
+ * widgets yet so that the secondary cores do not get powered down before all the widgets
+ * associated with the scheduler are freed.
+ */
+ ret = sof_ipc3_free_widgets_in_list(sdev, false, &dyn_widgets, verify);
+ if (ret < 0)
+ return ret;
+
+ /* free all the scheduler widgets now */
+ ret = sof_ipc3_free_widgets_in_list(sdev, true, &dyn_widgets, verify);
+ if (ret < 0)
+ return ret;
+
/*
* Tear down all pipelines associated with PCMs that did not get suspended
* and unset the prepare flag so that they can be set up again during resume.
diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c
index fb40378ad084..c03dd513fbff 100644
--- a/sound/soc/sof/ipc3.c
+++ b/sound/soc/sof/ipc3.c
@@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev)
return;
}

- if (hdr.size < sizeof(hdr)) {
+ if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) {
dev_err(sdev->dev, "The received message size is invalid\n");
return;
}
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
index 11ff975242ca..e2ff22b379a4 100644
--- a/tools/arch/x86/include/asm/rmwcc.h
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -4,7 +4,7 @@

#define __GEN_RMWcc(fullop, var, cc, ...) \
do { \
- asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ asm goto (fullop "; j" cc " %l[cc_label]" \
: : "m" (var), ## __VA_ARGS__ \
: "memory" : cc_label); \
return 0; \
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 1bdd834bdd57..d09f9dc172a4 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -36,8 +36,8 @@
#include <linux/compiler-gcc.h>
#endif

-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm goto(x)
#endif

#endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 936f3a8d1b83..e96fababd3f0 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,

cleared = kvm_vm_reset_dirty_ring(vcpu->vm);

- /* Cleared pages should be the same as collected */
+ /*
+ * Cleared pages should be the same as collected, as KVM is supposed to
+ * clear only the entries that have been harvested.
+ */
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);

@@ -415,12 +418,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
}
}

-static void dirty_ring_before_vcpu_join(void)
-{
- /* Kick another round of vcpu just to make sure it will quit */
- sem_post(&sem_vcpu_cont);
-}
-
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
@@ -433,7 +430,6 @@ struct log_mode {
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
- void (*before_vcpu_join) (void);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -452,7 +448,6 @@ struct log_mode {
.supported = dirty_ring_supported,
.create_vm_done = dirty_ring_create_vm_done,
.collect_dirty_pages = dirty_ring_collect_dirty_pages,
- .before_vcpu_join = dirty_ring_before_vcpu_join,
.after_vcpu_run = dirty_ring_after_vcpu_run,
},
};
@@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
mode->after_vcpu_run(vcpu, ret, err);
}

-static void log_mode_before_vcpu_join(void)
-{
- struct log_mode *mode = &log_modes[host_log_mode];
-
- if (mode->before_vcpu_join)
- mode->before_vcpu_join();
-}
-
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
@@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct kvm_vm *vm;
unsigned long *bmap;
uint32_t ring_buf_idx = 0;
+ int sem_val;

if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
@@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Start the iterations */
iteration = 1;
sync_global_to_guest(vm, iteration);
- host_quit = false;
+ WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
host_track_next_count = 0;
WRITE_ONCE(dirty_ring_vcpu_ring_full, false);

+ /*
+ * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
+ * that the main task and vCPU worker were synchronized and completed
+ * verification of all iterations.
+ */
+ sem_getvalue(&sem_vcpu_stop, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+ sem_getvalue(&sem_vcpu_cont, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);

while (iteration < p->iterations) {
@@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
assert(host_log_mode == LOG_MODE_DIRTY_RING ||
atomic_read(&vcpu_sync_stop_requested) == false);
vm_dirty_log_verify(mode, bmap);
- sem_post(&sem_vcpu_cont);

- iteration++;
+ /*
+ * Set host_quit before sem_vcpu_cont in the final iteration to
+ * ensure that the vCPU worker doesn't resume the guest. As
+ * above, the dirty ring test may stop and wait even when not
+ * explicitly request to do so, i.e. would hang waiting for a
+ * "continue" if it's allowed to resume the guest.
+ */
+ if (++iteration == p->iterations)
+ WRITE_ONCE(host_quit, true);
+
+ sem_post(&sem_vcpu_cont);
sync_global_to_guest(vm, iteration);
}

- /* Tell the vcpu thread to quit */
- host_quit = true;
- log_mode_before_vcpu_join();
pthread_join(vcpu_thread, NULL);

pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 11329e5ff945..309ee5c72b46 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -221,7 +221,7 @@ int main(int argc, char *argv[])
vm_vaddr_t amx_cfg, tiledata, xstate;
struct ucall uc;
u32 amx_offset;
- int stage, ret;
+ int ret;

/*
* Note, all off-by-default features must be enabled before anything
@@ -263,7 +263,7 @@ int main(int argc, char *argv[])
memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);

- for (stage = 1; ; stage++) {
+ for (;;) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 9f28aa276c4e..a726831b8024 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -454,7 +454,7 @@ static void guest_test_msrs_access(void)
case 44:
/* MSR is not available when CPUID feature bit is unset */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = false;
msr->fault_expected = true;
@@ -462,7 +462,7 @@ static void guest_test_msrs_access(void)
case 45:
/* MSR is vailable when CPUID feature bit is set */
if (!has_invtsc)
- continue;
+ goto next_stage;
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = false;
@@ -471,7 +471,7 @@ static void guest_test_msrs_access(void)
case 46:
/* Writing bits other than 0 is forbidden */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = true;
msr->write_val = 0xdeadbeef;
@@ -480,7 +480,7 @@ static void guest_test_msrs_access(void)
case 47:
/* Setting bit 0 enables the feature */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = true;
msr->write_val = 1;
@@ -513,6 +513,7 @@ static void guest_test_msrs_access(void)
return;
}

+next_stage:
stage++;
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 251594306d40..720bafa0f87b 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -241,9 +241,11 @@ struct mnt_opt {
const char *const data;
};

-const struct mnt_opt mnt_tmp = {
+#define MNT_TMP_DATA "size=4m,mode=700"
+
+static const struct mnt_opt mnt_tmp = {
.type = "tmpfs",
- .data = "size=4m,mode=700",
+ .data = MNT_TMP_DATA,
};

static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
@@ -4523,7 +4525,10 @@ FIXTURE_VARIANT(layout3_fs)
/* clang-format off */
FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
/* clang-format on */
- .mnt = mnt_tmp,
+ .mnt = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+ },
.file_path = file1_s1d1,
};

diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 0899019a7fcb..e14bdd4455f2 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Kselftest framework requirement - SKIP code is 4.
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index 380b691d3eb9..b748c48908d9 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
if (map_ptr_orig == MAP_FAILED)
err(2, "initial mmap");

- if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE))
+ if (madvise(map_ptr, len, MADV_HUGEPAGE))
err(2, "MADV_HUGEPAGE");

pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index 193281560b61..86e8f2048a40 100644
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "vm_util.h"

#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
@@ -58,10 +59,16 @@ int main(int argc, char **argv)
{
void *addr;
int ret;
+ size_t hugepage_size;
size_t length = LENGTH;
int flags = FLAGS;
int shift = 0;

+ hugepage_size = default_huge_page_size();
+ /* munmap with fail if the length is not page aligned */
+ if (hugepage_size > length)
+ length = hugepage_size;
+
if (argc > 1)
length = atol(argv[1]) << 20;
if (argc > 2) {
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 45cae7cab27e..a0a75f302904 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -29,9 +29,15 @@ check_supported_x86_64()
# See man 1 gzip under '-f'.
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)

+ local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+ else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
if [[ "${pg_table_levels}" -lt 5 ]]; then
echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
exit $ksft_skip
+ elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+ echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
+ exit $ksft_skip
fi
}

diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index 70a02301f4c2..3d2d2eb9d6ff 100755
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

set -e
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
index 9af9f6964808..c62331b2e006 100755
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -327,10 +327,10 @@ locked_port_mab_redirect()
RET=0
check_port_mab_support || return 0

- bridge link set dev $swp1 learning on locked on mab on
tc qdisc add dev $swp1 clsact
tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
action mirred egress redirect dev $swp2
+ bridge link set dev $swp1 learning on locked on mab on

ping_do $h1 192.0.2.2
check_err $? "Ping did not work with redirection"
@@ -349,8 +349,8 @@ locked_port_mab_redirect()
check_err $? "Locked entry not created after deleting filter"

bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
- tc qdisc del dev $swp1 clsact
bridge link set dev $swp1 learning off locked off mab off
+ tc qdisc del dev $swp1 clsact

log_test "Locked port MAB redirect"
}
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d0c6c499d5da..a3678dfe5848 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -145,14 +145,14 @@ cfg_test_host_common()

# Check basic add, replace and delete behavior.
bridge mdb add dev br0 port br0 grp $grp $state vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp"
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
check_err $? "Failed to add $name host entry"

bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
check_fail $? "Managed to replace $name host entry"

bridge mdb del dev br0 port br0 grp $grp $state vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp"
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "Failed to delete $name host entry"

# Check error cases.
@@ -200,7 +200,7 @@ cfg_test_port_common()

# Check basic add, replace and delete behavior.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_err $? "Failed to add $name entry"

bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
@@ -208,31 +208,31 @@ cfg_test_port_common()
check_err $? "Failed to replace $name entry"

bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "Failed to delete $name entry"

# Check default protocol and replacement.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static"
check_err $? "$name entry not added with default \"static\" protocol"

bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
proto 123
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123"
check_err $? "Failed to replace protocol of $name entry"
bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10

# Check behavior when VLAN is not specified.
bridge mdb add dev br0 port $swp1 $grp_key permanent
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_err $? "$name entry with VLAN 10 not added when VLAN was not specified"
- bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"

bridge mdb del dev br0 port $swp1 $grp_key permanent
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
- bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified"

# Check behavior when bridge port is down.
@@ -298,21 +298,21 @@ __cfg_test_port_ip_star_g()
RET=0

bridge mdb add dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "Default filter mode is not \"exclude\""
bridge mdb del dev br0 port $swp1 grp $grp vid 10

# Check basic add and delete behavior.
bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
check_err $? "(*, G) entry not created"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry not created"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "(*, G) entry not deleted"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_fail $? "(S, G) entry not deleted"

## State (permanent / temp) tests.
@@ -321,18 +321,15 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
filter_mode exclude source_list $src1

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
check_err $? "(*, G) entry not added as \"permanent\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \
grep -q "permanent"
check_err $? "(S, G) entry not added as \"permanent\" when should"

- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) \"permanent\" entry has a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"permanent\" source entry has a pending source timer"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -342,18 +339,14 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should"

- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"blocked\" source entry has a pending source timer"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,18 +356,14 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) entry not added as \"temp\" when should"

- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) INCLUDE entry has a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_fail $? "Source entry does not have a pending source timer"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -383,8 +372,7 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1

- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00"
check_err $? "(S, G) entry has a pending group timer"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -396,11 +384,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode include source_list $src1

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "include"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_fail $? "(S, G) entry marked as \"blocked\" when should not"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -410,11 +396,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode exclude source_list $src1

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_err $? "(S, G) entry not marked as \"blocked\" when should"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -426,11 +410,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode exclude source_list $src1 proto zebra

- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "zebra"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra"
check_err $? "(*, G) entry not added with \"zebra\" protocol"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "zebra"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra"
check_err $? "(S, G) entry not marked added with \"zebra\" protocol"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -443,20 +425,16 @@ __cfg_test_port_ip_star_g()

bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
check_err $? "(*, G) entry not marked as \"permanent\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent"
check_err $? "(S, G) entry not marked as \"permanent\" after replace"

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) entry not marked as \"temp\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) entry not marked as \"temp\" after replace"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -467,20 +445,16 @@ __cfg_test_port_ip_star_g()

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "include"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
check_err $? "(*, G) not marked with \"include\" filter mode after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_fail $? "(S, G) marked as \"blocked\" after replace"

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "(*, G) not marked with \"exclude\" filter mode after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_err $? "(S, G) not marked as \"blocked\" after replace"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -491,20 +465,20 @@ __cfg_test_port_ip_star_g()

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1,$src2,$src3
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src1 not created after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src2 not created after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src3 not created after replace"

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1,$src3
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src1 not created after second replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
check_fail $? "(S, G) entry for source $src2 created after second replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src3 not created after second replace"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -515,11 +489,9 @@ __cfg_test_port_ip_star_g()

bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1 proto bgp
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "bgp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp"
check_err $? "(*, G) protocol not changed to \"bgp\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "bgp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp"
check_err $? "(S, G) protocol not changed to \"bgp\" after replace"

bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -532,8 +504,8 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
filter_mode include source_list $src1
bridge mdb add dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \
- grep "$src1" | grep -q "added_by_star_ex"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \
+ grep -q "added_by_star_ex"
check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10
@@ -606,27 +578,23 @@ __cfg_test_port_ip_sg()
RET=0

bridge mdb add dev br0 port $swp1 $grp_key vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include"
check_err $? "Default filter mode is not \"include\""
bridge mdb del dev br0 port $swp1 $grp_key vid 10

# Check that entries can be added as both permanent and temp and that
# group timer is set correctly.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
check_err $? "Entry not added as \"permanent\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_err $? "\"permanent\" entry has a pending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10

bridge mdb add dev br0 port $swp1 $grp_key temp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
check_err $? "Entry not added as \"temp\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_fail $? "\"temp\" entry has an unpending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10

@@ -650,24 +618,19 @@ __cfg_test_port_ip_sg()
# Check that we can replace available attributes.
bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "111"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111"
check_err $? "Failed to replace protocol"

bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
check_err $? "Entry not marked as \"permanent\" after replace"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_err $? "Entry has a pending group timer after replace"

bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
check_err $? "Entry not marked as \"temp\" after replace"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_fail $? "Entry has an unpending group timer after replace"
bridge mdb del dev br0 port $swp1 $grp_key vid 10

@@ -675,7 +638,7 @@ __cfg_test_port_ip_sg()
# (*, G) ports need to be added to it.
bridge mdb add dev br0 port $swp2 grp $grp vid 10
bridge mdb add dev br0 port $swp1 $grp_key vid 10
- bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \
+ bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \
grep -q "added_by_star_ex"
check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -1102,14 +1065,17 @@ fwd_test()
echo
log_info "# Forwarding tests"

+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br0 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip -6 address add fe80::1/64 nodad dev br0
ip link set dev br0 type bridge mcast_querier 1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10

fwd_test_host
@@ -1117,6 +1083,7 @@ fwd_test()

ip link set dev br0 type bridge mcast_querier 0
ip -6 address del fe80::1/64 dev br0
+ ip link set dev br0 type bridge mcast_query_response_interval 1000
}

ctrl_igmpv3_is_in_test()
@@ -1132,7 +1099,7 @@ ctrl_igmpv3_is_in_test()
$MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q

- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
check_fail $? "Permanent entry affected by IGMP packet"

# Replace the permanent entry with a temporary one and check that after
@@ -1145,12 +1112,10 @@ ctrl_igmpv3_is_in_test()
$MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q

- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
- grep -q 192.0.2.2
+ bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2
check_err $? "Source not add to source list"

- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \
- grep -q "src 192.0.2.2"
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
check_err $? "(S, G) entry not created for new source"

bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
@@ -1172,8 +1137,7 @@ ctrl_mldv2_is_in_test()
$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q

- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
- grep -q 2001:db8:1::2
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
check_fail $? "Permanent entry affected by MLD packet"

# Replace the permanent entry with a temporary one and check that after
@@ -1186,12 +1150,10 @@ ctrl_mldv2_is_in_test()
$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q

- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
- grep -q 2001:db8:1::2
+ bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2
check_err $? "Source not add to source list"

- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
- grep -q "src 2001:db8:1::2"
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
check_err $? "(S, G) entry not created for new source"

bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
@@ -1208,8 +1170,8 @@ ctrl_test()
ctrl_mldv2_is_in_test
}

-if ! bridge mdb help 2>&1 | grep -q "replace"; then
- echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+if ! bridge mdb help 2>&1 | grep -q "get"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb get support"
exit $ksft_skip
fi

diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
index 20a7cb7222b8..c2420bb72c12 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -209,14 +209,17 @@ test_l2_miss_multicast()
# both registered and unregistered multicast traffic.
bridge link set dev $swp2 mcast_router 2

+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br1 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip link set dev br1 type bridge mcast_querier 1
ip -6 address add fe80::1/64 nodad dev br1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10

test_l2_miss_multicast_ipv4
@@ -224,6 +227,7 @@ test_l2_miss_multicast()

ip -6 address del fe80::1/64 dev br1
ip link set dev br1 type bridge mcast_querier 0
+ ip link set dev br1 type bridge mcast_query_response_interval 1000
bridge link set dev $swp2 mcast_router 1
}

diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index e317c2e44dae..4f80014cae49 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 4632a954c73e..67ca22856d54 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -682,16 +682,10 @@ wait_mpj()
done
}

-kill_wait()
-{
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
kill_events_pids()
{
- kill_wait $evts_ns1_pid
- kill_wait $evts_ns2_pid
+ mptcp_lib_kill_wait $evts_ns1_pid
+ mptcp_lib_kill_wait $evts_ns2_pid
}

kill_tests_wait()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 92a5befe8039..4cd4297ca86d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -6,7 +6,7 @@ readonly KSFT_FAIL=1
readonly KSFT_SKIP=4

# shellcheck disable=SC2155 # declare and assign separately
-readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g')
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"

MPTCP_LIB_SUBTESTS=()

@@ -207,3 +207,12 @@ mptcp_lib_result_print_all_tap() {
printf "%s\n" "${subtest}"
done
}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+ [ "${1}" -eq 0 ] && return 0
+
+ kill -SIGUSR1 "${1}" > /dev/null 2>&1
+ kill "${1}" > /dev/null 2>&1
+ wait "${1}" 2>/dev/null
+}
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 79b65bdf05db..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=1200
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b25a3e33eb25..c44bf5c7c6e0 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -108,15 +108,6 @@ test_fail()
mptcp_lib_result_fail "${test_name}"
}

-kill_wait()
-{
- [ $1 -eq 0 ] && return 0
-
- kill -SIGUSR1 $1 > /dev/null 2>&1
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
# This function is used in the cleanup trap
#shellcheck disable=SC2317
cleanup()
@@ -128,7 +119,7 @@ cleanup()
for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
$server_evts_pid $client_evts_pid
do
- kill_wait $pid
+ mptcp_lib_kill_wait $pid
done

local netns
@@ -210,7 +201,7 @@ make_connection()
fi
:>"$client_evts"
if [ $client_evts_pid -ne 0 ]; then
- kill_wait $client_evts_pid
+ mptcp_lib_kill_wait $client_evts_pid
fi
ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
client_evts_pid=$!
@@ -219,7 +210,7 @@ make_connection()
fi
:>"$server_evts"
if [ $server_evts_pid -ne 0 ]; then
- kill_wait $server_evts_pid
+ mptcp_lib_kill_wait $server_evts_pid
fi
ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
server_evts_pid=$!
@@ -627,7 +618,7 @@ test_subflows()
"10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"

# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

local sport
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
@@ -666,7 +657,7 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"

# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")

@@ -705,7 +696,7 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"

# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")

@@ -743,7 +734,7 @@ test_subflows()
"10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"

# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")

@@ -782,7 +773,7 @@ test_subflows()
"$server_addr_id" "ns2" "ns1"

# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")

@@ -819,7 +810,7 @@ test_subflows()
"10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"

# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")

@@ -865,7 +856,7 @@ test_subflows_v4_v6_mix()
"$server_addr_id" "ns2" "ns1"

# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")

@@ -982,7 +973,7 @@ test_listener()
sleep 0.5

# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid

sleep 0.5
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 112cfd8a10ad..1b3f89e2b86e 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -35,9 +35,8 @@
# | sw1 | | sw2 |
# +------------------------------------+ +------------------------------------+

+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4

# All tests in this script. Can be overridden with -t option.
TESTS="
@@ -125,6 +124,16 @@ tc_check_packets()
[[ $pkts == $count ]]
}

+bridge_link_check()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local state=$1; shift
+
+ bridge -n $ns -d -j link show dev $dev | \
+ jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
################################################################################
# Setup

@@ -132,9 +141,6 @@ setup_topo_ns()
{
local ns=$1; shift

- ip netns add $ns
- ip -n $ns link set dev lo up
-
ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
@@ -145,13 +151,14 @@ setup_topo()
{
local ns

- for ns in sw1 sw2; do
+ setup_ns sw1 sw2
+ for ns in $sw1 $sw2; do
setup_topo_ns $ns
done

ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns sw1 name veth0
- ip link set dev veth1 netns sw2 name veth0
+ ip link set dev veth0 netns $sw1 name veth0
+ ip link set dev veth1 netns $sw2 name veth0
}

setup_sw_common()
@@ -190,7 +197,7 @@ setup_sw_common()

setup_sw1()
{
- local ns=sw1
+ local ns=$sw1
local local_addr=192.0.2.33
local remote_addr=192.0.2.34
local veth_addr=192.0.2.49
@@ -203,7 +210,7 @@ setup_sw1()

setup_sw2()
{
- local ns=sw2
+ local ns=$sw2
local local_addr=192.0.2.34
local remote_addr=192.0.2.33
local veth_addr=192.0.2.50
@@ -229,11 +236,7 @@ setup()

cleanup()
{
- local ns
-
- for ns in h1 h2 sw1 sw2; do
- ip netns del $ns &> /dev/null
- done
+ cleanup_ns $sw1 $sw2
}

################################################################################
@@ -248,85 +251,90 @@ backup_port()
echo "Backup port"
echo "-----------"

- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"

- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"

- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"

# Initial state - check that packets are forwarded out of swp1 when it
# has a carrier and not forwarded out of any port when it does not have
# a carrier.
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"

# Configure vx0 as the backup port of swp1 and check that packets are
# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
# does not have a carrier.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 3
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"

# Remove vx0 as the backup port of swp1 and check that packets are no
# longer forwarded out of vx0 when swp1 does not have a carrier.
- run_cmd "bridge -n sw1 link set dev swp1 nobackup_port"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 1 "vx0 not configured as backup port of swp1"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
}

@@ -339,125 +347,130 @@ backup_nhid()
echo "Backup nexthop ID"
echo "-----------------"

- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"

- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"

- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"

- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
- run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"

- run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo"
+ run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo"

# The first filter matches on packets forwarded using the backup
# nexthop ID and the second filter matches on packets forwarded using a
# regular VXLAN FDB entry.
- run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"

# Configure vx0 as the backup port of swp1 and check that packets are
# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
# does not have a carrier. When packets are forwarded out of vx0, check
# that they are forwarded by the VXLAN FDB entry.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 0
+ tc_check_packets $sw2 "dev vx0 ingress" 101 0
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "Forwarding using VXLAN FDB entry"

- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"

# Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
# that when packets are forwarded out of vx0, they are forwarded using
# the backup nexthop ID.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "Forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"

- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 3
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"

# Reset the backup nexthop ID to 0 and check that packets are no longer
# forwarded using the backup nexthop ID when swp1 does not have a
# carrier and are instead forwarded by the VXLAN FDB.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\""
log_test $? 1 "No backup nexthop ID configured for swp1"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 2
+ tc_check_packets $sw2 "dev vx0 ingress" 102 2
log_test $? 0 "Forwarding using VXLAN FDB entry"
}

@@ -475,109 +488,110 @@ backup_nhid_invalid()
# is forwarded out of the VXLAN port, but dropped by the VXLAN driver
# and does not crash the host.

- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"

- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
# Drop all other Tx traffic to avoid changes to Tx drop counter.
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"

- tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
+ tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')

- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"

- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"

- run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"

# First, check that redirection works.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"

- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
log_test $? 0 "Valid nexthop as backup nexthop"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "Forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
log_test $? 0 "No Tx drop increase"

# Use a non-existent nexthop ID.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
log_test $? 0 "Non-existent nexthop as backup nexthop"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
log_test $? 0 "Tx drop increased"

# Use a blckhole nexthop.
- run_cmd "ip -n sw1 nexthop replace id 30 blackhole"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
+ run_cmd "ip -n $sw1 nexthop replace id 30 blackhole"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
log_test $? 0 "Blackhole nexthop as backup nexthop"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
log_test $? 0 "Tx drop increased"

# Non-group FDB nexthop.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
log_test $? 0 "Non-group FDB nexthop as backup nexthop"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 4
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
log_test $? 0 "Tx drop increased"

# IPv6 address family nexthop.
- run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
- run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
- run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
+ run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
log_test $? 0 "IPv6 address family nexthop as backup nexthop"

- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 5
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
log_test $? 0 "Tx drop increased"
}

@@ -591,44 +605,46 @@ backup_nhid_ping()
echo "------------------------"

# Test bidirectional traffic when traffic is redirected in both VTEPs.
- sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
- sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]')

- run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
- run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"

- run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
- run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
+ run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
+ run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"

- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb"
- run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb"

- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"

- run_cmd "ip -n sw1 link set dev swp1 carrier off"
- run_cmd "ip -n sw2 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled

- run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
log_test $? 0 "Ping with backup nexthop ID"

# Reset the backup nexthop ID to 0 and check that ping fails.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
- run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0"

- run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
log_test $? 1 "Ping after disabling backup nexthop ID"
}

backup_nhid_add_del_loop()
{
while true; do
- ip -n sw1 nexthop del id 10
- ip -n sw1 nexthop replace id 10 group 1/2 fdb
+ ip -n $sw1 nexthop del id 10
+ ip -n $sw1 nexthop replace id 10 group 1/2 fdb
done >/dev/null 2>&1
}

@@ -648,19 +664,19 @@ backup_nhid_torture()
# deleting the group. The test is considered successful if nothing
# crashed.

- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"

- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"

- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"

backup_nhid_add_del_loop &
pid1=$!
- ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
+ ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
pid2=$!

sleep 30
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 2456a399eb9a..afd18c678ff5 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
-fasynchronous-unwind-tables -fstack-clash-protection
WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized

+ifeq ($(CC),clang)
+ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)

CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS)
-LDFLAGS := -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS)
LIBS := $$($(PKG_CONFIG) --libs libtracefs)

SRC := $(wildcard src/*.c)
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 8f81fa007364..01870d50942a 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -135,8 +135,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
if (params->output_divisor)
duration = duration / params->output_divisor;

- if (data->bucket_size)
- bucket = duration / data->bucket_size;
+ bucket = duration / data->bucket_size;

total_duration = duration * count;

@@ -480,7 +479,11 @@ static void osnoise_hist_usage(char *usage)

for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}

/*
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index f7c959be8677..457360db0767 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -331,7 +331,11 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)

for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}

/*
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 47d3d8b53cb2..dbf154082f95 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -178,8 +178,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu,
if (params->output_divisor)
latency = latency / params->output_divisor;

- if (data->bucket_size)
- bucket = latency / data->bucket_size;
+ bucket = latency / data->bucket_size;

if (!context) {
hist = data->hist[cpu].irq;
@@ -546,7 +545,11 @@ static void timerlat_hist_usage(char *usage)

for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}

/*
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 1640f121baca..3e9af2c38688 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -375,7 +375,11 @@ static void timerlat_top_usage(char *usage)

for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}

/*
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index c769d7b3842c..9ac71a66840c 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -238,12 +238,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
return syscall(__NR_sched_setattr, pid, attr, flags);
}

-static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
- unsigned int size, unsigned int flags)
-{
- return syscall(__NR_sched_getattr, pid, attr, size, flags);
-}
-
int __set_sched_attr(int pid, struct sched_attr *attr)
{
int flags = 0;
@@ -479,13 +473,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param)
if (prio == INVALID_VAL)
return -1;

- if (prio < sched_get_priority_min(SCHED_OTHER))
+ if (prio < MIN_NICE)
return -1;
- if (prio > sched_get_priority_max(SCHED_OTHER))
+ if (prio > MAX_NICE)
return -1;

sched_param->sched_policy = SCHED_OTHER;
- sched_param->sched_priority = prio;
+ sched_param->sched_nice = prio;
break;
default:
return -1;
@@ -536,7 +530,7 @@ int set_cpu_dma_latency(int32_t latency)
*/
static const int find_mount(const char *fs, char *mp, int sizeof_mp)
{
- char mount_point[MAX_PATH];
+ char mount_point[MAX_PATH+1];
char type[100];
int found = 0;
FILE *fp;
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index 04ed1e650495..d44513e6c66a 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -9,6 +9,8 @@
*/
#define BUFF_U64_STR_SIZE 24
#define MAX_PATH 1024
+#define MAX_NICE 20
+#define MIN_NICE -19

#define container_of(ptr, type, member)({ \
const typeof(((type *)0)->member) *__mptr = (ptr); \
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile
index 3d0f3888a58c..485f8aeddbe0 100644
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
-fasynchronous-unwind-tables -fstack-clash-protection
WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized

+ifeq ($(CC),clang)
+ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)

CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include
-LDFLAGS := -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS)
LIBS := $$($(PKG_CONFIG) --libs libtracefs)

SRC := $(wildcard src/*.c)
diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c
index ad28582bcf2b..f04479ecc96c 100644
--- a/tools/verification/rv/src/in_kernel.c
+++ b/tools/verification/rv/src/in_kernel.c
@@ -210,9 +210,9 @@ static char *ikm_read_reactor(char *monitor_name)
static char *ikm_get_current_reactor(char *monitor_name)
{
char *reactors = ikm_read_reactor(monitor_name);
+ char *curr_reactor = NULL;
char *start;
char *end;
- char *curr_reactor;

if (!reactors)
return NULL;