Re: Linux 4.9.255

From: Greg Kroah-Hartman
Date: Wed Feb 03 2021 - 18:10:22 EST


diff --git a/Makefile b/Makefile
index ea9ea119460d..4780b5f80b2a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 254
+SUBLEVEL = 255
EXTRAVERSION =
NAME = Roaring Lionus

diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index 7d84b617af48..99d2e296082c 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -73,6 +73,7 @@
#define MX6Q_CCM_CCR 0x0

.align 3
+ .arm

.macro sync_l2_cache

diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 84ae4dd261ca..cafdaabf062f 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
- [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
+ [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and intel_arch_events array */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c0f9be107e4..98fb3a724037 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -97,6 +97,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);

@@ -3199,6 +3200,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
process_nmi(vcpu);
+
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ process_smi(vcpu);
+
events->exception.injected =
vcpu->arch.exception.pending &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index 98b513d049f6..fb610ad495f1 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -259,20 +259,12 @@ int __acpi_device_uevent_modalias(struct acpi_device *adev,
if (add_uevent_var(env, "MODALIAS="))
return -ENOMEM;

- len = create_pnp_modalias(adev, &env->buf[env->buflen - 1],
- sizeof(env->buf) - env->buflen);
- if (len < 0)
- return len;
-
- env->buflen += len;
- if (!adev->data.of_compatible)
- return 0;
-
- if (len > 0 && add_uevent_var(env, "MODALIAS="))
- return -ENOMEM;
-
- len = create_of_modalias(adev, &env->buf[env->buflen - 1],
- sizeof(env->buf) - env->buflen);
+ if (adev->data.of_compatible)
+ len = create_of_modalias(adev, &env->buf[env->buflen - 1],
+ sizeof(env->buf) - env->buflen);
+ else
+ len = create_pnp_modalias(adev, &env->buf[env->buflen - 1],
+ sizeof(env->buf) - env->buflen);
if (len < 0)
return len;

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index bb45eb22ba1f..36bdb04f8f01 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1976,7 +1976,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
- init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
+ init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
return 0;
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 977070ce4fe9..9ad5a7019abf 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1024,8 +1024,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
u32 ver, sts;
- int agaw = 0;
- int msagaw = 0;
+ int agaw = -1;
+ int msagaw = -1;
int err;

if (!drhd->reg_base_addr) {
@@ -1050,17 +1050,28 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
}

err = -EINVAL;
- agaw = iommu_calculate_agaw(iommu);
- if (agaw < 0) {
- pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
- goto err_unmap;
+ if (cap_sagaw(iommu->cap) == 0) {
+ pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
+ iommu->name);
+ drhd->ignored = 1;
}
- msagaw = iommu_calculate_max_sagaw(iommu);
- if (msagaw < 0) {
- pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
- goto err_unmap;
+
+ if (!drhd->ignored) {
+ agaw = iommu_calculate_agaw(iommu);
+ if (agaw < 0) {
+ pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
+ drhd->ignored = 1;
+ }
+ }
+ if (!drhd->ignored) {
+ msagaw = iommu_calculate_max_sagaw(iommu);
+ if (msagaw < 0) {
+ pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
+ drhd->ignored = 1;
+ agaw = -1;
+ }
}
iommu->agaw = agaw;
iommu->msagaw = msagaw;
@@ -1087,7 +1098,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)

raw_spin_lock_init(&iommu->register_lock);

- if (intel_iommu_enabled) {
+ if (intel_iommu_enabled && !drhd->ignored) {
iommu->iommu_dev = iommu_device_create(NULL, iommu,
intel_iommu_groups,
"%s", iommu->name);
@@ -1099,6 +1110,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
}

drhd->iommu = iommu;
+ iommu->drhd = drhd;

return 0;

@@ -1113,7 +1125,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)

static void free_iommu(struct intel_iommu *iommu)
{
- iommu_device_destroy(iommu->iommu_dev);
+ if (intel_iommu_enabled && !iommu->drhd->ignored)
+ iommu_device_destroy(iommu->iommu_dev);

if (iommu->irq) {
if (iommu->pr_irq) {
diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 431123b048a2..573a5a80b23c 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -283,14 +283,15 @@ void led_trigger_event(struct led_trigger *trig,
enum led_brightness brightness)
{
struct led_classdev *led_cdev;
+ unsigned long flags;

if (!trig)
return;

- read_lock(&trig->leddev_list_lock);
+ read_lock_irqsave(&trig->leddev_list_lock, flags);
list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list)
led_set_brightness(led_cdev, brightness);
- read_unlock(&trig->leddev_list_lock);
+ read_unlock_irqrestore(&trig->leddev_list_lock, flags);
}
EXPORT_SYMBOL_GPL(led_trigger_event);

@@ -301,11 +302,12 @@ static void led_trigger_blink_setup(struct led_trigger *trig,
int invert)
{
struct led_classdev *led_cdev;
+ unsigned long flags;

if (!trig)
return;

- read_lock(&trig->leddev_list_lock);
+ read_lock_irqsave(&trig->leddev_list_lock, flags);
list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) {
if (oneshot)
led_blink_set_oneshot(led_cdev, delay_on, delay_off,
@@ -313,7 +315,7 @@ static void led_trigger_blink_setup(struct led_trigger *trig,
else
led_blink_set(led_cdev, delay_on, delay_off);
}
- read_unlock(&trig->leddev_list_lock);
+ read_unlock_irqrestore(&trig->leddev_list_lock, flags);
}

void led_trigger_blink(struct led_trigger *trig,
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 164078609f98..ea38b67d0b73 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -1017,7 +1017,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
struct can_ctrlmode cm = {.flags = priv->ctrlmode};
- struct can_berr_counter bec;
+ struct can_berr_counter bec = { };
enum can_state state = priv->state;

if (priv->do_get_state)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index a571024882d7..1c0aec70ee5d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -942,6 +942,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)}, /* Olivetti Olicard 160 */
{QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */
{QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */
+ {QMI_QUIRK_SET_DTR(0x1e2d, 0x006f, 8)}, /* Cinterion PLS83/PLS63 */
{QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */
{QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index e1287c342116..71edbf7a42ed 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1909,7 +1909,8 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,

while (offs < dwords) {
/* limit the time we spin here under lock to 1/2s */
- ktime_t timeout = ktime_add_us(ktime_get(), 500 * USEC_PER_MSEC);
+ unsigned long end = jiffies + HZ / 2;
+ bool resched = false;

if (iwl_trans_grab_nic_access(trans, &flags)) {
iwl_write32(trans, HBUS_TARG_MEM_RADDR,
@@ -1920,14 +1921,15 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
HBUS_TARG_MEM_RDAT);
offs++;

- /* calling ktime_get is expensive so
- * do it once in 128 reads
- */
- if (offs % 128 == 0 && ktime_after(ktime_get(),
- timeout))
+ if (time_after(jiffies, end)) {
+ resched = true;
break;
+ }
}
iwl_trans_release_nic_access(trans, &flags);
+
+ if (resched)
+ cond_resched();
} else {
return -EBUSY;
}
diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
index 56cad16e70ca..1b68aef03fe2 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
@@ -160,8 +160,7 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e)

if (new_p) {
/* we have one extra ref from the allocator */
- __free_pages(e->p, MT_RX_ORDER);
-
+ put_page(e->p);
e->p = new_p;
}
}
@@ -318,7 +317,6 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev,
}

e = &q->e[q->end];
- e->skb = skb;
usb_fill_bulk_urb(e->urb, usb_dev, snd_pipe, skb->data, skb->len,
mt7601u_complete_tx, q);
ret = usb_submit_urb(e->urb, GFP_ATOMIC);
@@ -336,6 +334,7 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev,

q->end = (q->end + 1) % q->entries;
q->used++;
+ e->skb = skb;

if (q->used >= q->entries)
ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb));
diff --git a/fs/exec.c b/fs/exec.c
index cd5da140f94c..319a1f5732fa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1021,7 +1021,7 @@ static int exec_mmap(struct mm_struct *mm)
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
- mm_release(tsk, old_mm);
+ exec_mm_release(tsk, old_mm);

if (old_mm) {
sync_mm_rss(old_mm);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index fab35daf8759..6b9d38a7adca 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -311,8 +311,6 @@ struct compat_kexec_segment;
struct compat_mq_attr;
struct compat_msgbuf;

-extern void compat_exit_robust_list(struct task_struct *curr);
-
asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index c015fa91e7cc..0f294ae63c78 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H

+#include <linux/sched.h>
+
#include <uapi/linux/futex.h>

struct inode;
@@ -11,9 +13,6 @@ union ktime;
long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);

-extern int
-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
-
/*
* Futexes are matched on equal values of this key.
* The key type depends on whether it's a shared or private mapping.
@@ -56,19 +55,34 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }

#ifdef CONFIG_FUTEX
-extern void exit_robust_list(struct task_struct *curr);
-extern void exit_pi_state_list(struct task_struct *curr);
-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-#define futex_cmpxchg_enabled 1
-#else
-extern int futex_cmpxchg_enabled;
-#endif
-#else
-static inline void exit_robust_list(struct task_struct *curr)
-{
-}
-static inline void exit_pi_state_list(struct task_struct *curr)
+enum {
+ FUTEX_STATE_OK,
+ FUTEX_STATE_EXITING,
+ FUTEX_STATE_DEAD,
+};
+
+static inline void futex_init_task(struct task_struct *tsk)
{
+ tsk->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+ tsk->compat_robust_list = NULL;
+#endif
+ INIT_LIST_HEAD(&tsk->pi_state_list);
+ tsk->pi_state_cache = NULL;
+ tsk->futex_state = FUTEX_STATE_OK;
+ mutex_init(&tsk->futex_exit_mutex);
}
+
+void futex_exit_recursive(struct task_struct *tsk);
+void futex_exit_release(struct task_struct *tsk);
+void futex_exec_release(struct task_struct *tsk);
+
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3);
+#else
+static inline void futex_init_task(struct task_struct *tsk) { }
+static inline void futex_exit_recursive(struct task_struct *tsk) { }
+static inline void futex_exit_release(struct task_struct *tsk) { }
+static inline void futex_exec_release(struct task_struct *tsk) { }
#endif
#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d86ac620f0aa..188bd1768971 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -447,6 +447,8 @@ struct intel_iommu {
struct device *iommu_dev; /* IOMMU-sysfs device */
int node;
u32 flags; /* Software defined flags */
+
+ struct dmar_drhd_unit *drhd;
};

static inline void __iommu_flush_cache(
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1872d4e9acbe..f094882822a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1815,6 +1815,8 @@ struct task_struct {
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ struct mutex futex_exit_mutex;
+ unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -2276,7 +2278,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
* Per process flags
*/
#define PF_EXITING 0x00000004 /* getting shut down */
-#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
@@ -2955,8 +2956,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
* succeeds.
*/
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
-/* Remove the current tasks stale references to the old mm_struct */
-extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exit() */
+extern void exit_mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exec() */
+extern void exec_mm_release(struct task_struct *, struct mm_struct *);

#ifdef CONFIG_HAVE_COPY_THREAD_TLS
extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
diff --git a/kernel/Makefile b/kernel/Makefile
index 184fa9aa5802..92488cf6ad91 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -47,9 +47,6 @@ obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_FUTEX) += futex.o
-ifeq ($(CONFIG_COMPAT),y)
-obj-$(CONFIG_FUTEX) += futex_compat.o
-endif
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += smp.o
ifneq ($(CONFIG_SMP),y)
diff --git a/kernel/exit.c b/kernel/exit.c
index f9943ef23fa8..8716f0780fe3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -464,7 +464,7 @@ static void exit_mm(struct task_struct *tsk)
struct mm_struct *mm = tsk->mm;
struct core_state *core_state;

- mm_release(tsk, mm);
+ exit_mm_release(tsk, mm);
if (!mm)
return;
sync_mm_rss(mm);
@@ -785,31 +785,12 @@ void __noreturn do_exit(long code)
*/
if (unlikely(tsk->flags & PF_EXITING)) {
pr_alert("Fixing recursive fault but reboot is needed!\n");
- /*
- * We can do this unlocked here. The futex code uses
- * this flag just to verify whether the pi state
- * cleanup has been done or not. In the worst case it
- * loops once more. We pretend that the cleanup was
- * done as there is no way to return. Either the
- * OWNER_DIED bit is set by now or we push the blocked
- * task into the wait for ever nirwana as well.
- */
- tsk->flags |= PF_EXITPIDONE;
+ futex_exit_recursive(tsk);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}

exit_signals(tsk); /* sets PF_EXITING */
- /*
- * Ensure that all new tsk->pi_lock acquisitions must observe
- * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
- */
- smp_mb();
- /*
- * Ensure that we must observe the pi_state in exit_mm() ->
- * mm_release() -> exit_pi_state_list().
- */
- raw_spin_unlock_wait(&tsk->pi_lock);

/* sync mm's RSS info before statistics gathering */
if (tsk->mm)
@@ -876,12 +857,6 @@ void __noreturn do_exit(long code)
* Make sure we are holding no locks:
*/
debug_check_no_locks_held();
- /*
- * We can do this unlocked here. The futex code uses this flag
- * just to verify whether the pi state cleanup has been done
- * or not. In the worst case it loops once more.
- */
- tsk->flags |= PF_EXITPIDONE;

if (tsk->io_context)
exit_io_context(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index b64efec4a6e6..91349fd3e162 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1082,24 +1082,8 @@ static int wait_for_vfork_done(struct task_struct *child,
* restoring the old one. . .
* Eric Biederman 10 January 1998
*/
-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
- /* Get rid of any futexes when releasing the mm */
-#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list)) {
- exit_robust_list(tsk);
- tsk->robust_list = NULL;
- }
-#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list)) {
- compat_exit_robust_list(tsk);
- tsk->compat_robust_list = NULL;
- }
-#endif
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
-#endif
-
uprobe_free_utask(tsk);

/* Get rid of any cached register state */
@@ -1132,6 +1116,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
complete_vfork_done(tsk);
}

+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exit_release(tsk);
+ mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exec_release(tsk);
+ mm_release(tsk, mm);
+}
+
/*
* Allocate a new mm structure and copy contents from the
* mm structure of the passed in task structure.
@@ -1706,14 +1702,8 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
-#ifdef CONFIG_FUTEX
- p->robust_list = NULL;
-#ifdef CONFIG_COMPAT
- p->compat_robust_list = NULL;
-#endif
- INIT_LIST_HEAD(&p->pi_state_list);
- p->pi_state_cache = NULL;
-#endif
+ futex_init_task(p);
+
/*
* sigaltstack should be cleared when sharing the same VM
*/
diff --git a/kernel/futex.c b/kernel/futex.c
index 7123d9cab456..2ef8c5aef35d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -44,6 +44,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
@@ -171,8 +172,10 @@
* double_lock_hb() and double_unlock_hb(), respectively.
*/

-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-int __read_mostly futex_cmpxchg_enabled;
+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+#define futex_cmpxchg_enabled 1
+#else
+static int __read_mostly futex_cmpxchg_enabled;
#endif

/*
@@ -336,6 +339,12 @@ static inline bool should_fail_futex(bool fshared)
}
#endif /* CONFIG_FAIL_FUTEX */

+#ifdef CONFIG_COMPAT
+static void compat_exit_robust_list(struct task_struct *curr);
+#else
+static inline void compat_exit_robust_list(struct task_struct *curr) { }
+#endif
+
static inline void futex_get_mm(union futex_key *key)
{
atomic_inc(&key->private.mm->mm_count);
@@ -891,7 +900,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
* Kernel cleans up PI-state, but userspace is likely hosed.
* (Robust-futex cleanup is separate and might save the day for userspace.)
*/
-void exit_pi_state_list(struct task_struct *curr)
+static void exit_pi_state_list(struct task_struct *curr)
{
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
@@ -1063,12 +1072,43 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
return 0;
}

+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @exiting: Pointer to the exiting task
+ *
+ * Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+ if (ret != -EBUSY) {
+ WARN_ON_ONCE(exiting);
+ return;
+ }
+
+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+ return;
+
+ mutex_lock(&exiting->futex_exit_mutex);
+ /*
+ * No point in doing state checking here. If the waiter got here
+ * while the task was in exec()->exec_futex_release() then it can
+ * have any FUTEX_STATE_* value when the waiter has acquired the
+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
+ * already. Highly unlikely and not a problem. Just one more round
+ * through the futex maze.
+ */
+ mutex_unlock(&exiting->futex_exit_mutex);
+
+ put_task_struct(exiting);
+}
+
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
*/
static int attach_to_pi_owner(u32 uval, union futex_key *key,
- struct futex_pi_state **ps)
+ struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
pid_t pid = uval & FUTEX_TID_MASK;
struct futex_pi_state *pi_state;
@@ -1090,22 +1130,33 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
}

/*
- * We need to look at the task state flags to figure out,
- * whether the task is exiting. To protect against the do_exit
- * change of the task flags, we do this protected by
- * p->pi_lock:
+ * We need to look at the task state to figure out, whether the
+ * task is exiting. To protect against the change of the task state
+ * in futex_exit_release(), we do this protected by p->pi_lock:
*/
raw_spin_lock_irq(&p->pi_lock);
- if (unlikely(p->flags & PF_EXITING)) {
+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
/*
- * The task is on the way out. When PF_EXITPIDONE is
- * set, we know that the task has finished the
- * cleanup:
+ * The task is on the way out. When the futex state is
+ * FUTEX_STATE_DEAD, we know that the task has finished
+ * the cleanup:
*/
- int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+ int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;

raw_spin_unlock_irq(&p->pi_lock);
- put_task_struct(p);
+ /*
+ * If the owner task is between FUTEX_STATE_EXITING and
+ * FUTEX_STATE_DEAD then store the task pointer and keep
+ * the reference on the task struct. The calling code will
+ * drop all locks, wait for the task to reach
+ * FUTEX_STATE_DEAD and then drop the refcount. This is
+ * required to prevent a live lock when the current task
+ * preempted the exiting task between the two states.
+ */
+ if (ret == -EBUSY)
+ *exiting = p;
+ else
+ put_task_struct(p);
return ret;
}

@@ -1136,7 +1187,8 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
}

static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps)
+ union futex_key *key, struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
struct futex_q *match = futex_top_waiter(hb, key);

@@ -1151,7 +1203,7 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1177,6 +1229,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
* lookup
* @task: the task to perform the atomic lock work for. This will
* be "current" except in the case of requeue pi.
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Return:
@@ -1185,11 +1239,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
* <0 - error
*
* The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
*/
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
union futex_key *key,
struct futex_pi_state **ps,
- struct task_struct *task, int set_waiters)
+ struct task_struct *task,
+ struct task_struct **exiting,
+ int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
struct futex_q *match;
@@ -1259,7 +1319,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uval, key, ps, exiting);
}

/**
@@ -1685,6 +1745,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
* @key1: the from futex key
* @key2: the to futex key
* @ps: address to store the pi_state pointer
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1692,16 +1754,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
* hb1 and hb2 must be held by the caller.
*
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
* Return:
* 0 - failed to acquire the lock atomically;
* >0 - acquired the lock, return value is vpid of the top_waiter
* <0 - error
*/
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
- struct futex_hash_bucket *hb1,
- struct futex_hash_bucket *hb2,
- union futex_key *key1, union futex_key *key2,
- struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+ struct futex_hash_bucket *hb2, union futex_key *key1,
+ union futex_key *key2, struct futex_pi_state **ps,
+ struct task_struct **exiting, int set_waiters)
{
struct futex_q *top_waiter = NULL;
u32 curval;
@@ -1738,7 +1804,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
*/
vpid = task_pid_vnr(top_waiter->task);
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
- set_waiters);
+ exiting, set_waiters);
if (ret == 1) {
requeue_pi_wake_futex(top_waiter, key2, hb2);
return vpid;
@@ -1858,6 +1924,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
}

if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+ struct task_struct *exiting = NULL;
+
/*
* Attempt to acquire uaddr2 and wake the top waiter. If we
* intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -1865,7 +1933,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* faults rather in the requeue loop below.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
- &key2, &pi_state, nr_requeue);
+ &key2, &pi_state,
+ &exiting, nr_requeue);

/*
* At this point the top_waiter has either taken uaddr2 or is
@@ -1892,7 +1961,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
- ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
+ ret = lookup_pi_state(ret, hb2, &key2,
+ &pi_state, &exiting);
}

switch (ret) {
@@ -1910,17 +1980,24 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
if (!ret)
goto retry;
goto out;
+ case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
- * - Owner is exiting and we just wait for the
+ * - EBUSY: Owner is exiting and we just wait for the
* exit to complete.
- * - The user space value changed.
+ * - EAGAIN: The user space value changed.
*/
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
put_futex_key(&key1);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
@@ -2571,6 +2648,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
+ struct task_struct *exiting = NULL;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
int res, ret;
@@ -2594,7 +2672,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
retry_private:
hb = queue_lock(&q);

- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+ &exiting, 0);
if (unlikely(ret)) {
/*
* Atomic work succeeded and we got the lock,
@@ -2607,15 +2686,22 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
goto out_unlock_put_key;
case -EFAULT:
goto uaddr_faulted;
+ case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
- * - Task is exiting and we just wait for the
+ * - EBUSY: Task is exiting and we just wait for the
* exit to complete.
- * - The user space value changed.
+ * - EAGAIN: The user space value changed.
*/
queue_unlock(hb);
put_futex_key(&q.key);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
@@ -3123,7 +3209,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
* Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so:
*/
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
u32 uval, uninitialized_var(nval), mval;

@@ -3198,7 +3284,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
*
* We silently return on any sign of list-walking problem.
*/
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *next_entry, *pending;
@@ -3261,6 +3347,114 @@ void exit_robust_list(struct task_struct *curr)
curr, pip);
}

+static void futex_cleanup(struct task_struct *tsk)
+{
+ if (unlikely(tsk->robust_list)) {
+ exit_robust_list(tsk);
+ tsk->robust_list = NULL;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(tsk->compat_robust_list)) {
+ compat_exit_robust_list(tsk);
+ tsk->compat_robust_list = NULL;
+ }
+#endif
+
+ if (unlikely(!list_empty(&tsk->pi_state_list)))
+ exit_pi_state_list(tsk);
+}
+
+/**
+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk: task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This is called from the recursive fault handling path in do_exit().
+ *
+ * This is best effort. Either the futex exit code has run already or
+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+ * take it over. If not, the problem is pushed back to user space. If the
+ * futex exit code did not run yet, then an already queued waiter might
+ * block forever, but there is nothing which can be done about that.
+ */
+void futex_exit_recursive(struct task_struct *tsk)
+{
+ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+ if (tsk->futex_state == FUTEX_STATE_EXITING)
+ mutex_unlock(&tsk->futex_exit_mutex);
+ tsk->futex_state = FUTEX_STATE_DEAD;
+}
+
+static void futex_cleanup_begin(struct task_struct *tsk)
+{
+ /*
+ * Prevent various race issues against a concurrent incoming waiter
+ * including live locks by forcing the waiter to block on
+ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+ * attach_to_pi_owner().
+ */
+ mutex_lock(&tsk->futex_exit_mutex);
+
+ /*
+ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+ *
+ * This ensures that all subsequent checks of tsk->futex_state in
+ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+ * tsk->pi_lock held.
+ *
+ * It guarantees also that a pi_state which was queued right before
+ * the state change under tsk->pi_lock by a concurrent waiter must
+ * be observed in exit_pi_state_list().
+ */
+ raw_spin_lock_irq(&tsk->pi_lock);
+ tsk->futex_state = FUTEX_STATE_EXITING;
+ raw_spin_unlock_irq(&tsk->pi_lock);
+}
+
+static void futex_cleanup_end(struct task_struct *tsk, int state)
+{
+ /*
+ * Lockless store. The only side effect is that an observer might
+ * take another loop until it becomes visible.
+ */
+ tsk->futex_state = state;
+ /*
+ * Drop the exit protection. This unblocks waiters which observed
+ * FUTEX_STATE_EXITING to reevaluate the state.
+ */
+ mutex_unlock(&tsk->futex_exit_mutex);
+}
+
+void futex_exec_release(struct task_struct *tsk)
+{
+ /*
+ * The state handling is done for consistency, but in the case of
+ * exec() there is no way to prevent futher damage as the PID stays
+ * the same. But for the unlikely and arguably buggy case that a
+ * futex is held on exec(), this provides at least as much state
+ * consistency protection which is possible.
+ */
+ futex_cleanup_begin(tsk);
+ futex_cleanup(tsk);
+ /*
+ * Reset the state to FUTEX_STATE_OK. The task is alive and about
+ * exec a new binary.
+ */
+ futex_cleanup_end(tsk, FUTEX_STATE_OK);
+}
+
+void futex_exit_release(struct task_struct *tsk)
+{
+ futex_cleanup_begin(tsk);
+ futex_cleanup(tsk);
+ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
+}
+
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
@@ -3354,6 +3548,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

+#ifdef CONFIG_COMPAT
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+ compat_uptr_t __user *head, unsigned int *pi)
+{
+ if (get_user(*uentry, head))
+ return -EFAULT;
+
+ *entry = compat_ptr((*uentry) & ~1);
+ *pi = (unsigned int)(*uentry) & 1;
+
+ return 0;
+}
+
+static void __user *futex_uaddr(struct robust_list __user *entry,
+ compat_long_t futex_offset)
+{
+ compat_uptr_t base = ptr_to_compat(entry);
+ void __user *uaddr = compat_ptr(base + futex_offset);
+
+ return uaddr;
+}
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+void compat_exit_robust_list(struct task_struct *curr)
+{
+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ unsigned int uninitialized_var(next_pi);
+ compat_uptr_t uentry, next_uentry, upending;
+ compat_long_t futex_offset;
+ int rc;
+
+ if (!futex_cmpxchg_enabled)
+ return;
+
+ /*
+ * Fetch the list head (which was registered earlier, via
+ * sys_set_robust_list()):
+ */
+ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+ return;
+ /*
+ * Fetch the relative futex offset:
+ */
+ if (get_user(futex_offset, &head->futex_offset))
+ return;
+ /*
+ * Fetch any possibly pending lock-add first, and handle it
+ * if it exists:
+ */
+ if (compat_fetch_robust_entry(&upending, &pending,
+ &head->list_op_pending, &pip))
+ return;
+
+ next_entry = NULL; /* avoid warning with gcc */
+ while (entry != (struct robust_list __user *) &head->list) {
+ /*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
+ (compat_uptr_t __user *)&entry->next, &next_pi);
+ /*
+ * A pending lock might already be on the list, so
+ * dont process it twice:
+ */
+ if (entry != pending) {
+ void __user *uaddr = futex_uaddr(entry, futex_offset);
+
+ if (handle_futex_death(uaddr, curr, pi))
+ return;
+ }
+ if (rc)
+ return;
+ uentry = next_uentry;
+ entry = next_entry;
+ pi = next_pi;
+ /*
+ * Avoid excessively long or circular lists:
+ */
+ if (!--limit)
+ break;
+
+ cond_resched();
+ }
+ if (pending) {
+ void __user *uaddr = futex_uaddr(pending, futex_offset);
+
+ handle_futex_death(uaddr, curr, pip);
+ }
+}
+
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+ struct compat_robust_list_head __user *, head,
+ compat_size_t, len)
+{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ if (unlikely(len != sizeof(*head)))
+ return -EINVAL;
+
+ current->compat_robust_list = head;
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+ compat_uptr_t __user *, head_ptr,
+ compat_size_t __user *, len_ptr)
+{
+ struct compat_robust_list_head __user *head;
+ unsigned long ret;
+ struct task_struct *p;
+
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
+ if (!pid)
+ p = current;
+ else {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ goto err_unlock;
+ }
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
+ if (put_user(sizeof(*head), len_ptr))
+ return -EFAULT;
+ return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+ struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
+{
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
+ int val2 = 0;
+ int cmd = op & FUTEX_CMD_MASK;
+
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ if (compat_get_timespec(&ts, utime))
+ return -EFAULT;
+ if (!timespec_valid(&ts))
+ return -EINVAL;
+
+ t = timespec_to_ktime(ts);
+ if (cmd == FUTEX_WAIT)
+ t = ktime_add_safe(ktime_get(), t);
+ tp = &t;
+ }
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+ val2 = (int) (unsigned long) utime;
+
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+}
+#endif /* CONFIG_COMPAT */
+
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
deleted file mode 100644
index 4ae3232e7a28..000000000000
--- a/kernel/futex_compat.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * linux/kernel/futex_compat.c
- *
- * Futex compatibililty routines.
- *
- * Copyright 2006, Red Hat, Inc., Ingo Molnar
- */
-
-#include <linux/linkage.h>
-#include <linux/compat.h>
-#include <linux/nsproxy.h>
-#include <linux/futex.h>
-#include <linux/ptrace.h>
-#include <linux/syscalls.h>
-
-#include <asm/uaccess.h>
-
-
-/*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
- */
-static inline int
-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
- compat_uptr_t __user *head, unsigned int *pi)
-{
- if (get_user(*uentry, head))
- return -EFAULT;
-
- *entry = compat_ptr((*uentry) & ~1);
- *pi = (unsigned int)(*uentry) & 1;
-
- return 0;
-}
-
-static void __user *futex_uaddr(struct robust_list __user *entry,
- compat_long_t futex_offset)
-{
- compat_uptr_t base = ptr_to_compat(entry);
- void __user *uaddr = compat_ptr(base + futex_offset);
-
- return uaddr;
-}
-
-/*
- * Walk curr->robust_list (very carefully, it's a userspace list!)
- * and mark any locks found there dead, and notify any waiters.
- *
- * We silently return on any sign of list-walking problem.
- */
-void compat_exit_robust_list(struct task_struct *curr)
-{
- struct compat_robust_list_head __user *head = curr->compat_robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
- compat_uptr_t uentry, next_uentry, upending;
- compat_long_t futex_offset;
- int rc;
-
- if (!futex_cmpxchg_enabled)
- return;
-
- /*
- * Fetch the list head (which was registered earlier, via
- * sys_set_robust_list()):
- */
- if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
- return;
- /*
- * Fetch the relative futex offset:
- */
- if (get_user(futex_offset, &head->futex_offset))
- return;
- /*
- * Fetch any possibly pending lock-add first, and handle it
- * if it exists:
- */
- if (fetch_robust_entry(&upending, &pending,
- &head->list_op_pending, &pip))
- return;
-
- next_entry = NULL; /* avoid warning with gcc */
- while (entry != (struct robust_list __user *) &head->list) {
- /*
- * Fetch the next entry in the list before calling
- * handle_futex_death:
- */
- rc = fetch_robust_entry(&next_uentry, &next_entry,
- (compat_uptr_t __user *)&entry->next, &next_pi);
- /*
- * A pending lock might already be on the list, so
- * dont process it twice:
- */
- if (entry != pending) {
- void __user *uaddr = futex_uaddr(entry, futex_offset);
-
- if (handle_futex_death(uaddr, curr, pi))
- return;
- }
- if (rc)
- return;
- uentry = next_uentry;
- entry = next_entry;
- pi = next_pi;
- /*
- * Avoid excessively long or circular lists:
- */
- if (!--limit)
- break;
-
- cond_resched();
- }
- if (pending) {
- void __user *uaddr = futex_uaddr(pending, futex_offset);
-
- handle_futex_death(uaddr, curr, pip);
- }
-}
-
-COMPAT_SYSCALL_DEFINE2(set_robust_list,
- struct compat_robust_list_head __user *, head,
- compat_size_t, len)
-{
- if (!futex_cmpxchg_enabled)
- return -ENOSYS;
-
- if (unlikely(len != sizeof(*head)))
- return -EINVAL;
-
- current->compat_robust_list = head;
-
- return 0;
-}
-
-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
- compat_uptr_t __user *, head_ptr,
- compat_size_t __user *, len_ptr)
-{
- struct compat_robust_list_head __user *head;
- unsigned long ret;
- struct task_struct *p;
-
- if (!futex_cmpxchg_enabled)
- return -ENOSYS;
-
- rcu_read_lock();
-
- ret = -ESRCH;
- if (!pid)
- p = current;
- else {
- p = find_task_by_vpid(pid);
- if (!p)
- goto err_unlock;
- }
-
- ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
- goto err_unlock;
-
- head = p->compat_robust_list;
- rcu_read_unlock();
-
- if (put_user(sizeof(*head), len_ptr))
- return -EFAULT;
- return put_user(ptr_to_compat(head), head_ptr);
-
-err_unlock:
- rcu_read_unlock();
-
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- struct compat_timespec __user *, utime, u32 __user *, uaddr2,
- u32, val3)
-{
- struct timespec ts;
- ktime_t t, *tp = NULL;
- int val2 = 0;
- int cmd = op & FUTEX_CMD_MASK;
-
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
- if (compat_get_timespec(&ts, utime))
- return -EFAULT;
- if (!timespec_valid(&ts))
- return -EINVAL;
-
- t = timespec_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
- t = ktime_add_safe(ktime_get(), t);
- tp = &t;
- }
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
- val2 = (int) (unsigned long) utime;
-
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
-}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0b0de3030e0d..9c20c53f6729 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1046,6 +1046,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_FLUSH,
IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
IEEE80211_QUEUE_STOP_REASON_RESERVE_TID,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE,

IEEE80211_QUEUE_STOP_REASONS,
};
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ad03331ee785..7d43e0085cfc 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1577,6 +1577,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
if (ret)
return ret;

+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
+ synchronize_net();
+
ieee80211_do_stop(sdata, false);

ieee80211_teardown_sdata(sdata);
@@ -1597,6 +1601,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
err = ieee80211_do_open(&sdata->wdev, false);
WARN(err, "type change: do_open returned %d", err);

+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
return ret;
}

diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b9dd4e960426..81adbfaffe38 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -210,8 +210,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
priv->expr->ops->size);
if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout)
+ if (timeout || set->timeout) {
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
}

priv->timeout = timeout;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 0afae9f73ebb..f326a6ea35fc 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -887,6 +887,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)

if (!dev->polling) {
device_unlock(&dev->dev);
+ nfc_put_device(dev);
return -EINVAL;
}

diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 574af981806f..92a3cfae4de8 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -117,7 +117,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr,
if (addr->target_idx > dev->target_next_idx - 1 ||
addr->target_idx < dev->target_next_idx - dev->n_targets) {
rc = -EINVAL;
- goto error;
+ goto put_dev;
}

rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 4bf0296a7c43..0e809ae17f38 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -898,8 +898,9 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
int call_commit_handler(struct net_device *dev)
{
#ifdef CONFIG_WIRELESS_EXT
- if ((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL))
+ if (netif_running(dev) &&
+ dev->wireless_handlers &&
+ dev->wireless_handlers->standard[0])
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1e87639f2c27..d613bf77cc0f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -315,7 +315,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
/* only the first xfrm gets the encap type */
encap_type = 0;

- if (async && x->repl->recheck(x, skb, seq)) {
+ if (x->repl->recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}