Re: Linux 4.4.38

From: Greg KH
Date: Sat Dec 10 2016 - 14:57:02 EST


diff --git a/Makefile b/Makefile
index b57ec79b4941..6876efe0d735 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 37
+SUBLEVEL = 38
EXTRAVERSION =
NAME = Blurry Fish Butt

diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index c3c12efe0bc0..9c0c8fd0b292 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];

/* 1. Make sure we are not getting garbage from the user */
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv_and_exit;

if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)

synchronize_user_stack();
sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv;

if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index a5331c336b2a..3d3414c14792 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -800,8 +800,10 @@ struct mdesc_mblock {
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;
+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask);

-static unsigned long ra_to_pa(unsigned long addr)
+static unsigned long __init ra_to_pa(unsigned long addr)
{
int i;

@@ -817,8 +819,11 @@ static unsigned long ra_to_pa(unsigned long addr)
return addr;
}

-static int find_node(unsigned long addr)
+static int __init find_node(unsigned long addr)
{
+ static bool search_mdesc = true;
+ static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
+ static int last_index;
int i;

addr = ra_to_pa(addr);
@@ -828,13 +833,30 @@ static int find_node(unsigned long addr)
if ((addr & p->mask) == p->val)
return i;
}
- /* The following condition has been observed on LDOM guests.*/
- WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
- " rule. Some physical memory will be owned by node 0.");
- return 0;
+ /* The following condition has been observed on LDOM guests because
+ * node_masks only contains the best latency mask and value.
+ * LDOM guest's mdesc can contain a single latency group to
+ * cover multiple address range. Print warning message only if the
+ * address cannot be found in node_masks nor mdesc.
+ */
+ if ((search_mdesc) &&
+ ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
+ /* find the available node in the mdesc */
+ last_index = find_numa_node_for_addr(addr, &last_mem_mask);
+ numadbg("find_node: latency group for address 0x%lx is %d\n",
+ addr, last_index);
+ if ((last_index < 0) || (last_index >= num_node_masks)) {
+ /* WARN_ONCE() and use default group 0 */
+ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
+ search_mdesc = false;
+ last_index = 0;
+ }
+ }
+
+ return last_index;
}

-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
*nid = find_node(start);
start += PAGE_SIZE;
@@ -1158,6 +1180,41 @@ int __node_distance(int from, int to)
return numa_latency[from][to];
}

+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask)
+{
+ struct mdesc_handle *md = mdesc_grab();
+ u64 node, arc;
+ int i = 0;
+
+ node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+ if (node == MDESC_NODE_NULL)
+ goto out;
+
+ mdesc_for_each_node_by_name(md, node, "group") {
+ mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(md, arc);
+ struct mdesc_mlgroup *m = find_mlgroup(target);
+
+ if (!m)
+ continue;
+ if ((pa & m->mask) == m->match) {
+ if (pnode_mask) {
+ pnode_mask->mask = m->mask;
+ pnode_mask->val = m->match;
+ }
+ mdesc_release(md);
+ return i;
+ }
+ }
+ i++;
+ }
+
+out:
+ mdesc_release(md);
+ return -1;
+}
+
static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
int i;
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11f465a..69953bd97e65 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -90,6 +90,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (!iter || !iter->count)
return -EINVAL;

+ if (!iter_is_iovec(iter))
+ return -EINVAL;
+
iov_for_each(iov, i, *iter) {
unsigned long uaddr = (unsigned long) iov.iov_base;

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6f946fedbb77..0864f05633a2 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1137,6 +1137,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
struct phy_device *phydev)
{
struct bcm_sf2_priv *priv = ds_to_priv(ds);
+ struct ethtool_eee *p = &priv->port_sts[port].eee;
u32 id_mode_dis = 0, port_mode;
const char *str = NULL;
u32 reg;
@@ -1211,6 +1212,9 @@ force_link:
reg |= DUPLX_MODE;

core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+
+ if (!phydev->is_pseudo_fixed_link)
+ p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
}

static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0fb3f8de88e9..91627561c58d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1168,6 +1168,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
struct bcmgenet_tx_ring *ring)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device *kdev = &priv->pdev->dev;
struct enet_cb *tx_cb_ptr;
struct netdev_queue *txq;
unsigned int pkts_compl = 0;
@@ -1195,7 +1196,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
pkts_compl++;
dev->stats.tx_packets++;
dev->stats.tx_bytes += tx_cb_ptr->skb->len;
- dma_unmap_single(&dev->dev,
+ dma_unmap_single(kdev,
dma_unmap_addr(tx_cb_ptr, dma_addr),
dma_unmap_len(tx_cb_ptr, dma_len),
DMA_TO_DEVICE);
@@ -1203,7 +1204,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
dev->stats.tx_bytes +=
dma_unmap_len(tx_cb_ptr, dma_len);
- dma_unmap_page(&dev->dev,
+ dma_unmap_page(kdev,
dma_unmap_addr(tx_cb_ptr, dma_addr),
dma_unmap_len(tx_cb_ptr, dma_len),
DMA_TO_DEVICE);
@@ -1754,6 +1755,7 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,

static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
{
+ struct device *kdev = &priv->pdev->dev;
struct enet_cb *cb;
int i;

@@ -1761,7 +1763,7 @@ static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
cb = &priv->rx_cbs[i];

if (dma_unmap_addr(cb, dma_addr)) {
- dma_unmap_single(&priv->dev->dev,
+ dma_unmap_single(kdev,
dma_unmap_addr(cb, dma_addr),
priv->rx_buf_len, DMA_FROM_DEVICE);
dma_unmap_addr_set(cb, dma_addr, 0);
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 5606a043063e..4b62aa1f9ff8 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume);

static void sky2_shutdown(struct pci_dev *pdev)
{
+ struct sky2_hw *hw = pci_get_drvdata(pdev);
+ int port;
+
+ for (port = 0; port < hw->ports; port++) {
+ struct net_device *ndev = hw->dev[port];
+
+ rtnl_lock();
+ if (netif_running(ndev)) {
+ dev_close(ndev);
+ netif_device_detach(ndev);
+ }
+ rtnl_unlock();
+ }
sky2_suspend(&pdev->dev);
pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev));
pci_set_power_state(pdev, PCI_D3hot);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 36fc9427418f..480f3dae0780 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -832,7 +832,7 @@ static struct sh_eth_cpu_data r7s72100_data = {

.ecsr_value = ECSR_ICD,
.ecsipr_value = ECSIPR_ICDIP,
- .eesipr_value = 0xff7f009f,
+ .eesipr_value = 0xe77f009f,

.tx_check = EESR_TC1 | EESR_FTC,
.eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 4827c6987ac3..f0961cbaf87e 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -815,7 +815,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs4 = geneve->sock4;
struct rtable *rt = NULL;
- const struct iphdr *iip; /* interior IP header */
int err = -EINVAL;
struct flowi4 fl4;
__u8 tos, ttl;
@@ -842,8 +841,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
skb_reset_mac_header(skb);

- iip = ip_hdr(skb);
-
if (info) {
const struct ip_tunnel_key *key = &info->key;
u8 *opts = NULL;
@@ -859,7 +856,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err;

- tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
+ tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
@@ -869,7 +866,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err;

- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
+ tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
ttl = geneve->ttl;
if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
ttl = 1;
@@ -903,7 +900,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
- const struct iphdr *iip; /* interior IP header */
int err = -EINVAL;
struct flowi6 fl6;
__u8 prio, ttl;
@@ -927,8 +923,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
skb_reset_mac_header(skb);

- iip = ip_hdr(skb);
-
if (info) {
const struct ip_tunnel_key *key = &info->key;
u8 *opts = NULL;
@@ -945,7 +939,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err;

- prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
+ prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
} else {
udp_csum = false;
@@ -954,7 +948,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err;

- prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb);
+ prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, ip_hdr(skb), skb);
ttl = geneve->ttl;
if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
ttl = 1;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f94ab786088f..0e2a19e58923 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1465,6 +1465,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
netif_napi_del(&vi->rq[i].napi);
}

+ /* We called napi_hash_del() before netif_napi_del(),
+ * we need to respect an RCU grace period before freeing vi->rq
+ */
+ synchronize_net();
+
kfree(vi->rq);
kfree(vi->sq);
}
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5f9c59da978b..e2225109b816 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -101,12 +101,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);

-static inline size_t iov_iter_count(struct iov_iter *i)
+static inline size_t iov_iter_count(const struct iov_iter *i)
{
return i->count;
}

-static inline bool iter_is_iovec(struct iov_iter *i)
+static inline bool iter_is_iovec(const struct iov_iter *i)
{
return !(i->type & (ITER_BVEC | ITER_KVEC));
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..2e9a1c2818c7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, struct net *peer)
bool alloc;
int id;

+ if (atomic_read(&net->count) == 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
spin_lock_irqsave(&net->nsid_lock, flags);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 87b91ffbdec3..b94e165a4f79 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2600,7 +2600,10 @@ nla_put_failure:

static inline size_t rtnl_fdb_nlmsg_size(void)
{
- return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
+ return NLMSG_ALIGN(sizeof(struct ndmsg)) +
+ nla_total_size(ETH_ALEN) + /* NDA_LLADDR */
+ nla_total_size(sizeof(u16)) + /* NDA_VLAN */
+ 0;
}

static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type)
diff --git a/net/core/sock.c b/net/core/sock.c
index 88f017854509..f4c0917e66b5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -745,7 +745,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
+ sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk);
break;
@@ -781,7 +781,7 @@ set_rcvbuf:
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
- sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
+ sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
break;

case SO_RCVBUFFORCE:
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 861e1fa25d5e..0759f5b9180e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -698,6 +698,7 @@ int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
unsigned int cscov;
+ u8 dccph_doff;

if (skb->pkt_type != PACKET_HOST)
return 1;
@@ -719,18 +720,19 @@ int dccp_invalid_packet(struct sk_buff *skb)
/*
* If P.Data Offset is too small for packet type, drop packet and return
*/
- if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
- DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
+ dccph_doff = dh->dccph_doff;
+ if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+ DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff);
return 1;
}
/*
* If P.Data Offset is too too large for packet, drop packet and return
*/
- if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
- DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
+ if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
+ DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
return 1;
}
-
+ dh = dccp_hdr(skb);
/*
* If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
* has short sequence numbers), drop packet and return
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d95631d09248..20fb25e3027b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -476,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
esph = (void *)skb_push(skb, 4);
*seqhi = esph->spi;
esph->spi = esph->seq_no;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
}

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f2ad5216c438..2b7283303650 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -102,6 +102,9 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)

iph->tot_len = htons(skb->len);
ip_send_check(iph);
+
+ skb->protocol = htons(ETH_P_IP);
+
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aa67e0e64b69..23160d2b3f71 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -660,6 +660,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
if (len > 0xFFFF)
return -EMSGSIZE;

+ /* Must have at least a full ICMP header. */
+ if (len < icmph_len)
+ return -EINVAL;
+
/*
* Check the flags.
*/
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 060a60b2f8a6..111ba55fd512 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
esph = (void *)skb_push(skb, 4);
*seqhi = esph->spi;
esph->spi = esph->seq_no;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
}

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e8878886eba4..2994d1f1a661 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1043,6 +1043,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
struct ipv6_tel_txoption opt;
struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
+ bool use_cache = false;
int mtu;
unsigned int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
@@ -1070,7 +1071,15 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,

memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
- } else if (!fl6->flowi6_mark)
+ } else if (!(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only only if the routing decision does
+ * not depend on the current inner header value
+ */
+ use_cache = true;
+ }
+
+ if (use_cache)
dst = ip6_tnl_dst_get(t);

if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
@@ -1134,7 +1143,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb = new_skb;
}

- if (!fl6->flowi6_mark && ndst)
+ if (use_cache && ndst)
ip6_tnl_dst_set(t, ndst);
skb_dst_set(skb, dst);

diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 462f2a76b5c2..1d184322a7b1 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -148,6 +148,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);

+ skb->protocol = htons(ETH_P_IPV6);
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 42de4ccd159f..d0e906d39642 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret;
int chk_addr_ret;

- if (!sock_flag(sk, SOCK_ZAPPED))
- return -EINVAL;
if (addr_len < sizeof(struct sockaddr_l2tpip))
return -EINVAL;
if (addr->l2tp_family != AF_INET)
@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
read_unlock_bh(&l2tp_ip_lock);

lock_sock(sk);
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out;
+
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
goto out;

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 9ee4ddb6b397..3c4f867d3633 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -266,8 +266,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int addr_type;
int err;

- if (!sock_flag(sk, SOCK_ZAPPED))
- return -EINVAL;
if (addr->l2tp_family != AF_INET6)
return -EINVAL;
if (addr_len < sizeof(*addr))
@@ -293,6 +291,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
lock_sock(sk);

err = -EINVAL;
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out_unlock;
+
if (sk->sk_state != TCP_CLOSE)
goto out_unlock;

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 28fc283c1ec1..360700a2f46c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -931,7 +931,6 @@ static void netlink_sock_destruct(struct sock *sk)
if (nlk->cb_running) {
if (nlk->cb.done)
nlk->cb.done(&nlk->cb);
-
module_put(nlk->cb.module);
kfree_skb(nlk->cb.skb);
}
@@ -960,6 +959,14 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}

+static void netlink_sock_destruct_work(struct work_struct *work)
+{
+ struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+ work);
+
+ sk_free(&nlk->sk);
+}
+
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -1265,8 +1272,18 @@ out_module:
static void deferred_put_nlk_sk(struct rcu_head *head)
{
struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+ struct sock *sk = &nlk->sk;
+
+ if (!atomic_dec_and_test(&sk->sk_refcnt))
+ return;
+
+ if (nlk->cb_running && nlk->cb.done) {
+ INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+ schedule_work(&nlk->work);
+ return;
+ }

- sock_put(&nlk->sk);
+ sk_free(sk);
}

static int netlink_release(struct socket *sock)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 14437d9b1965..df32cb92d9fc 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -3,6 +3,7 @@

#include <linux/rhashtable.h>
#include <linux/atomic.h>
+#include <linux/workqueue.h>
#include <net/sock.h>

#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -53,6 +54,7 @@ struct netlink_sock {

struct rhash_head node;
struct rcu_head rcu;
+ struct work_struct work;
};

static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 34e4fcfd240b..f223d1c80ccf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3572,19 +3572,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv

if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
case TPACKET_V2:
case TPACKET_V3:
- po->tp_version = val;
- return 0;
+ break;
default:
return -EINVAL;
}
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_version = val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_RESERVE:
{
@@ -4067,6 +4073,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;

+ lock_sock(sk);
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
WARN(1, "Tx-ring is not supported.\n");
@@ -4148,7 +4155,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
}

- lock_sock(sk);

/* Detach socket from network */
spin_lock(&po->bind_lock);
@@ -4197,11 +4203,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);
}
- release_sock(sk);

if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
+ release_sock(sk);
return err;
}

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e38a7701f154..c3434e902445 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -104,6 +104,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind)
kfree(keys);
}

+static bool offset_valid(struct sk_buff *skb, int offset)
+{
+ if (offset > 0 && offset > skb->len)
+ return false;
+
+ if (offset < 0 && -offset > skb_headroom(skb))
+ return false;
+
+ return true;
+}
+
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -130,6 +141,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
if (tkey->offmask) {
char *d, _d;

+ if (!offset_valid(skb, off + tkey->at)) {
+ pr_info("tc filter pedit 'at' offset %d out of bounds\n",
+ off + tkey->at);
+ goto bad;
+ }
d = skb_header_pointer(skb, off + tkey->at, 1,
&_d);
if (!d)
@@ -142,10 +158,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
" offset must be on 32 bit boundaries\n");
goto bad;
}
- if (offset > 0 && offset > skb->len) {
- pr_info("tc filter pedit"
- " offset %d can't exceed pkt length %d\n",
- offset, skb->len);
+
+ if (!offset_valid(skb, off + offset)) {
+ pr_info("tc filter pedit offset %d out of bounds\n",
+ offset);
goto bad;
}

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0b8c3ace671f..1bf1f4517db6 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;

- if (head == NULL)
- return 0UL;
-
list_for_each_entry(f, &head->flist, link) {
if (f->handle == handle) {
l = (unsigned long) f;
@@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force)
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5faaa5425f7b..3eef0215e53f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -199,7 +199,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
}

- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
@@ -210,9 +209,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
struct cls_bpf_prog *prog;
unsigned long ret = 0UL;

- if (head == NULL)
- return 0UL;
-
list_for_each_entry(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 4c85bd3a750c..c104c2019feb 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)

if (!force)
return false;
-
- if (head) {
- RCU_INIT_POINTER(tp->root, NULL);
+ /* Head can still be NULL due to cls_cgroup_init(). */
+ if (head)
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
- }
+
return true;
}

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index fbfec6a18839..d7ba2b4ff0f3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force)
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
return true;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b021243233..e5a58c82728a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>
+#include <linux/workqueue.h>

#include <linux/if_ether.h>
#include <linux/in6.h>
@@ -55,7 +56,10 @@ struct cls_fl_head {
bool mask_assigned;
struct list_head filters;
struct rhashtable_params ht_params;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};

struct cls_fl_filter {
@@ -165,6 +169,24 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
}

+static void fl_destroy_sleepable(struct work_struct *work)
+{
+ struct cls_fl_head *head = container_of(work, struct cls_fl_head,
+ work);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree(head);
+ module_put(THIS_MODULE);
+}
+
+static void fl_destroy_rcu(struct rcu_head *rcu)
+{
+ struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
+
+ INIT_WORK(&head->work, fl_destroy_sleepable);
+ schedule_work(&head->work);
+}
+
static bool fl_destroy(struct tcf_proto *tp, bool force)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -177,10 +199,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
- RCU_INIT_POINTER(tp->root, NULL);
- if (head->mask_assigned)
- rhashtable_destroy(&head->ht);
- kfree_rcu(head, rcu);
+
+ __module_get(THIS_MODULE);
+ call_rcu(&head->rcu, fl_destroy_rcu);
return true;
}

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index f9c9fc075fe6..9992dfac6938 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
nhptr = ip_hdr(skb);
#endif
-
+ if (unlikely(!head))
+ return -1;
restart:

#if RSVP_DST_LEN == 4
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 944c8ff45055..403746b20263 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force)
walker.fn = tcindex_destroy_element;
tcindex_walk(tp, &walker);

- RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&p->rcu, __tcindex_destroy);
return true;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 824cc1e160bc..73f75258ce46 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2194,7 +2194,8 @@ out:
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, unsigned int last_len)
+ struct sk_buff *last, unsigned int last_len,
+ bool freezable)
{
struct sk_buff *tail;
DEFINE_WAIT(wait);
@@ -2215,7 +2216,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,

sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
- timeo = freezable_schedule_timeout(timeo);
+ if (freezable)
+ timeo = freezable_schedule_timeout(timeo);
+ else
+ timeo = schedule_timeout(timeo);
unix_state_lock(sk);

if (sock_flag(sk, SOCK_DEAD))
@@ -2245,7 +2249,8 @@ struct unix_stream_read_state {
unsigned int splice_flags;
};

-static int unix_stream_read_generic(struct unix_stream_read_state *state)
+static int unix_stream_read_generic(struct unix_stream_read_state *state,
+ bool freezable)
{
struct scm_cookie scm;
struct socket *sock = state->socket;
@@ -2324,7 +2329,7 @@ again:
mutex_unlock(&u->iolock);

timeo = unix_stream_data_wait(sk, timeo, last,
- last_len);
+ last_len, freezable);

if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -2466,7 +2471,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
.flags = flags
};

- return unix_stream_read_generic(&state);
+ return unix_stream_read_generic(&state, true);
}

static ssize_t skb_unix_socket_splice(struct sock *sk,
@@ -2512,7 +2517,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
flags & SPLICE_F_NONBLOCK)
state.flags = MSG_DONTWAIT;

- return unix_stream_read_generic(&state);
+ return unix_stream_read_generic(&state, false);
}

static int unix_shutdown(struct socket *sock, int mode)