Re: packet_sendmsg_spkt sleeping from invalid context

From: Eric Dumazet
Date: Mon Dec 14 2009 - 16:39:51 EST


Le 14/12/2009 22:30, Frederic Weisbecker a écrit :
> On Mon, Dec 14, 2009 at 10:25:57PM +0100, Eric Dumazet wrote:
>> Le 14/12/2009 21:52, Frederic Weisbecker a écrit :
>>>
>>> I also wonder. Are you using PREEMPT_RCU ?
>>
>> Not at all :)
>>
>> But yes, this is illegal to do the memcpy_fromiovec() in rcu_read_lock() context.
>
>
> I've just tested, and with rcu preempt it is mute, no warning :)
>
>
>>> That may explain why you haven't seen this issue because
>>> might_sleep() doesn't see you are in a rcu read locked
>>> section as preemption is not disabled, but it is illegal to
>>> voluntarily sleep in such area (although it's fine with
>>> preempt rcu) as doing so with non-prempt RCU config would barf.
>>>
>>> I'm trying a patch to handle that.
>>
>> As you want, I also have a patch testing right now :)
>
>
> But mine is to teach might_sleep() to handle rcu preempt case,
> not to fix this net dev thing.
>
> But I'll happily test the fix you have :)
>

OK here it is, I tested it with PREEMPT_RCU as well

Thanks !

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0205621..bc17351 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -415,7 +415,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct net_device *dev;
__be16 proto = 0;
int err;
@@ -437,6 +437,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
*/

saddr->spkt_device[13] = 0;
+retry:
rcu_read_lock();
dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
err = -ENODEV;
@@ -456,58 +457,48 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
if (len > dev->mtu + dev->hard_header_len)
goto out_unlock;

- err = -ENOBUFS;
- skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
-
- /*
- * If the write buffer is full, then tough. At this level the user
- * gets to deal with the problem - do your own algorithmic backoffs.
- * That's far more flexible.
- */
-
- if (skb == NULL)
- goto out_unlock;
-
- /*
- * Fill it in
- */
-
- /* FIXME: Save some space for broken drivers that write a
- * hard header at transmission time by themselves. PPP is the
- * notable one here. This should really be fixed at the driver level.
- */
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb_reset_network_header(skb);
-
- /* Try to align data part correctly */
- if (dev->header_ops) {
- skb->data -= dev->hard_header_len;
- skb->tail -= dev->hard_header_len;
- if (len < dev->hard_header_len)
- skb_reset_network_header(skb);
+ if (!skb) {
+ size_t reserved = LL_RESERVED_SPACE(dev);
+ unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
+
+ rcu_read_unlock();
+ skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
+ if (skb == NULL)
+ return -ENOBUFS;
+ skb_reserve(skb, reserved);
+ /* FIXME: Save some space for broken drivers that write a hard
+ * header at transmission time by themselves. PPP is the notable
+ * one here. This should really be fixed at the driver level.
+ */
+ skb_reset_network_header(skb);
+
+ /* Try to align data part correctly */
+ if (hhlen) {
+ skb->data -= hhlen;
+ skb->tail -= hhlen;
+ if (len < hhlen)
+ skb_reset_network_header(skb);
+ }
+ err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+ if (err)
+ goto out_free;
+ goto retry;
}

- /* Returns -EFAULT on error */
- err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- if (err)
- goto out_free;
-
- /*
- * Now send it
- */

dev_queue_xmit(skb);
rcu_read_unlock();
return len;

-out_free:
- kfree_skb(skb);
out_unlock:
rcu_read_unlock();
+out_free:
+ kfree_skb(skb);
return err;
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/