[PATCH RFC net-next 3/4] net: add flag to indicate NAPI/GRO is running right now

From: Alexander Lobakin
Date: Thu Jun 29 2023 - 11:24:42 EST


Currently, there's no easy way to check whether a NAPI polling cycle is
running and on which CPU, although this might come in very handy in
several cases.
Commit 8c48eea3adf3 ("page_pool: allow caching from safely localized
NAPI") added napi_struct::list_owner, BUT it's set when the NAPI is
*scheduled*. `->list_owner == smp_processor_id()` doesn't mean we're
inside the corresponding polling loop.
Introduce a new NAPI state flag, NAPI{,F}_STATE_RUNNING. Set it right
before calling ->poll() and clear it after all napi_gro_flush() and
gro_normal_list() calls are done. They run in the same context and, in
fact, are part of the receive routine.
When `packets == budget`, napi_complete_done() is not called, so in
that case it's safe to clear the flag after ->poll() ends. Otherwise,
however, napi_complete_done() can re-enable interrupts, and the NAPI
may already get scheduled on another CPU. In that case, clear the
flag in napi_complete_done() itself.
The main use case for the flag is as follows:

	if (test_bit(NAPI_STATE_RUNNING, &n->state) &&
	    READ_ONCE(n->list_owner) == smp_processor_id())
		/* you're inside n->poll() or the following GRO
		 * processing context
		 */

In other words, when the condition is true, feel free to use resources
protected by this NAPI, such as page_pools covered by it, per-CPU NAPI
caches, etc. Just make sure interrupts are enabled.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@xxxxxxxxx>
---
include/linux/netdevice.h | 2 ++
net/core/dev.c | 23 +++++++++++++++++------
2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b828c7a75be2..db3aea863ea9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -389,6 +389,7 @@ enum {
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
+ NAPI_STATE_RUNNING, /* This NAPI or GRO is running on ::list_owner */
};

enum {
@@ -402,6 +403,7 @@ enum {
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
+ NAPIF_STATE_RUNNING = BIT(NAPI_STATE_RUNNING),
};

enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..7f0d23c9e25e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6039,7 +6039,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)

new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
NAPIF_STATE_SCHED_THREADED |
- NAPIF_STATE_PREFER_BUSY_POLL);
+ NAPIF_STATE_PREFER_BUSY_POLL |
+ NAPIF_STATE_RUNNING);

/* If STATE_MISSED was set, leave STATE_SCHED set,
* because we will call napi->poll() one more time.
@@ -6128,6 +6129,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
/* All we really want here is to re-enable device interrupts.
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
*/
+ set_bit(NAPI_STATE_RUNNING, &napi->state);
rc = napi->poll(napi, budget);
/* We can't gro_normal_list() here, because napi->poll() might have
* rearmed the napi (napi_complete_done()) in which case it could
@@ -6135,8 +6137,10 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
*/
trace_napi_poll(napi, rc, budget);
netpoll_poll_unlock(have_poll_lock);
- if (rc == budget)
+ if (rc == budget) {
__busy_poll_stop(napi, skip_schedule);
+ clear_bit(NAPI_STATE_RUNNING, &napi->state);
+ }
local_bh_enable();
}

@@ -6186,9 +6190,11 @@ void napi_busy_loop(unsigned int napi_id,
have_poll_lock = netpoll_poll_lock(napi);
napi_poll = napi->poll;
}
+ set_bit(NAPI_STATE_RUNNING, &napi->state);
work = napi_poll(napi, budget);
trace_napi_poll(napi, work, budget);
gro_normal_list(napi);
+ clear_bit(NAPI_STATE_RUNNING, &napi->state);
count:
if (work > 0)
__NET_ADD_STATS(dev_net(napi->dev),
@@ -6457,6 +6463,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ set_bit(NAPI_STATE_RUNNING, &n->state);
work = n->poll(n, weight);
trace_napi_poll(n, work, weight);
}
@@ -6466,7 +6473,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
n->poll, work, weight);

if (likely(work < weight))
- return work;
+ goto out;

/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
@@ -6475,7 +6482,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
*/
if (unlikely(napi_disable_pending(n))) {
napi_complete(n);
- return work;
+ goto out;
}

/* The NAPI context has more processing work, but busy-polling
@@ -6488,7 +6495,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
*/
napi_schedule(n);
}
- return work;
+ goto out;
}

if (n->gro_bitmask) {
@@ -6506,11 +6513,15 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
if (unlikely(!list_empty(&n->poll_list))) {
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
n->dev ? n->dev->name : "backlog");
- return work;
+ goto out;
}

*repoll = true;

+out:
+ if (READ_ONCE(n->list_owner) == smp_processor_id())
+ clear_bit(NAPI_STATE_RUNNING, &n->state);
+
return work;
}

--
2.41.0