[RFC PATCH v0 3/3] percpu: Avoid using percpu ptrs of non-existing cpus

From: Bharata B Rao
Date: Tue Jun 01 2021 - 02:52:36 EST


Prevent the callers of alloc_percpu() from using the percpu
pointers of non-existing CPUs. Also switch those callers that
require initialization of percpu data for onlined CPUs to
the new variant alloc_percpu_cb().

Note: Not all callers have been modified here.

Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxxxxx>
---
fs/namespace.c | 4 ++--
kernel/cgroup/rstat.c | 20 ++++++++++++++++----
kernel/sched/cpuacct.c | 10 +++++-----
kernel/sched/psi.c | 14 +++++++++++---
lib/percpu-refcount.c | 4 ++--
lib/percpu_counter.c | 2 +-
net/ipv4/fib_semantics.c | 2 +-
net/ipv6/route.c | 6 +++---
8 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index c3f1a78ba369..b6ea584b99e5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -182,7 +182,7 @@ int mnt_get_count(struct mount *mnt)
int count = 0;
int cpu;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
}

@@ -294,7 +294,7 @@ static unsigned int mnt_get_writers(struct mount *mnt)
unsigned int count = 0;
int cpu;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
}

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index cee265cb535c..b25c59138c0b 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -152,7 +152,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)

lockdep_assert_held(&cgroup_rstat_lock);

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
cpu);
struct cgroup *pos = NULL;
@@ -245,19 +245,31 @@ void cgroup_rstat_flush_release(void)
spin_unlock_irq(&cgroup_rstat_lock);
}

+static int cgroup_rstat_cpuhp_handler(void __percpu *ptr, unsigned int cpu, void *data)
+{
+ struct cgroup *cgrp = (struct cgroup *)data;
+ struct cgroup_rstat_cpu *rstatc = per_cpu_ptr(ptr, cpu);
+
+ rstatc->updated_children = cgrp;
+ u64_stats_init(&rstatc->bsync);
+ return 0;
+}
+
int cgroup_rstat_init(struct cgroup *cgrp)
{
int cpu;

/* the root cgrp has rstat_cpu preallocated */
if (!cgrp->rstat_cpu) {
- cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+ cgrp->rstat_cpu = alloc_percpu_cb(struct cgroup_rstat_cpu,
+ cgroup_rstat_cpuhp_handler,
+ cgrp);
if (!cgrp->rstat_cpu)
return -ENOMEM;
}

/* ->updated_children list is self terminated */
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

rstatc->updated_children = cgrp;
@@ -274,7 +286,7 @@ void cgroup_rstat_exit(struct cgroup *cgrp)
cgroup_rstat_flush(cgrp);

/* sanity check */
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 104a1bade14f..81dd53387ba5 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -160,7 +160,7 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css,
u64 totalcpuusage = 0;
int i;

- for_each_possible_cpu(i)
+ for_each_online_cpu(i)
totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

return totalcpuusage;
@@ -195,7 +195,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
if (val)
return -EINVAL;

- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
cpuacct_cpuusage_write(ca, cpu, 0);

return 0;
@@ -208,7 +208,7 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m,
u64 percpu;
int i;

- for_each_possible_cpu(i) {
+ for_each_online_cpu(i) {
percpu = cpuacct_cpuusage_read(ca, i, index);
seq_printf(m, "%llu ", (unsigned long long) percpu);
}
@@ -242,7 +242,7 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
seq_printf(m, " %s", cpuacct_stat_desc[index]);
seq_puts(m, "\n");

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

seq_printf(m, "%d", cpu);
@@ -275,7 +275,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
int stat;

memset(val, 0, sizeof(val));
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index cc25a3cff41f..228977aa4780 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -186,7 +186,7 @@ static void group_init(struct psi_group *group)
{
int cpu;

- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
group->avg_last_update = sched_clock();
group->avg_next_update = group->avg_last_update + psi_period;
@@ -321,7 +321,7 @@ static void collect_percpu_times(struct psi_group *group,
* the sampling period. This eliminates artifacts from uneven
* loading, or even entirely idle CPUs.
*/
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
u32 times[NR_PSI_STATES];
u32 nonidle;
u32 cpu_changed_states;
@@ -935,12 +935,20 @@ void psi_memstall_leave(unsigned long *flags)
}

#ifdef CONFIG_CGROUPS
+static int psi_cpuhp_handler(void __percpu *ptr, unsigned int cpu, void *unused)
+{
+ struct psi_group_cpu *groupc = per_cpu_ptr(ptr, cpu);
+
+ seqcount_init(&groupc->seq);
+ return 0;
+}
+
int psi_cgroup_alloc(struct cgroup *cgroup)
{
if (static_branch_likely(&psi_disabled))
return 0;

- cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
+ cgroup->psi.pcpu = alloc_percpu_cb(struct psi_group_cpu, psi_cpuhp_handler, NULL);
if (!cgroup->psi.pcpu)
return -ENOMEM;
group_init(&cgroup->psi);
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index a1071cdefb5a..aeba43c33600 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -173,7 +173,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
unsigned long count = 0;
int cpu;

- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
count += *per_cpu_ptr(percpu_count, cpu);

pr_debug("global %lu percpu %lu\n",
@@ -253,7 +253,7 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
* zeroing is visible to all percpu accesses which can see the
* following __PERCPU_REF_ATOMIC clearing.
*/
- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
*per_cpu_ptr(percpu_count, cpu) = 0;

smp_store_release(&ref->percpu_count_ptr,
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..db40abc6f0f5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -63,7 +63,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
unsigned long flags;

raw_spin_lock_irqsave(&fbc->lock, flags);
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
*pcount = 0;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a632b66bc13a..dbfd14b0077f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -194,7 +194,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
if (!rtp)
return;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct rtable *rt;

rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a22822bdbf39..e7db3a5fe5c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -165,7 +165,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
if (dev == loopback_dev)
return;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
struct rt6_info *rt;

@@ -3542,7 +3542,7 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
if (fib6_nh->rt6i_pcpu) {
int cpu;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;

@@ -6569,7 +6569,7 @@ int __init ip6_route_init(void)
#endif
#endif

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

INIT_LIST_HEAD(&ul->head);
--
2.31.1