Re: x86/mce: suspicious RCU usage in 4.13.4

From: Borislav Petkov
Date: Sun Oct 15 2017 - 05:41:13 EST


On Wed, Oct 11, 2017 at 09:34:22PM +0000, Luck, Tony wrote:
> > here's a second attempt at a more rigorous simplification: RCU stuff is
> > gone and only a single loop scans through the elements.
>
> The dev_mce_log() changes look good now.
>
> You can apply the axe to more bits of mce_chrdev_read() though. Like that

That provoked a very serious axing. Please check whether I went too far. Hunk
below is ontop of what got axed already:

---
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 1e1c6d22c93e..17d2bab25720 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -162,13 +162,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
return 0;
}

-static void collect_tscs(void *data)
-{
- unsigned long *cpu_tsc = (unsigned long *)data;
-
- cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
static int mce_apei_read_done;

/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -216,14 +209,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
size_t usize, loff_t *off)
{
char __user *buf = ubuf;
- unsigned long *cpu_tsc;
- unsigned prev, next;
+ unsigned next;
int i, err;

- cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
- if (!cpu_tsc)
- return -ENOMEM;
-
mutex_lock(&mce_chrdev_read_mutex);

if (!mce_apei_read_done) {
@@ -232,63 +220,32 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
goto out;
}

- next = mcelog.next;
-
/* Only supports full reads right now */
err = -EINVAL;
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
goto out;

+ next = mcelog.next;
err = 0;
- prev = 0;
- do {
- for (i = prev; i < next; i++) {
- unsigned long start = jiffies;
- struct mce *m = &mcelog.entry[i];
-
- while (!m->finished) {
- if (time_after_eq(jiffies, start + 2)) {
- memset(m, 0, sizeof(*m));
- goto timeout;
- }
- cpu_relax();
- }
- smp_rmb();
- err |= copy_to_user(buf, m, sizeof(*m));
- buf += sizeof(*m);
-timeout:
- ;
- }
-
- memset(mcelog.entry + prev, 0,
- (next - prev) * sizeof(struct mce));
- prev = next;
- next = cmpxchg(&mcelog.next, prev, 0);
- } while (next != prev);
-
- /*
- * Collect entries that were still getting written before the
- * synchronize.
- */
- on_each_cpu(collect_tscs, cpu_tsc, 1);

- for (i = next; i < MCE_LOG_LEN; i++) {
+ for (i = 0; i < next; i++) {
struct mce *m = &mcelog.entry[i];

- if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
- err |= copy_to_user(buf, m, sizeof(*m));
- smp_rmb();
- buf += sizeof(*m);
- memset(m, 0, sizeof(*m));
- }
+ if (!m->finished)
+ continue;
+
+ err |= copy_to_user(buf, m, sizeof(*m));
+ buf += sizeof(*m);
}

+ memset(mcelog.entry, 0, next * sizeof(struct mce));
+ mcelog.next = 0;
+
if (err)
err = -EFAULT;

out:
mutex_unlock(&mce_chrdev_read_mutex);
- kfree(cpu_tsc);

return err ? err : buf - ubuf;
}

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.