[PATCH v1 2/2] hwrng: core: fix potential suspend/resume race condition

From: Sven van Ashbrook
Date: Wed Aug 31 2022 - 13:21:07 EST


The hwrng fill function runs as a normal kthread. This thread
is not frozen by the PM, so it keeps running while the system is
entering, or already in, suspend. It may therefore call the client
driver's data_present()/data_read() functions at that time, which
may generate errors or warnings. For example, if the client driver
uses an i2c bus, the following warning may be intermittently
generated:

i2c: Transfer while suspended

Fix by converting the delay-polled kthread into an ordered
workqueue running a single, self-rearming delayed_work. Make the
workqueue WQ_FREEZABLE, so the PM will drain any work items
before going into suspend. This prevents client drivers from
being accessed while the system is entering, or already in,
suspend.

Tested on a Chromebook containing a cr50 TPM over i2c. The test
consists of 31000 suspend/resume cycles. Without this patch,
occasional "i2c: Transfer while suspended" warnings are seen.
After applying this patch, these warnings disappear.

This patch also does not appear to cause any regressions on the
ChromeOS test queues.

Signed-off-by: Sven van Ashbrook <svenva@xxxxxxxxxxxx>
---

drivers/char/hw_random/core.c | 95 +++++++++++++++++++----------------
1 file changed, 51 insertions(+), 44 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 3675122c6cce..ee85ca97d215 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -17,7 +17,7 @@
#include <linux/hw_random.h>
#include <linux/random.h>
#include <linux/kernel.h>
-#include <linux/kthread.h>
+#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
@@ -28,14 +28,17 @@

#define RNG_MODULE_NAME "hw_random"

-static struct hwrng *current_rng;
/* the current rng has been explicitly chosen by user via sysfs */
static int cur_rng_set_by_user;
-static struct task_struct *hwrng_fill;
+static struct workqueue_struct *hwrng_wq;
+static struct delayed_work hwrng_fill_dwork;
+static size_t entropy_credit;
+/* Protects rng_list, current_rng, is_hwrng_wq_running */
+static DEFINE_MUTEX(rng_mutex);
/* list of registered rngs */
static LIST_HEAD(rng_list);
-/* Protects rng_list and current_rng */
-static DEFINE_MUTEX(rng_mutex);
+static struct hwrng *current_rng;
+static bool is_hwrng_wq_running;
/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */
static DEFINE_MUTEX(reading_mutex);
static int data_avail;
@@ -488,37 +491,29 @@ static int __init register_miscdev(void)
return misc_register(&rng_miscdev);
}

-static int hwrng_fillfn(void *unused)
+static void hwrng_fillfn(struct work_struct *unused)
{
- size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */
+ unsigned short quality;
unsigned long delay;
+ struct hwrng *rng;
+ size_t entropy; /* in 1/1024 of a bit */
long rc;

- while (!kthread_should_stop()) {
- unsigned short quality;
- struct hwrng *rng;
-
- rng = get_current_rng();
- if (IS_ERR(rng) || !rng)
- break;
- mutex_lock(&reading_mutex);
- rc = rng_get_data(rng, rng_fillbuf,
- rng_buffer_size(), 1);
- if (current_quality != rng->quality)
- rng->quality = current_quality; /* obsolete */
- quality = rng->quality;
- mutex_unlock(&reading_mutex);
- put_rng(rng);
-
- if (!quality)
- break;
+ rng = get_current_rng();
+ if (IS_ERR(rng) || !rng)
+ return;
+ mutex_lock(&reading_mutex);
+ rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1);
+ if (current_quality != rng->quality)
+ rng->quality = current_quality; /* obsolete */
+ quality = rng->quality;
+ mutex_unlock(&reading_mutex);
+ put_rng(rng);

- if (rc <= 0) {
- pr_warn("hwrng: no data available\n");
- msleep_interruptible(10000);
- continue;
- }
+ if (!quality)
+ return;

+ if (rc > 0) {
/* If we cannot credit at least one bit of entropy,
* keep track of the remainder for the next iteration
*/
@@ -529,11 +524,11 @@ static int hwrng_fillfn(void *unused)
/* Outside lock, sure, but y'know: randomness. */
delay = add_hwgenerator_randomness((void *)rng_fillbuf, rc,
entropy >> 10);
- if (delay > 0)
- schedule_timeout_interruptible(delay);
+ } else {
+ pr_warn("hwrng: no data available\n");
+ delay = 10 * HZ;
}
- hwrng_fill = NULL;
- return 0;
+ mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, delay);
}

static void hwrng_manage_rngd(struct hwrng *rng)
@@ -541,14 +536,12 @@ static void hwrng_manage_rngd(struct hwrng *rng)
if (WARN_ON(!mutex_is_locked(&rng_mutex)))
return;

- if (rng->quality == 0 && hwrng_fill)
- kthread_stop(hwrng_fill);
- if (rng->quality > 0 && !hwrng_fill) {
- hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
- if (IS_ERR(hwrng_fill)) {
- pr_err("hwrng_fill thread creation failed\n");
- hwrng_fill = NULL;
- }
+ if (rng->quality == 0 && is_hwrng_wq_running) {
+ cancel_delayed_work(&hwrng_fill_dwork);
+ is_hwrng_wq_running = false;
+ } else if (rng->quality > 0 && !is_hwrng_wq_running) {
+ mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, 0);
+ is_hwrng_wq_running = true;
}
}

@@ -631,8 +624,7 @@ void hwrng_unregister(struct hwrng *rng)
new_rng = get_current_rng_nolock();
if (list_empty(&rng_list)) {
mutex_unlock(&rng_mutex);
- if (hwrng_fill)
- kthread_stop(hwrng_fill);
+ cancel_delayed_work_sync(&hwrng_fill_dwork);
} else
mutex_unlock(&rng_mutex);

@@ -703,17 +695,32 @@ static int __init hwrng_modinit(void)
return -ENOMEM;
}

+ /* ordered wq to mimic delay-polled kthread behaviour */
+ hwrng_wq = alloc_ordered_workqueue("hwrng",
+ WQ_FREEZABLE | /* prevent work from running during suspend/resume */
+ WQ_MEM_RECLAIM /* client drivers may need memory reclaim */
+ );
+ if (!hwrng_wq) {
+ kfree(rng_fillbuf);
+ kfree(rng_buffer);
+ return -ENOMEM;
+ }
+
ret = register_miscdev();
if (ret) {
+ destroy_workqueue(hwrng_wq);
kfree(rng_fillbuf);
kfree(rng_buffer);
}

+ INIT_DELAYED_WORK(&hwrng_fill_dwork, hwrng_fillfn);
+
return ret;
}

static void __exit hwrng_modexit(void)
{
+ destroy_workqueue(hwrng_wq);
mutex_lock(&rng_mutex);
BUG_ON(current_rng);
kfree(rng_buffer);
--
2.37.2.672.g94769d06f0-goog