[ANNOUNCE] 3.10.9-rt5

From: Sebastian Andrzej Siewior
Date: Thu Aug 22 2013 - 14:21:34 EST


Dear RT folks!

I'm pleased to announce the v3.10.9-rt5 patch set.

Changes since v3.10.9-rt4
- swait fixes from Steven: missing memory barriers plus a wake-all for
  the RCU nocb grace-period waiters. They fix the issues with
  CONFIG_RCU_NOCB_CPU where the system suddenly froze and RCU wasn't
  doing its job anymore (a sketch of the barrier pairing follows this
  list).
- hwlat detector improvements by Steven: the sampler no longer runs
  under stop_machine() and now also records the outer gap between
  consecutive samples (a condensed version of the new loop precedes
  the delta below).
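
To illustrate the swait fix: a sketch of the barrier pairing it
restores in kernel/wait-simple.c. This is an illustration only, not
the patch itself; "cond" stands for whatever condition swait_event()
evaluates:

	/*
	 * Waiter				Waker
	 * ------				-----
	 * list_add(&w->node, &head->list);	cond = true;
	 * smp_mb();				smp_mb();
	 * if (!cond)				if (!list_empty(&head->list))
	 *	schedule();			wake enqueued waiters;
	 *
	 * Without the barrier pair, the waiter's load of cond may be
	 * reordered before its list_add(), and the waker's list_empty()
	 * check may be reordered before its store to cond. Both sides
	 * then see nothing to do and the wakeup is lost, which matches
	 * the RCU_NOCB_CPU freeze described above.
	 */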

Known issues:

- SLAB support not working

- The cpsw network driver shows some issues.

- bcache does not compile.

- set_affinity callbacks result in a splat due to sleeping while
  atomic

- an ancient race (since we got sleeping spinlocks) where the
  TASK_TRACED state is temporarily replaced while waiting on a rw
  lock, so the task can't be traced.

The delta patch against v3.10.9-rt4 is appended below and can be found
here:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/incr/patch-3.10.9-rt4-rt5.patch.xz

The RT patch against 3.10.9 can be found here:

https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patch-3.10.9-rt5.patch.xz

The split quilt queue is available at:

https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patches-3.10.9-rt5.tar.xz

Sebastian
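
Before the delta itself, a condensed restatement of the reworked hwlat
sampling loop. This is a simplified sketch, not the code as merged:
error handling, sample recording and the time_*() wrappers (which keep
the driver building without CONFIG_TRACING) are omitted, and
trace_clock_local() is used directly:

	static int get_sample(void)	/* caller has IRQs disabled */
	{
		u64 start, t1, t2, last_t2 = 0;
		u64 inner = 0, outer = 0;
		s64 diff, total;

		start = trace_clock_local();
		do {
			t1 = trace_clock_local();
			t2 = trace_clock_local();

			if (last_t2) {
				/* outer gap: last pass' t2 to this pass' t1 */
				diff = (t1 - last_t2) / 1000;
				if (diff > outer)
					outer = diff;
			}
			last_t2 = t2;

			/* inner gap: between the two back-to-back reads */
			diff = (t2 - t1) / 1000;
			if (diff > inner)
				inner = diff;

			total = (t2 - start) / 1000;
		} while (total <= data.sample_width);

		/* a gap above the threshold in either place is a hit */
		return inner > data.threshold || outer > data.threshold;
	}

The interesting addition is the outer gap: a latency hitting between
two loop passes used to go unnoticed, since only t1..t2 was measured.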

diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
index b7b7c90..0bfa40d 100644
--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -41,7 +41,6 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/ring_buffer.h>
-#include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
@@ -51,6 +50,7 @@
#include <linux/version.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/trace_clock.h>

#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
@@ -106,7 +106,6 @@ struct data; /* Global state */
/* Sampling functions */
static int __buffer_add_sample(struct sample *sample);
static struct sample *buffer_get_sample(struct sample *sample);
-static int get_sample(void *unused);

/* Threading and state */
static int kthread_fn(void *unused);
@@ -143,11 +142,12 @@ static void detector_exit(void);
struct sample {
u64 seqnum; /* unique sequence */
u64 duration; /* ktime delta */
+ u64 outer_duration; /* ktime delta (outer loop) */
struct timespec timestamp; /* wall time */
unsigned long lost;
};

-/* keep the global state somewhere. Mostly used under stop_machine. */
+/* keep the global state somewhere. */
static struct data {

struct mutex lock; /* protect changes */
@@ -170,7 +170,7 @@ static struct data {
* @sample: The new latency sample value
*
* This receives a new latency sample and records it in a global ring buffer.
- * No additional locking is used in this case - suited for stop_machine use.
+ * No additional locking is used in this case.
*/
static int __buffer_add_sample(struct sample *sample)
{
@@ -210,29 +210,60 @@ static struct sample *buffer_get_sample(struct sample *sample)
return sample;
}

+#ifndef CONFIG_TRACING
+#define time_type ktime_t
+#define time_get() ktime_get()
+#define time_to_us(x) ktime_to_us(x)
+#define time_sub(a, b) ktime_sub(a, b)
+#define init_time(a, b) (a).tv64 = b
+#define time_u64(a) (a).tv64
+#else
+#define time_type u64
+#define time_get() trace_clock_local()
+#define time_to_us(x) ((x) / 1000)
+#define time_sub(a, b) ((a) - (b))
+#define init_time(a, b) a = b
+#define time_u64(a) a
+#endif
/**
* get_sample - sample the CPU TSC and look for likely hardware latencies
- * @unused: This is not used but is a part of the stop_machine API
*
* Used to repeatedly capture the CPU TSC (or similar), looking for potential
- * hardware-induced latency. Called under stop_machine, with data.lock held.
+ * hardware-induced latency. Called with interrupts disabled and with data.lock held.
*/
-static int get_sample(void *unused)
+static int get_sample(void)
{
- ktime_t start, t1, t2;
+ time_type start, t1, t2, last_t2;
s64 diff, total = 0;
u64 sample = 0;
- int ret = 1;
+ u64 outer_sample = 0;
+ int ret = -1;

- start = ktime_get(); /* start timestamp */
+ init_time(last_t2, 0);
+ start = time_get(); /* start timestamp */

do {

- t1 = ktime_get(); /* we'll look for a discontinuity */
- t2 = ktime_get();
+ t1 = time_get(); /* we'll look for a discontinuity */
+ t2 = time_get();
+
+ if (time_u64(last_t2)) {
+ /* Check the delta from the outer loop (t2 to next t1) */
+ diff = time_to_us(time_sub(t1, last_t2));
+ /* This shouldn't happen */
+ if (diff < 0) {
+ printk(KERN_ERR BANNER "time running backwards\n");
+ goto out;
+ }
+ if (diff > outer_sample)
+ outer_sample = diff;
+ }
+ last_t2 = t2;
+
+ total = time_to_us(time_sub(t2, start)); /* sample width */

- total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
- diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */
+ /* This checks the inner loop (t1 to t2) */
+ diff = time_to_us(time_sub(t2, t1)); /* current diff */

/* This shouldn't happen */
if (diff < 0) {
@@ -245,13 +276,18 @@ static int get_sample(void *unused)

} while (total <= data.sample_width);

+ ret = 0;
+
/* If we exceed the threshold value, we have found a hardware latency */
- if (sample > data.threshold) {
+ if (sample > data.threshold || outer_sample > data.threshold) {
struct sample s;

+ ret = 1;
+
data.count++;
s.seqnum = data.count;
s.duration = sample;
+ s.outer_duration = outer_sample;
s.timestamp = CURRENT_TIME;
__buffer_add_sample(&s);

@@ -260,7 +296,6 @@ static int get_sample(void *unused)
data.max_sample = sample;
}

- ret = 0;
out:
return ret;
}
@@ -270,32 +305,30 @@ static int get_sample(void *unused)
* @unused: A required part of the kthread API.
*
* Used to periodically sample the CPU TSC via a call to get_sample. We
- * use stop_machine, whith does (intentionally) introduce latency since we
+ * disable interrupts, which does (intentionally) introduce latency since we
* need to ensure nothing else might be running (and thus pre-empting).
* Obviously this should never be used in production environments.
*
- * stop_machine will schedule us typically only on CPU0 which is fine for
- * almost every real-world hardware latency situation - but we might later
- * generalize this if we find there are any actualy systems with alternate
- * SMI delivery or other non CPU0 hardware latencies.
+ * Currently this runs on whichever CPU it was scheduled on, but most
+ * real-world hardware latency situations occur across several CPUs,
+ * so we might later generalize this if we find there are any actual
+ * systems with alternate SMI delivery or other hardware latencies.
*/
static int kthread_fn(void *unused)
{
- int err = 0;
- u64 interval = 0;
+ int ret;
+ u64 interval;

while (!kthread_should_stop()) {

mutex_lock(&data.lock);

- err = stop_machine(get_sample, unused, 0);
- if (err) {
- /* Houston, we have a problem */
- mutex_unlock(&data.lock);
- goto err_out;
- }
+ local_irq_disable();
+ ret = get_sample();
+ local_irq_enable();

- wake_up(&data.wq); /* wake up reader(s) */
+ if (ret > 0)
+ wake_up(&data.wq); /* wake up reader(s) */

interval = data.sample_window - data.sample_width;
do_div(interval, USEC_PER_MSEC); /* modifies interval value */
@@ -303,15 +336,10 @@ static int kthread_fn(void *unused)
mutex_unlock(&data.lock);

if (msleep_interruptible(interval))
- goto out;
+ break;
}
- goto out;
-err_out:
- printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
- enabled = 0;
-out:
- return err;

+ return 0;
}

/**
@@ -407,8 +435,7 @@ static int init_stats(void)
* This function provides a generic read implementation for the global state
* "data" structure debugfs filesystem entries. It would be nice to use
* simple_attr_read directly, but we need to make sure that the data.lock
- * spinlock is held during the actual read (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual read.
*/
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos, const u64 *entry)
@@ -443,8 +470,7 @@ static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
* This function provides a generic write implementation for the global state
* "data" structure debugfs filesystem entries. It would be nice to use
* simple_attr_write directly, but we need to make sure that the data.lock
- * spinlock is held during the actual write (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual write.
*/
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos, u64 *entry)
@@ -738,10 +764,11 @@ static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
}
}

- len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
- sample->timestamp.tv_sec,
- sample->timestamp.tv_nsec,
- sample->duration);
+ len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
+ sample->timestamp.tv_sec,
+ sample->timestamp.tv_nsec,
+ sample->duration,
+ sample->outer_duration);


/* handling partial reads is more trouble than it's worth */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 684b762..dc0c4b2 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2036,7 +2036,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
*/
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- swait_wake(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+ swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
}

/*
diff --git a/kernel/wait-simple.c b/kernel/wait-simple.c
index 4b9a0b5..2c85626 100644
--- a/kernel/wait-simple.c
+++ b/kernel/wait-simple.c
@@ -16,6 +16,8 @@
static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
{
list_add(&w->node, &head->list);
+ /* We can't let the condition leak before the setting of head */
+ smp_mb();
}

/* Removes w from head->list. Must be called with head->lock locked. */
@@ -27,6 +29,8 @@ static inline void __swait_dequeue(struct swaiter *w)
/* Check whether a head has waiters enqueued */
static inline bool swait_head_has_waiters(struct swait_head *h)
{
+ /* Make sure the condition is visible before checking list_empty() */
+ smp_mb();
return !list_empty(&h->list);
}

diff --git a/localversion-rt b/localversion-rt
index ad3da1b..0efe7ba 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt4
+-rt5