Re: INFO: rcu detected stall in n_tty_receive_char_special

From: Tetsuo Handa
Date: Tue Apr 24 2018 - 08:25:42 EST


On 2018/04/24 1:01, Tetsuo Handa wrote:
> Bisection reached to commit e052c6d15c61cc4c ("tty: Use unbound workqueue
> for all input workers") added in 4.4-rc1.
>
> Reverting that commit on top of 4.4 solved the lockup. But unfortunately,
> reverting that commit on top of 4.16 only helped surviving the test longer
> than vanilla 4.16; still stalls at __process_echoes(). There might be similar
> changes which need to be reverted as well.

I noticed that when stall inside __process_echoes() occurs, it is looping at
discard loop added by commit cbfd0340ae199337 ("n_tty: Process echoes in blocks").
Since ldata->echo_commit < ldata->echo_tail is for some reason true upon entry,
the discard loop serves as almost infinite loop.

--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -613,12 +613,21 @@ static size_t __process_echoes(struct tty_struct *tty)
int space, old_space;
size_t tail;
unsigned char c;
+ unsigned long start = jiffies;

old_space = space = tty_write_room(tty);

tail = ldata->echo_tail;
+ if (ldata->echo_commit < tail)
+ printk("BUG1: ldata->echo_commit=%ld tail=%ld space=%d old_space=%d\n",
+ ldata->echo_commit, tail, space, old_space);
while (ldata->echo_commit != tail) {
c = echo_buf(ldata, tail);
+ if (time_after(jiffies, start + 5 * HZ)) {
+ printk("ldata->echo_commit=%ld tail=%ld c=%c space=%d old_space=%d\n",
+ ldata->echo_commit, tail, c, space, old_space);
+ start = jiffies;
+ }
if (c == ECHO_OP_START) {
unsigned char op;
int no_space_left = 0;
@@ -729,6 +738,11 @@ static size_t __process_echoes(struct tty_struct *tty)
* of echo overrun before the next commit), then discard enough
* data at the tail to prevent a subsequent overrun */
while (ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) {
+ if (time_after(jiffies, start + 5 * HZ)) {
+ printk("ldata->echo_commit=%ld ldata->echo_tail=%ld tail=%ld space=%d old_space=%d\n",
+ ldata->echo_commit, ldata->echo_tail, tail, space, old_space);
+ start = jiffies;
+ }
if (echo_buf(ldata, tail) == ECHO_OP_START) {
if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB)
tail += 3;
@@ -739,6 +753,9 @@ static size_t __process_echoes(struct tty_struct *tty)
}

ldata->echo_tail = tail;
+ if (ldata->echo_commit < tail)
+ printk("BUG2: ldata->echo_commit=%ld tail=%ld space=%d old_space=%d\n",
+ ldata->echo_commit, tail, space, old_space);
return old_space - space;
}



[ 5.890788] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 27.357324] BUG1: ldata->echo_commit=120 tail=130 space=7936 old_space=7936
[ 32.358527] ldata->echo_commit=120 ldata->echo_tail=130 tail=2994833619 space=0 old_space=7936
[ 37.362404] ldata->echo_commit=120 ldata->echo_tail=130 tail=5930715049 space=0 old_space=7936
[ 42.366124] ldata->echo_commit=120 ldata->echo_tail=130 tail=8943875147 space=0 old_space=7936
[ 47.370508] ldata->echo_commit=120 ldata->echo_tail=130 tail=11950164125 space=0 old_space=7936
[ 52.374224] ldata->echo_commit=120 ldata->echo_tail=130 tail=14880807667 space=0 old_space=7936
[ 57.378597] ldata->echo_commit=120 ldata->echo_tail=130 tail=17821338400 space=0 old_space=7936
[ 62.383638] ldata->echo_commit=120 ldata->echo_tail=130 tail=20811717629 space=0 old_space=7936
[ 63.203807] sysrq: SysRq : Resetting

[ 5.843667] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 28.306346] BUG1: ldata->echo_commit=128 tail=130 space=7936 old_space=7936
[ 33.307133] ldata->echo_commit=128 ldata->echo_tail=130 tail=2892537244 space=0 old_space=7936
[ 38.311478] ldata->echo_commit=128 ldata->echo_tail=130 tail=5766457276 space=0 old_space=7936
[ 43.315319] ldata->echo_commit=128 ldata->echo_tail=130 tail=8686538551 space=0 old_space=7936
[ 48.319164] ldata->echo_commit=128 ldata->echo_tail=130 tail=11556458920 space=0 old_space=7936
[ 50.122017] sysrq: SysRq : Resetting

[ 6.026322] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 38.149864] BUG1: ldata->echo_commit=118 tail=130 space=7936 old_space=7936
[ 43.150167] ldata->echo_commit=118 ldata->echo_tail=130 tail=2789490540 space=0 old_space=7936
[ 48.154012] ldata->echo_commit=118 ldata->echo_tail=130 tail=5592467000 space=0 old_space=7936
[ 53.158365] ldata->echo_commit=118 ldata->echo_tail=130 tail=8376906144 space=0 old_space=7936
[ 58.162211] ldata->echo_commit=118 ldata->echo_tail=130 tail=11158984120 space=0 old_space=7936
[ 60.487338] sysrq: SysRq : Resetting

[ 9.170831] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 25.929908] BUG1: ldata->echo_commit=120 tail=130 space=7936 old_space=7936
[ 30.930553] ldata->echo_commit=120 ldata->echo_tail=130 tail=3008469343 space=0 old_space=7936
[ 35.935475] ldata->echo_commit=120 ldata->echo_tail=130 tail=5857403750 space=0 old_space=7936
[ 40.940153] ldata->echo_commit=120 ldata->echo_tail=130 tail=8858176994 space=0 old_space=7936
[ 45.944527] ldata->echo_commit=120 ldata->echo_tail=130 tail=11838898685 space=0 old_space=7936
[ 48.948625] sysrq: SysRq : Resetting

[ 6.071991] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 22.890428] BUG1: ldata->echo_commit=128 tail=130 space=7936 old_space=7936
[ 27.891372] ldata->echo_commit=128 ldata->echo_tail=130 tail=3064270209 space=0 old_space=7936
[ 32.895212] ldata->echo_commit=128 ldata->echo_tail=130 tail=6085270166 space=0 old_space=7936
[ 37.899047] ldata->echo_commit=128 ldata->echo_tail=130 tail=9102650663 space=0 old_space=7936
[ 42.903405] ldata->echo_commit=128 ldata->echo_tail=130 tail=12195198062 space=0 old_space=7936
[ 44.497876] sysrq: SysRq : Resetting

[ 5.804688] input: PC Speaker as /devices/platform/pcspkr/input/input4
[ 23.632487] BUG1: ldata->echo_commit=102 tail=130 space=7936 old_space=7936
[ 28.633878] ldata->echo_commit=102 ldata->echo_tail=130 tail=2930081035 space=0 old_space=7936
[ 33.638168] ldata->echo_commit=102 ldata->echo_tail=130 tail=5867627875 space=0 old_space=7936
[ 38.654062] ldata->echo_commit=102 ldata->echo_tail=130 tail=8804236495 space=0 old_space=7936
[ 40.296804] sysrq: SysRq : Resetting