Re: fanotify_mark FAN_MARK_FLUSH | _MOUNT stress blocks write to directory

From: Xiong Zhou
Date: Thu Aug 31 2017 - 02:57:28 EST


On Thu, Aug 31, 2017 at 07:52:41AM +0300, Amir Goldstein wrote:
> On Thu, Aug 31, 2017 at 6:51 AM, Xiong Zhou <xzhou@xxxxxxxxxx> wrote:
> > hi,
> >
> > This happens on 4.13.0-rc7+ to commit 42ff72c
>
> Don't understand. Is this a regression? from which commit?

No. I'm just saying the exact kernel version: Linus tree, commit 42ff72c

The same on 4.11. Did not test on kernels older than that.

>
> >
> > After firing up the stress, touch a file in monitoring directory could
> > hang like forever.
> >
> > Pretty easy to hit.
>
> So are running 3 processes that constantly ask to be notified
> blocking on file system events and then they never read those
> events. Even tough the marks are also destroyed, odd are that
> at least one mark will be alive at any given time.
>
> Why is it surprising that touching a file in monitored directory
> hangs forever?

It should complete with an error or success in a reasonable time ?

If we keep it hanging, oom killer is online and system hangs.

>
> I am very happy to see someone is writing stress tests for
> fanotify, I am just not sure this test is tuned correctly.
> Have you been trying to reproduce a reported bug?

No. Just try stress tests.

Xiong

>
> Amir.
>
> >
> > Thanks,
> > Xiong
> >
> > [ 492.060879] INFO: task touch:2259 blocked for more than 120 seconds.
> > [ 492.093497] Not tainted 4.13.0-rc7-master-42ff72c+ #9
> > [ 492.121049] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [ 492.156289] touch D 0 2259 2233 0x00000080
> > [ 492.180880] Call Trace:
> > [ 492.191813] __schedule+0x28d/0x890
> > [ 492.207440] schedule+0x36/0x80
> > [ 492.221529] fanotify_handle_event+0x2a1/0x2f0
> > [ 492.241542] ? remove_wait_queue+0x60/0x60
> > [ 492.260150] fsnotify+0x2d3/0x4f0
> > [ 492.275055] ? security_inode_init_security+0xd2/0x190
> > [ 492.298097] security_file_open+0x89/0x90
> > [ 492.316038] do_dentry_open+0x139/0x330
> > [ 492.333216] vfs_open+0x4f/0x70
> > [ 492.347225] ? may_open+0x5a/0x100
> > [ 492.362481] path_openat+0x548/0x13b0
> > [ 492.378958] do_filp_open+0x91/0x100
> > [ 492.394969] ? __alloc_fd+0x46/0x170
> > [ 492.410930] do_sys_open+0x124/0x210
> > [ 492.426927] SyS_open+0x1e/0x20
> > [ 492.440963] do_syscall_64+0x67/0x150
> > [ 492.457367] entry_SYSCALL64_slow_path+0x25/0x25
> > [ 492.478251] RIP: 0033:0x7fe877f8e5a0
> > [ 492.495755] RSP: 002b:00007ffe57120318 EFLAGS: 00000246 ORIG_RAX: 0000000000000002
> > [ 492.529911] RAX: ffffffffffffffda RBX: 00007ffe57120578 RCX: 00007fe877f8e5a0
> > [ 492.561508] RDX: 00000000000001b6 RSI: 0000000000000941 RDI: 00007ffe5712268c
> > [ 492.595577] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
> > [ 492.631144] R10: 00007ffe57120030 R11: 0000000000000246 R12: 00007ffe5712268c
> > [ 492.664970] R13: 00007fe8782612a0 R14: 0000000000000001 R15: 0000000000000000
> >
> >
> > --------------------------------
> > $ cat test.sh
> >
> > #!/bin/bash -x
> >
> > fallocate -l 2G test.img
> > mkfs.xfs -fq test.img
> > mkdir -p testdir
> > mount -o loop test.img testdir || exit
> >
> > SCRATCH_MNT=$(readlink -f testdir)
> >
> > function clean ()
> > {
> > while ps -eo comm | grep -q -e fanotify ; do
> > killall fanotify_flush_stress
> > done
> >
> > umount -d $SCRATCH_MNT
> > rm -rf test.img scrc $SCRATCH_MNT fanotify_flush_stress
> > }
> >
> > trap "clean; exit;" 2
> >
> > cc fanotify_flush_stress.c -o fanotify_flush_stress
> >
> > ./fanotify_flush_stress $SCRATCH_MNT &
> > ./fanotify_flush_stress $SCRATCH_MNT &
> > ./fanotify_flush_stress $SCRATCH_MNT &
> >
> > ps jf | grep fanotify
> >
> > touch $SCRATCH_MNT/1 # hang
> >
> > clean
> >
> > --------------------------------
> > $ cat fanotify_flush_stress.c
> >
> > #define _GNU_SOURCE /* Needed to get O_LARGEFILE definition */
> > #include <errno.h>
> > #include <fcntl.h>
> > #include <limits.h>
> > #include <poll.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <sys/fanotify.h>
> > #include <unistd.h>
> > #include <string.h>
> >
> > int main(int argc, char *argv[])
> > {
> > char buf;
> > int fd;
> >
> > /* Check mount point is supplied */
> > if (argc != 2) {
> > fprintf(stderr, "Usage: %s MOUNT\n", argv[0]);
> > exit(EXIT_FAILURE);
> > }
> >
> > printf("%s on %s\n", argv[0], argv[1]);
> > /* Create the file descriptor for accessing the fanotify API */
> > fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT | FAN_NONBLOCK,
> > O_RDWR | O_LARGEFILE);
> > if (fd == -1) {
> > perror("fanotify_init");
> > exit(EXIT_FAILURE);
> > }
> >
> > /* Loop marking all kinds of events and flush */
> > while (1) {
> >
> > #if 1
> > if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
> > FAN_ACCESS | FAN_MODIFY | FAN_OPEN_PERM | FAN_CLOSE |
> > FAN_OPEN | FAN_ACCESS_PERM | FAN_ONDIR |
> > FAN_EVENT_ON_CHILD, -1, argv[1]) == -1)
> >
> > perror("fanotify_mark add");
> >
> > if (fanotify_mark(fd, FAN_MARK_FLUSH | FAN_MARK_MOUNT,
> > 0, -1, argv[1]) == -1)
> > perror("fanotify_mark flush mount");
> > #else
> > if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
> > FAN_ACCESS | FAN_MODIFY | FAN_OPEN_PERM | FAN_CLOSE |
> > FAN_OPEN | FAN_ACCESS_PERM | FAN_ONDIR |
> > FAN_EVENT_ON_CHILD, -1, argv[1]) == -1)
> >
> > perror("fanotify_mark add");
> >
> > if (fanotify_mark(fd, FAN_MARK_FLUSH, 0, -1, argv[1]) == -1)
> > perror("fanotify_mark flush");
> > #endif
> > }
> >
> > close(fd);
> > exit(EXIT_SUCCESS);
> > }