[POSSIBLE BUG]: btrfs partition: cat shows one version of the script, but exec runs the old version

From: Mirsad Goran Todorovac
Date: Wed Jun 28 2023 - 03:57:35 EST


Hi,

Due to another bug discovered by kselftests, on the 6.4.0 kernel from the Torvalds
tree with the Ubuntu generic config merged with the per-kselftest configs ...

It seems that the bash shell, while executing a script, sees an older version
than the one shown in an editor such as vi.

The modified script on the btrfs partition is:

root@defiant:/linux/kernel/linux_torvalds/tools/testing/selftests/drivers/net/bonding# cat ./test.sh
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# cause kernel oops in bond_rr_gen_slave_id
#
# DEBUG may be supplied from the environment; it falls back to 0 when unset
# or empty.
DEBUG=${DEBUG:-0}

# -e: stop at the first command that fails outside a condition or &&/|| list.
# -x: trace every command; tracing is enabled unconditionally here.
set -e -x
# Turns tracing on when DEBUG is non-zero. With -x already set on the line
# above this adds nothing; being an && list, a false test does not trip -e.
test ${DEBUG} -ne 0 && set -x

# Cleanup handler: print the current links, then delete the link1_1 device
# and the "server" and "client" network namespaces. Every delete is followed
# by `|| true`, so a delete that fails (for example because the object is
# already gone) does not stop the remaining cleanup under `set -e`.
finish()
{
ip link show
ip link del link1_1 || true
ip netns delete server || true
ip netns delete client || true
}

# Run finish on every exit path of the script, including an early exit
# caused by `set -e`.
trap finish EXIT

# Addresses used below: client_ip4 goes on bond0 in the "client" namespace,
# server_ip4 goes on eth0 in the "server" namespace.
client_ip4=192.168.1.198
server_ip4=192.168.1.254

# setup kernel so it reboots after causing the panic
echo 180 >/proc/sys/kernel/panic

# build namespaces
# veth pair: link1_1 is moved into "client", link1_2 into "server".
ip link add dev link1_1 type veth peer name link1_2

# Server side: move link1_2 in, rename it to eth0, bring it up, add the
# server address.
ip netns add "server"
ip link set dev link1_2 netns server up name eth0
ip netns exec server ip addr add ${server_ip4}/24 dev eth0

# Client side, first configuration: move link1_1 in as eth0 (kept down),
# create bond0 in mode 1 with miimon 100, enslave eth0, bring bond0 up and
# give it the client address.
ip netns add "client"
ip link set dev link1_1 netns client down name eth0
ip netns exec client ip link add dev bond0 down type bond mode 1 \
miimon 100 all_slaves_active 1
ip netns exec client ip link set dev eth0 down master bond0
ip netns exec client ip link set dev bond0 up
ip netns exec client ip addr add ${client_ip4}/24 dev bond0
# NOTE: this ping is active; only the ping near the end of the script is
# commented out. Because of `set -e`, a non-zero exit status from this
# command ends the script right here and the EXIT trap runs finish.
ip netns exec client ping -c 5 $server_ip4 >/dev/null

# Client side, second configuration: release eth0 from bond0, take bond0
# down, switch it to mode 0 with an ARP monitor (interval 1000, target set
# to the server address), then enslave eth0 again and bring bond0 back up.
# NOTE(review): mode 0 is presumably the round-robin mode referred to by
# bond_rr_gen_slave_id in the header comment -- confirm against bonding docs.
ip netns exec client ip link set dev eth0 down nomaster
ip netns exec client ip link set dev bond0 down
ip netns exec client ip link set dev bond0 type bond mode 0 \
arp_interval 1000 arp_ip_target "+${server_ip4}"
ip netns exec client ip link set dev eth0 down master bond0
ip netns exec client ip link set dev bond0 up
ip link show
# ip netns exec client ping -c 5 $server_ip4 >/dev/null

echo "Exiting"

# Explicit success status; the EXIT trap still runs finish after this.
exit 0
root@defiant:/linux/kernel/linux_torvalds/tools/testing/selftests/drivers/net/bonding#

However, bash appears to run the older version no matter what I try:

# ./test.sh
# bash ./test.sh
# . ./test.sh
# sh ./test.sh
# cat test.sh | bash -

What is executed is not what is in the test.sh, but the old version:

root@defiant:/linux/kernel/linux_torvalds/tools/testing/selftests/drivers/net/bonding# sh ./test.sh
+ test 0 -ne 0
+ trap finish EXIT
+ client_ip4=192.168.1.198
+ server_ip4=192.168.1.254
+ echo 180
+ ip link add dev link1_1 type veth peer name link1_2
+ ip netns add server
+ ip link set dev link1_2 netns server up name eth0
+ ip netns exec server ip addr add 192.168.1.254/24 dev eth0
+ ip netns add client
+ ip link set dev link1_1 netns client down name eth0
+ ip netns exec client ip link add dev bond0 down type bond mode 1 miimon 100 all_slaves_active 1
+ ip netns exec client ip link set dev eth0 down master bond0
+ ip netns exec client ip link set dev bond0 up
+ ip netns exec client ip addr add 192.168.1.198/24 dev bond0
+ ip netns exec client ping -c 5 192.168.1.254
+ finish
+ ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:fc:ca:49:e2:d4 brd ff:ff:ff:ff:ff:ff
3: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ipip 0.0.0.0 brd 0.0.0.0
4: gre0@NONE: <NOARP> mtu 1476 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/gre 0.0.0.0 brd 0.0.0.0
5: gretap0@NONE: <BROADCAST,MULTICAST> mtu 1462 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
6: erspan0@NONE: <BROADCAST,MULTICAST> mtu 1450 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
7: ip_vti0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ipip 0.0.0.0 brd 0.0.0.0
8: ip6_vti0@NONE: <NOARP> mtu 1332 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/tunnel6 :: brd :: permaddr c689:f12:e1f6::
9: sit0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/sit 0.0.0.0 brd 0.0.0.0
10: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/tunnel6 :: brd :: permaddr 7e18:b0ed:661a::
11: ip6gre0@NONE: <NOARP> mtu 1448 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/gre6 :: brd :: permaddr ce4b:1aae:bc8f::
12: enp16s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000
link/ether 9c:6b:00:01:fb:80 brd ff:ff:ff:ff:ff:ff
+ ip link del link1_1
Cannot find device "link1_1"
+ true
+ ip netns delete server
+ ip netns delete client
root@defiant:/linux/kernel/linux_torvalds/tools/testing/selftests/drivers/net/bonding#

(The ip netns exec client ping -c 5 192.168.1.254 command is executed even though
it is commented out in the program.)

The "strace ./test.sh" command shows that ./test.sh is really opened, so
the only thing possible might be that bash and cat see different versions?

# strace ./test.sh
newfstatat(AT_FDCWD, ".", {st_mode=S_IFDIR|0755, st_size=522, ...}, 0) = 0
openat(AT_FDCWD, "./test.sh", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10) = 10
close(3) = 0
fcntl(10, F_SETFD, FD_CLOEXEC) = 0
geteuid() = 0
getegid() = 0
rt_sigaction(SIGINT, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x55d28c956aa0, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7f5f1b842520}, NULL, 8) = 0
rt_sigaction(SIGQUIT, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGQUIT, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7f5f1b842520}, NULL, 8) = 0
rt_sigaction(SIGTERM, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7f5f1b842520}, NULL, 8) = 0
read(10, "#!/bin/sh\n# SPDX-License-Identif"..., 8192) = 1441


# strace cat ./test.sh

newfstatat(1, "", {st_mode=S_IFIFO|0600, st_size=0, ...}, AT_EMPTY_PATH) = 0
openat(AT_FDCWD, "./test.sh", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0775, st_size=1441, ...}, AT_EMPTY_PATH) = 0
fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
mmap(NULL, 139264, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9eceaa6000
read(3, "#!/bin/sh\n# SPDX-License-Identif"..., 131072) = 1441
write(1, "#!/bin/sh\n# SPDX-License-Identif"..., 1441#!/bin/sh

Unless I'm doing something stupid, this could be a bug in btrfs COW?

The kernel is the 6.4.0 release from the Torvalds tree.

The /home partition is BTRFS on NVME Samsung 980 1 TB disk PCIe 3.0.

# /home was on /dev/nvme0n1p7 during installation
UUID=adfbacf5-c1d4-46c0-80ff-e1312696b829 /home btrfs defaults,subvol=@home 0 2

Best regards,
Mirsad Todorovac