[PATCH] nfsd: fix race condition in nfsd_file_acquire

From: Haodong Wong
Date: Fri Aug 18 2023 - 02:56:10 EST


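On kernels before 6.1, we observed the following oops during a stress test.
It is caused by the stores that set NFSD_FILE_HASHED and NFSD_FILE_PENDING
being reordered; in addition, the waiter needs a barrier that pairs with the
smp_mb__after_atomic() issued after NFSD_FILE_PENDING is cleared.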

Task A:                              Task B:

nfsd_file_acquire:

  new = nfsd_file_alloc()
  open_file:
  refcount_inc(&nf->nf_ref);
                                     nf = nfsd_file_find_locked();
                                     wait_for_construction:

                                     since nf_flags is zero it will not wait

                                     wait_on_bit(&nf->nf_flags,
                                                 NFSD_FILE_PENDING);

                                     if (status == nfs_ok) {
                                         *pnf = nf; // the oops happens here

Unable to handle kernel NULL pointer at virtual address 0000000000000028
Mem abort info:
ESR = 0x96000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=0000000152546000
[0000000000000028] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT_RT SMP
CPU: 7 PID: 1767 Comm: nfsd Not tainted 5.10.104 #1
pstate: 40c00005 (nZcv daif +PAN +UAO -TCO BTYPE=--)
pc : nfsd_read+0x78/0x280 [nfsd]
lr : nfsd_read+0x68/0x280 [nfsd]
sp : ffff80001c0b3c70
x29: ffff80001c0b3c70 x28: 0000000000000000
x27: 0000000000000002 x26: ffff0000c8a3ca70
x25: ffff0000c8a45180 x24: 0000000000002000
x23: ffff0000c8a45178 x22: ffff0000c8a45008
x21: ffff0000c31aac40 x20: ffff0000c8a3c000
x19: 0000000000000000 x18: 0000000000000001
x17: 0000000000000007 x16: 00000000b35db681
x15: 0000000000000156 x14: ffff0000c3f91300
x13: 0000000000000000 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000
x9 : 0000000000000000 x8 : ffff000118014a80
x7 : 0000000000000002 x6 : ffff0002559142dc
x5 : ffff0000c31aac40 x4 : 0000000000000004
x3 : 0000000000000001 x2 : 0000000000000000
x1 : 0000000000000001 x0 : ffff000255914280
Call trace:
nfsd_read+0x78/0x280 [nfsd]
nfsd3_proc_read+0x98/0xc0 [nfsd]
nfsd_dispatch+0xc8/0x160 [nfsd]
svc_process_common+0x440/0x790
svc_process+0xb0/0xd0
nfsd+0xfc/0x160 [nfsd]
kthread+0x17c/0x1a0
ret_from_fork+0x10/0x18

Signed-off-by: Haodong Wong <haydenw.kernel@xxxxxxxxx>
---
fs/nfsd/filecache.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index e30e1ddc1ace..ba980369e6b4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -974,8 +974,12 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
+ /* Guard against the NFSD_FILE_PENDING and NFSD_FILE_HASHED stores being observed out of order */
+ smp_rmb();
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

+ /* Pairs with the memory barrier issued after NFSD_FILE_PENDING is cleared */
+ smp_mb__after_atomic();
/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
if (!retry) {
@@ -1018,8 +1022,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
nf = new;
/* Take reference for the hashtable */
refcount_inc(&nf->nf_ref);
- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ /* Ensure NFSD_FILE_PENDING is set before NFSD_FILE_HASHED becomes visible */
+ smp_wmb();
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+
list_lru_add(&nfsd_file_lru, &nf->nf_lru);
hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
++nfsd_file_hashtbl[hashval].nfb_count;
--
2.25.1