[PATCH] afs: Fix error handling with lookup via FS.InlineBulkStatus

From: David Howells
Date: Tue Jan 02 2024 - 10:21:55 EST


When afs does a lookup, it tries to use FS.InlineBulkStatus to preemptively
look up a bunch of files in the parent directory and cache this locally, on
the basis that we might want to look at them too (for example if someone
does an ls on a directory, they may want want to then stat every file
listed).

FS.InlineBulkStatus can be considered a compound op with the normal abort
code applying to the compound as a whole. Each status fetch within the
compound is then given its own individual abort code - but assuming no
error that prevents the bulk fetch from returning the compound result will
be 0, even if all the constituent status fetches failed.

At the conclusion of afs_do_lookup(), we should use the abort code from the
appropriate status to determine the error to return, if any - but instead
it is assumed that we were successful if the op as a whole succeeded and we
return an incompletely initialised inode, resulting in ENOENT, no matter
the actual reason. In the particular instance reported, a vnode with no
permission granted to be accessed is being given a UAEACCES abort code
which should be reported as EACCES, but is instead being reported as
ENOENT.

Fix this by abandoning the inode (which will be cleaned up with the op) if
file[1] has an abort code indicated and turn that abort code into an error
instead.

Whilst we're at it, add a tracepoint so that the abort codes of the
individual subrequests of FS.InlineBulkStatus can be logged. At the moment
only the container abort code can be 0.

Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept")
Reported-by: Jeffrey Altman <jaltman@xxxxxxxxxxxx>
Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Marc Dionne <marc.dionne@xxxxxxxxxxxx>
cc: linux-afs@xxxxxxxxxxxxxxxxxxx
---
fs/afs/dir.c | 12 +++++++++---
include/trace/events/afs.h | 25 +++++++++++++++++++++++++
2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index c14533ef108f..ae563d2a914e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -708,6 +708,8 @@ static void afs_do_lookup_success(struct afs_operation *op)
break;
}

+ if (vp->scb.status.abort_code)
+ trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code);
if (!vp->scb.have_status && !vp->scb.have_error)
continue;

@@ -897,12 +899,16 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- inode = ERR_PTR(afs_op_error(op));

out_op:
if (!afs_op_error(op)) {
- inode = &op->file[1].vnode->netfs.inode;
- op->file[1].vnode = NULL;
+ if (op->file[1].scb.status.abort_code) {
+ afs_op_accumulate_error(op, -ECONNABORTED,
+ op->file[1].scb.status.abort_code);
+ } else {
+ inode = &op->file[1].vnode->netfs.inode;
+ op->file[1].vnode = NULL;
+ }
}

if (op->file[0].scb.have_status)
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5194b7e6dc8d..ce865ea678d3 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -1102,6 +1102,31 @@ TRACE_EVENT(afs_file_error,
__print_symbolic(__entry->where, afs_file_errors))
);

+TRACE_EVENT(afs_bulkstat_error,
+ TP_PROTO(struct afs_operation *op, struct afs_fid *fid, unsigned int index, s32 abort),
+
+ TP_ARGS(op, fid, index, abort),
+
+ TP_STRUCT__entry(
+ __field_struct(struct afs_fid, fid)
+ __field(unsigned int, op)
+ __field(unsigned int, index)
+ __field(s32, abort)
+ ),
+
+ TP_fast_assign(
+ __entry->op = op->debug_id;
+ __entry->fid = *fid;
+ __entry->index = index;
+ __entry->abort = abort;
+ ),
+
+ TP_printk("OP=%08x[%02x] %llx:%llx:%x a=%d",
+ __entry->op, __entry->index,
+ __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+ __entry->abort)
+ );
+
TRACE_EVENT(afs_cm_no_server,
TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),