Re: NFS and kernel 2.6.x

From: Trond Myklebust
Date: Mon Apr 19 2004 - 11:21:46 EST


On Mon, 2004-04-19 at 11:38, Trond Myklebust wrote:
> On Sun, 2004-04-18 at 19:22, Jamie Lokier wrote:

> > You don't respond to the other question: the doubling stopping at
> > 3.2s. Is it intended? It goes against a basic principle of congestion
> > control.
>
> I can put it back in.

Here's a patch that continues doubling.

Cheers,
Trond
include/linux/sunrpc/xprt.h | 10 ++---
net/sunrpc/auth_gss/auth_gss.c | 2 -
net/sunrpc/clnt.c | 4 --
net/sunrpc/timer.c | 1
net/sunrpc/xprt.c | 81 +++++++++++++++++++++++++----------------
5 files changed, 57 insertions(+), 41 deletions(-)

diff -u --recursive --new-file --show-c-function linux-2.6.6-rc1/include/linux/sunrpc/xprt.h linux-2.6.6-01-soft/include/linux/sunrpc/xprt.h
--- linux-2.6.6-rc1/include/linux/sunrpc/xprt.h 2004-04-17 23:01:09.000000000 -0400
+++ linux-2.6.6-01-soft/include/linux/sunrpc/xprt.h 2004-04-19 11:57:32.000000000 -0400
@@ -69,8 +69,7 @@ extern unsigned int xprt_tcp_slot_table_
* This describes a timeout strategy
*/
struct rpc_timeout {
- unsigned long to_current, /* current timeout */
- to_initval, /* initial timeout */
+ unsigned long to_initval, /* initial timeout */
to_maxval, /* max timeout */
to_increment; /* if !exponential */
unsigned int to_retries; /* max # of retries */
@@ -85,7 +84,6 @@ struct rpc_rqst {
* This is the user-visible part
*/
struct rpc_xprt * rq_xprt; /* RPC client */
- struct rpc_timeout rq_timeout; /* timeout parms */
struct xdr_buf rq_snd_buf; /* send buffer */
struct xdr_buf rq_rcv_buf; /* recv buffer */

@@ -103,6 +101,9 @@ struct rpc_rqst {
struct xdr_buf rq_private_buf; /* The receive buffer
* used in the softirq.
*/
+ unsigned long rq_majortimeo; /* major timeout alarm */
+ unsigned long rq_timeout; /* Current timeout value */
+ unsigned int rq_retries; /* # of retries */
/*
* For authentication (e.g. auth_des)
*/
@@ -115,7 +116,6 @@ struct rpc_rqst {
u32 rq_bytes_sent; /* Bytes we have sent */

unsigned long rq_xtime; /* when transmitted */
- int rq_ntimeo;
int rq_ntrans;
};
#define rq_svec rq_snd_buf.head
@@ -210,7 +210,7 @@ void xprt_reserve(struct rpc_task *);
int xprt_prepare_transmit(struct rpc_task *);
void xprt_transmit(struct rpc_task *);
void xprt_receive(struct rpc_task *);
-int xprt_adjust_timeout(struct rpc_timeout *);
+int xprt_adjust_timeout(struct rpc_rqst *req);
void xprt_release(struct rpc_task *);
void xprt_connect(struct rpc_task *);
int xprt_clear_backlog(struct rpc_xprt *);
diff -u --recursive --new-file --show-c-function linux-2.6.6-rc1/net/sunrpc/auth_gss/auth_gss.c linux-2.6.6-01-soft/net/sunrpc/auth_gss/auth_gss.c
--- linux-2.6.6-rc1/net/sunrpc/auth_gss/auth_gss.c 2004-04-17 23:00:57.000000000 -0400
+++ linux-2.6.6-01-soft/net/sunrpc/auth_gss/auth_gss.c 2004-04-19 11:57:32.000000000 -0400
@@ -736,10 +736,8 @@ static int
gss_refresh(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_cred *cred = task->tk_msg.rpc_cred;

- task->tk_timeout = xprt->timeout.to_current;
if (!gss_cred_is_uptodate_ctx(cred))
return gss_upcall(clnt, task, cred);
return 0;
diff -u --recursive --new-file --show-c-function linux-2.6.6-rc1/net/sunrpc/clnt.c linux-2.6.6-01-soft/net/sunrpc/clnt.c
--- linux-2.6.6-rc1/net/sunrpc/clnt.c 2004-04-17 23:00:47.000000000 -0400
+++ linux-2.6.6-01-soft/net/sunrpc/clnt.c 2004-04-19 11:57:32.000000000 -0400
@@ -788,13 +788,11 @@ static void
call_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- struct rpc_timeout *to = &task->tk_rqstp->rq_timeout;

- if (xprt_adjust_timeout(to)) {
+ if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
goto retry;
}
- to->to_retries = clnt->cl_timeout.to_retries;

dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
if (RPC_IS_SOFT(task)) {
diff -u --recursive --new-file --show-c-function linux-2.6.6-rc1/net/sunrpc/timer.c linux-2.6.6-01-soft/net/sunrpc/timer.c
--- linux-2.6.6-rc1/net/sunrpc/timer.c 2004-04-17 23:01:20.000000000 -0400
+++ linux-2.6.6-01-soft/net/sunrpc/timer.c 2004-04-19 11:57:32.000000000 -0400
@@ -39,6 +39,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigne
for (i = 0; i < 5; i++) {
rt->srtt[i] = init;
rt->sdrtt[i] = RPC_RTO_INIT;
+ rt->ntimeouts[i] = 0;
}
}

diff -u --recursive --new-file --show-c-function linux-2.6.6-rc1/net/sunrpc/xprt.c linux-2.6.6-01-soft/net/sunrpc/xprt.c
--- linux-2.6.6-rc1/net/sunrpc/xprt.c 2004-04-17 23:01:07.000000000 -0400
+++ linux-2.6.6-01-soft/net/sunrpc/xprt.c 2004-04-19 11:58:03.000000000 -0400
@@ -352,35 +352,57 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt,
}

/*
+ * Reset the major timeout value
+ */
+static void xprt_reset_majortimeo(struct rpc_rqst *req)
+{
+ struct rpc_timeout *to = &req->rq_xprt->timeout;
+
+ req->rq_majortimeo = req->rq_timeout;
+ if (to->to_exponential)
+ req->rq_majortimeo <<= to->to_retries;
+ else
+ req->rq_majortimeo += to->to_increment * to->to_retries;
+ if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
+ req->rq_majortimeo = to->to_maxval;
+ req->rq_majortimeo += jiffies;
+}
+
+/*
* Adjust timeout values etc for next retransmit
*/
-int
-xprt_adjust_timeout(struct rpc_timeout *to)
+int xprt_adjust_timeout(struct rpc_rqst *req)
{
- if (to->to_retries > 0) {
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct rpc_timeout *to = &xprt->timeout;
+ int status = 0;
+
+ if (time_before(jiffies, req->rq_majortimeo)) {
if (to->to_exponential)
- to->to_current <<= 1;
+ req->rq_timeout <<= 1;
else
- to->to_current += to->to_increment;
- if (to->to_maxval && to->to_current >= to->to_maxval)
- to->to_current = to->to_maxval;
+ req->rq_timeout += to->to_increment;
+ if (to->to_maxval && req->rq_timeout >= to->to_maxval)
+ req->rq_timeout = to->to_maxval;
+ req->rq_retries++;
+ pprintk("RPC: %lu retrans\n", jiffies);
} else {
- if (to->to_exponential)
- to->to_initval <<= 1;
- else
- to->to_initval += to->to_increment;
- if (to->to_maxval && to->to_initval >= to->to_maxval)
- to->to_initval = to->to_maxval;
- to->to_current = to->to_initval;
+ req->rq_timeout = to->to_initval;
+ req->rq_retries = 0;
+ xprt_reset_majortimeo(req);
+ /* Reset the RTT counters == "slow start" */
+ spin_lock_bh(&xprt->sock_lock);
+ rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
+ spin_unlock_bh(&xprt->sock_lock);
+ pprintk("RPC: %lu timeout\n", jiffies);
+ status = -ETIMEDOUT;
}

- if (!to->to_current) {
- printk(KERN_WARNING "xprt_adjust_timeout: to_current = 0!\n");
- to->to_current = 5 * HZ;
- }
- pprintk("RPC: %lu %s\n", jiffies,
- to->to_retries? "retrans" : "timeout");
- return to->to_retries-- > 0;
+ if (req->rq_timeout == 0) {
+ printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");
+ req->rq_timeout = 5 * HZ;
+ }
+ return status;
}

/*
@@ -1166,6 +1188,7 @@ xprt_transmit(struct rpc_task *task)
/* Add request to the receive list */
list_add_tail(&req->rq_list, &xprt->recv);
spin_unlock_bh(&xprt->sock_lock);
+ xprt_reset_majortimeo(req);
}
} else if (!req->rq_bytes_sent)
return;
@@ -1221,7 +1244,7 @@ xprt_transmit(struct rpc_task *task)
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
- task->tk_timeout = req->rq_timeout.to_current;
+ task->tk_timeout = req->rq_timeout;
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
}
spin_unlock_bh(&xprt->sock_lock);
@@ -1248,13 +1271,11 @@ xprt_transmit(struct rpc_task *task)
if (!xprt->nocong) {
int timer = task->tk_msg.rpc_proc->p_timer;
task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer);
- task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer);
- task->tk_timeout <<= clnt->cl_timeout.to_retries
- - req->rq_timeout.to_retries;
- if (task->tk_timeout > req->rq_timeout.to_maxval)
- task->tk_timeout = req->rq_timeout.to_maxval;
+ task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries;
+ if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0)
+ task->tk_timeout = xprt->timeout.to_maxval;
} else
- task->tk_timeout = req->rq_timeout.to_current;
+ task->tk_timeout = req->rq_timeout;
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
@@ -1324,7 +1345,7 @@ xprt_request_init(struct rpc_task *task,
{
struct rpc_rqst *req = task->tk_rqstp;

- req->rq_timeout = xprt->timeout;
+ req->rq_timeout = xprt->timeout.to_initval;
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_xid = xprt_alloc_xid(xprt);
@@ -1381,7 +1402,6 @@ xprt_default_timeout(struct rpc_timeout
void
xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
{
- to->to_current =
to->to_initval =
to->to_increment = incr;
to->to_maxval = incr * retr;
@@ -1446,7 +1466,6 @@ xprt_setup(int proto, struct sockaddr_in
/* Set timeout parameters */
if (to) {
xprt->timeout = *to;
- xprt->timeout.to_current = to->to_initval;
} else
xprt_default_timeout(&xprt->timeout, xprt->prot);