From 5e4def20381678ba3ce0a4e117f97e378ecd81bc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:44 +0000
Subject: Pass mode to wait_on_atomic_t() action funcs and provide default
 actions

Make wait_on_atomic_t() pass the TASK_* mode onto its action function as an
extra argument and make it 'unsigned int throughout.

Also, consolidate a bunch of identical action functions into a default
function that can do the appropriate thing for the mode.

Also, change the argument name in the bit_wait*() function declarations to
reflect the fact that it's the mode and not the bit number.

[Peter Z gives this a grudging ACK, but thinks that the whole atomic_t wait
should be done differently, though he's not immediately sure as to how]

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
cc: Ingo Molnar <mingo@kernel.org>
---
 fs/afs/rxrpc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bb1e2caa1720..77f5420a1a24 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -41,12 +41,6 @@ static void afs_charge_preallocation(struct work_struct *);
 
 static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
 
-static int afs_wait_atomic_t(atomic_t *p)
-{
-	schedule();
-	return 0;
-}
-
 /*
  * open an RxRPC socket and bind it to be a server for callback notifications
  * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
@@ -121,7 +115,7 @@ void afs_close_socket(void)
 	}
 
 	_debug("outstanding %u", atomic_read(&afs_outstanding_calls));
-	wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
+	wait_on_atomic_t(&afs_outstanding_calls, atomic_t_wait,
 			 TASK_UNINTERRUPTIBLE);
 	_debug("no outstanding calls");
 
-- 
cgit v1.2.3


From f044c8847bb61eff5e1e95b6f6bb950e7f4a73a4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:45 +0000
Subject: afs: Lay the groundwork for supporting network namespaces

Lay the groundwork for supporting network namespaces (netns) to the AFS
filesystem by moving various global features to a network-namespace struct
(afs_net) and providing an instance of this as a temporary global variable
that everything uses via accessor functions for the moment.

The following changes have been made:

 (1) Store the netns in the superblock info.  This will be obtained from
     the mounter's nsproxy on a manual mount and inherited from the parent
     superblock on an automount.

 (2) The cell list is made per-netns.  It can be viewed through
     /proc/net/afs/cells and also be modified by writing commands to that
     file.

 (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell.
     This is unset by default.

 (4) The 'rootcell' module parameter, which sets a cell and VL server list
     modifies the init net namespace, thereby allowing an AFS root fs to be
     theoretically used.

 (5) The volume location lists and the file lock manager are made
     per-netns.

 (6) The AF_RXRPC socket and associated I/O bits are made per-ns.

The various workqueues remain global for the moment.

Changes still to be made:

 (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced
     from the old name.

 (2) A per-netns subsys needs to be registered for AFS into which it can
     store its per-netns data.

 (3) Rather than the AF_RXRPC socket being opened on module init, it needs
     to be opened on the creation of a superblock in that netns.

 (4) The socket needs to be closed when the last superblock using it is
     destroyed and all outstanding client calls on it have been completed.
     This prevents a reference loop on the namespace.

 (5) It is possible that several namespaces will want to use AFS, in which
     case each one will need its own UDP port.  These can either be set
     through /proc/net/afs/cm_port or the kernel can pick one at random.
     The init_ns gets 7001 by default.

Other issues that need resolving:

 (1) The DNS keyring needs net-namespacing.

 (2) Where do upcalls go (eg. DNS request-key upcall)?

 (3) Need something like open_socket_in_file_ns() syscall so that AFS
     command line tools attempting to operate on an AFS file/volume have
     their RPC calls go to the right place.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h       |   9 +++
 fs/afs/callback.c  |  24 +-------
 fs/afs/cell.c      | 130 +++++++++++++++++++++---------------------
 fs/afs/cmservice.c |  26 ++++-----
 fs/afs/flock.c     |  39 +------------
 fs/afs/fsclient.c  |  56 +++++++++++-------
 fs/afs/internal.h  | 163 +++++++++++++++++++++++++++++++++++++++--------------
 fs/afs/main.c      | 153 +++++++++++++++++++++++++++++++++----------------
 fs/afs/proc.c      |  64 +++++++++++++--------
 fs/afs/rxrpc.c     | 132 +++++++++++++++++++++----------------------
 fs/afs/server.c    |  82 +++++++++++++--------------
 fs/afs/super.c     |  45 ++++++++-------
 fs/afs/vlclient.c  |  10 ++--
 fs/afs/vlocation.c | 151 ++++++++++++++++++++++---------------------------
 fs/afs/volume.c    |  10 ++--
 15 files changed, 602 insertions(+), 492 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 3c462ff6db63..93053115bcfc 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -72,6 +72,15 @@ struct afs_callback {
 
 #define AFSCBMAX 50	/* maximum callbacks transferred per bulk op */
 
+struct afs_uuid {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__u8		clock_seq_low;			/* clock seq low */
+	__u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*
  * AFS volume information
  */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 25d404d22cae..d12dffb76b67 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -28,9 +28,7 @@ unsigned afs_vnode_update_timeout = 10;
 	CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail,	\
 		   ARRAY_SIZE((server)->cb_break))
 
-//static void afs_callback_updater(struct work_struct *);
-
-static struct workqueue_struct *afs_callback_update_worker;
+struct workqueue_struct *afs_callback_update_worker;
 
 /*
  * allow the fileserver to request callback state (re-)initialisation
@@ -343,7 +341,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work)
 	 *   had callbacks entirely, and the server will call us later to break
 	 *   them
 	 */
-	afs_fs_give_up_callbacks(server, true);
+	afs_fs_give_up_callbacks(server->cell->net, server, true);
 }
 
 /*
@@ -456,21 +454,3 @@ static void afs_callback_updater(struct work_struct *work)
 	afs_put_vnode(vl);
 }
 #endif
-
-/*
- * initialise the callback update process
- */
-int __init afs_callback_update_init(void)
-{
-	afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
-							     WQ_MEM_RECLAIM);
-	return afs_callback_update_worker ? 0 : -ENOMEM;
-}
-
-/*
- * shut down the callback update process
- */
-void afs_callback_update_kill(void)
-{
-	destroy_workqueue(afs_callback_update_worker);
-}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index ca0a3cf93791..bd570fa539a0 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -18,20 +18,12 @@
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
-DECLARE_RWSEM(afs_proc_cells_sem);
-LIST_HEAD(afs_proc_cells);
-
-static LIST_HEAD(afs_cells);
-static DEFINE_RWLOCK(afs_cells_lock);
-static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
-static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
-static struct afs_cell *afs_cell_root;
-
 /*
  * allocate a cell record and fill in its name, VL server address list and
  * allocate an anonymous key
  */
-static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen,
+static struct afs_cell *afs_cell_alloc(struct afs_net *net,
+				       const char *name, unsigned namelen,
 				       char *vllist)
 {
 	struct afs_cell *cell;
@@ -62,6 +54,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen,
 
 	atomic_set(&cell->usage, 1);
 	INIT_LIST_HEAD(&cell->link);
+	cell->net = net;
 	rwlock_init(&cell->servers_lock);
 	INIT_LIST_HEAD(&cell->servers);
 	init_rwsem(&cell->vl_sem);
@@ -142,12 +135,14 @@ error:
 
 /*
  * afs_cell_crate() - create a cell record
+ * @net:	The network namespace
  * @name:	is the name of the cell.
  * @namsesz:	is the strlen of the cell name.
  * @vllist:	is a colon separated list of IP addresses in "a.b.c.d" format.
  * @retref:	is T to return the cell reference when the cell exists.
  */
-struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
+struct afs_cell *afs_cell_create(struct afs_net *net,
+				 const char *name, unsigned namesz,
 				 char *vllist, bool retref)
 {
 	struct afs_cell *cell;
@@ -155,23 +150,23 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
 
 	_enter("%*.*s,%s", namesz, namesz, name ?: "", vllist);
 
-	down_write(&afs_cells_sem);
-	read_lock(&afs_cells_lock);
-	list_for_each_entry(cell, &afs_cells, link) {
+	down_write(&net->cells_sem);
+	read_lock(&net->cells_lock);
+	list_for_each_entry(cell, &net->cells, link) {
 		if (strncasecmp(cell->name, name, namesz) == 0)
 			goto duplicate_name;
 	}
-	read_unlock(&afs_cells_lock);
+	read_unlock(&net->cells_lock);
 
-	cell = afs_cell_alloc(name, namesz, vllist);
+	cell = afs_cell_alloc(net, name, namesz, vllist);
 	if (IS_ERR(cell)) {
 		_leave(" = %ld", PTR_ERR(cell));
-		up_write(&afs_cells_sem);
+		up_write(&net->cells_sem);
 		return cell;
 	}
 
 	/* add a proc directory for this cell */
-	ret = afs_proc_cell_setup(cell);
+	ret = afs_proc_cell_setup(net, cell);
 	if (ret < 0)
 		goto error;
 
@@ -183,20 +178,20 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
 #endif
 
 	/* add to the cell lists */
-	write_lock(&afs_cells_lock);
-	list_add_tail(&cell->link, &afs_cells);
-	write_unlock(&afs_cells_lock);
+	write_lock(&net->cells_lock);
+	list_add_tail(&cell->link, &net->cells);
+	write_unlock(&net->cells_lock);
 
-	down_write(&afs_proc_cells_sem);
-	list_add_tail(&cell->proc_link, &afs_proc_cells);
-	up_write(&afs_proc_cells_sem);
-	up_write(&afs_cells_sem);
+	down_write(&net->proc_cells_sem);
+	list_add_tail(&cell->proc_link, &net->proc_cells);
+	up_write(&net->proc_cells_sem);
+	up_write(&net->cells_sem);
 
 	_leave(" = %p", cell);
 	return cell;
 
 error:
-	up_write(&afs_cells_sem);
+	up_write(&net->cells_sem);
 	key_put(cell->anonymous_key);
 	kfree(cell);
 	_leave(" = %d", ret);
@@ -206,8 +201,8 @@ duplicate_name:
 	if (retref && !IS_ERR(cell))
 		afs_get_cell(cell);
 
-	read_unlock(&afs_cells_lock);
-	up_write(&afs_cells_sem);
+	read_unlock(&net->cells_lock);
+	up_write(&net->cells_sem);
 
 	if (retref) {
 		_leave(" = %p", cell);
@@ -223,7 +218,7 @@ duplicate_name:
  * - can be called with a module parameter string
  * - can be called from a write to /proc/fs/afs/rootcell
  */
-int afs_cell_init(char *rootcell)
+int afs_cell_init(struct afs_net *net, char *rootcell)
 {
 	struct afs_cell *old_root, *new_root;
 	char *cp;
@@ -245,17 +240,17 @@ int afs_cell_init(char *rootcell)
 		*cp++ = 0;
 
 	/* allocate a cell record for the root cell */
-	new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false);
+	new_root = afs_cell_create(net, rootcell, strlen(rootcell), cp, false);
 	if (IS_ERR(new_root)) {
 		_leave(" = %ld", PTR_ERR(new_root));
 		return PTR_ERR(new_root);
 	}
 
 	/* install the new cell */
-	write_lock(&afs_cells_lock);
-	old_root = afs_cell_root;
-	afs_cell_root = new_root;
-	write_unlock(&afs_cells_lock);
+	write_lock(&net->cells_lock);
+	old_root = net->ws_cell;
+	net->ws_cell = new_root;
+	write_unlock(&net->cells_lock);
 	afs_put_cell(old_root);
 
 	_leave(" = 0");
@@ -265,19 +260,20 @@ int afs_cell_init(char *rootcell)
 /*
  * lookup a cell record
  */
-struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
+struct afs_cell *afs_cell_lookup(struct afs_net *net,
+				 const char *name, unsigned namesz,
 				 bool dns_cell)
 {
 	struct afs_cell *cell;
 
 	_enter("\"%*.*s\",", namesz, namesz, name ?: "");
 
-	down_read(&afs_cells_sem);
-	read_lock(&afs_cells_lock);
+	down_read(&net->cells_sem);
+	read_lock(&net->cells_lock);
 
 	if (name) {
 		/* if the cell was named, look for it in the cell record list */
-		list_for_each_entry(cell, &afs_cells, link) {
+		list_for_each_entry(cell, &net->cells, link) {
 			if (strncmp(cell->name, name, namesz) == 0) {
 				afs_get_cell(cell);
 				goto found;
@@ -289,7 +285,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
 	found:
 		;
 	} else {
-		cell = afs_cell_root;
+		cell = net->ws_cell;
 		if (!cell) {
 			/* this should not happen unless user tries to mount
 			 * when root cell is not set. Return an impossibly
@@ -304,16 +300,16 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
 
 	}
 
-	read_unlock(&afs_cells_lock);
-	up_read(&afs_cells_sem);
+	read_unlock(&net->cells_lock);
+	up_read(&net->cells_sem);
 	_leave(" = %p", cell);
 	return cell;
 
 create_cell:
-	read_unlock(&afs_cells_lock);
-	up_read(&afs_cells_sem);
+	read_unlock(&net->cells_lock);
+	up_read(&net->cells_sem);
 
-	cell = afs_cell_create(name, namesz, NULL, true);
+	cell = afs_cell_create(net, name, namesz, NULL, true);
 
 	_leave(" = %p", cell);
 	return cell;
@@ -325,14 +321,14 @@ create_cell:
  */
 struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
 {
-	write_lock(&afs_cells_lock);
+	write_lock(&net->cells_lock);
 
 	if (cell && !list_empty(&cell->link))
 		afs_get_cell(cell);
 	else
 		cell = NULL;
 
-	write_unlock(&afs_cells_lock);
+	write_unlock(&net->cells_lock);
 	return cell;
 }
 #endif  /*  0  */
@@ -351,10 +347,10 @@ void afs_put_cell(struct afs_cell *cell)
 
 	/* to prevent a race, the decrement and the dequeue must be effectively
 	 * atomic */
-	write_lock(&afs_cells_lock);
+	write_lock(&cell->net->cells_lock);
 
 	if (likely(!atomic_dec_and_test(&cell->usage))) {
-		write_unlock(&afs_cells_lock);
+		write_unlock(&cell->net->cells_lock);
 		_leave("");
 		return;
 	}
@@ -362,19 +358,19 @@ void afs_put_cell(struct afs_cell *cell)
 	ASSERT(list_empty(&cell->servers));
 	ASSERT(list_empty(&cell->vl_list));
 
-	write_unlock(&afs_cells_lock);
+	wake_up(&cell->net->cells_freeable_wq);
 
-	wake_up(&afs_cells_freeable_wq);
+	write_unlock(&cell->net->cells_lock);
 
 	_leave(" [unused]");
 }
 
 /*
  * destroy a cell record
- * - must be called with the afs_cells_sem write-locked
+ * - must be called with the net->cells_sem write-locked
  * - cell->link should have been broken by the caller
  */
-static void afs_cell_destroy(struct afs_cell *cell)
+static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell)
 {
 	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
 
@@ -387,14 +383,14 @@ static void afs_cell_destroy(struct afs_cell *cell)
 
 		_debug("wait for cell %s", cell->name);
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		add_wait_queue(&afs_cells_freeable_wq, &myself);
+		add_wait_queue(&net->cells_freeable_wq, &myself);
 
 		while (atomic_read(&cell->usage) > 0) {
 			schedule();
 			set_current_state(TASK_UNINTERRUPTIBLE);
 		}
 
-		remove_wait_queue(&afs_cells_freeable_wq, &myself);
+		remove_wait_queue(&net->cells_freeable_wq, &myself);
 		set_current_state(TASK_RUNNING);
 	}
 
@@ -403,11 +399,11 @@ static void afs_cell_destroy(struct afs_cell *cell)
 	ASSERT(list_empty(&cell->servers));
 	ASSERT(list_empty(&cell->vl_list));
 
-	afs_proc_cell_remove(cell);
+	afs_proc_cell_remove(net, cell);
 
-	down_write(&afs_proc_cells_sem);
+	down_write(&net->proc_cells_sem);
 	list_del_init(&cell->proc_link);
-	up_write(&afs_proc_cells_sem);
+	up_write(&net->proc_cells_sem);
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(cell->cache, 0);
@@ -422,39 +418,39 @@ static void afs_cell_destroy(struct afs_cell *cell)
  * purge in-memory cell database on module unload or afs_init() failure
  * - the timeout daemon is stopped before calling this
  */
-void afs_cell_purge(void)
+void afs_cell_purge(struct afs_net *net)
 {
 	struct afs_cell *cell;
 
 	_enter("");
 
-	afs_put_cell(afs_cell_root);
+	afs_put_cell(net->ws_cell);
 
-	down_write(&afs_cells_sem);
+	down_write(&net->cells_sem);
 
-	while (!list_empty(&afs_cells)) {
+	while (!list_empty(&net->cells)) {
 		cell = NULL;
 
 		/* remove the next cell from the front of the list */
-		write_lock(&afs_cells_lock);
+		write_lock(&net->cells_lock);
 
-		if (!list_empty(&afs_cells)) {
-			cell = list_entry(afs_cells.next,
+		if (!list_empty(&net->cells)) {
+			cell = list_entry(net->cells.next,
 					  struct afs_cell, link);
 			list_del_init(&cell->link);
 		}
 
-		write_unlock(&afs_cells_lock);
+		write_unlock(&net->cells_lock);
 
 		if (cell) {
 			_debug("PURGING CELL %s (%d)",
 			       cell->name, atomic_read(&cell->usage));
 
 			/* now the cell should be left with no references */
-			afs_cell_destroy(cell);
+			afs_cell_destroy(net, cell);
 		}
 	}
 
-	up_write(&afs_cells_sem);
+	up_write(&net->cells_sem);
 	_leave("");
 }
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 782d4d05a53b..30ce4be4165f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -193,7 +193,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+		rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -290,7 +290,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	server = afs_find_server(&srx);
+	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
 	call->server = server;
@@ -324,7 +324,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
 	_enter("");
 
-	rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 
 	ret = afs_extract_data(call, NULL, 0, false);
 	if (ret < 0)
@@ -335,7 +335,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	server = afs_find_server(&srx);
+	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
 	call->server = server;
@@ -357,7 +357,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	_enter("");
 
-	rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 
 	_enter("{%u}", call->unmarshall);
 
@@ -407,7 +407,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	server = afs_find_server(&srx);
+	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
 	call->server = server;
@@ -461,7 +461,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 
 	_enter("");
 
-	if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
+	if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
 		reply.match = htonl(0);
 	else
 		reply.match = htonl(1);
@@ -568,13 +568,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 	memset(&reply, 0, sizeof(reply));
 	reply.ia.nifs = htonl(nifs);
 
-	reply.ia.uuid[0] = afs_uuid.time_low;
-	reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid));
-	reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version));
-	reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
-	reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
+	reply.ia.uuid[0] = call->net->uuid.time_low;
+	reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
+	reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version));
+	reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved);
+	reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low);
 	for (loop = 0; loop < 6; loop++)
-		reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
+		reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
 
 	if (ifs) {
 		for (loop = 0; loop < nifs; loop++) {
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3191dff2c156..559ac00af5f7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -14,47 +14,16 @@
 #define AFS_LOCK_GRANTED	0
 #define AFS_LOCK_PENDING	1
 
+struct workqueue_struct *afs_lock_manager;
+
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
 static void afs_fl_release_private(struct file_lock *fl);
 
-static struct workqueue_struct *afs_lock_manager;
-static DEFINE_MUTEX(afs_lock_manager_mutex);
-
 static const struct file_lock_operations afs_lock_ops = {
 	.fl_copy_lock		= afs_fl_copy_lock,
 	.fl_release_private	= afs_fl_release_private,
 };
 
-/*
- * initialise the lock manager thread if it isn't already running
- */
-static int afs_init_lock_manager(void)
-{
-	int ret;
-
-	ret = 0;
-	if (!afs_lock_manager) {
-		mutex_lock(&afs_lock_manager_mutex);
-		if (!afs_lock_manager) {
-			afs_lock_manager = alloc_workqueue("kafs_lockd",
-							   WQ_MEM_RECLAIM, 0);
-			if (!afs_lock_manager)
-				ret = -ENOMEM;
-		}
-		mutex_unlock(&afs_lock_manager_mutex);
-	}
-	return ret;
-}
-
-/*
- * destroy the lock manager thread if it's running
- */
-void __exit afs_kill_lock_manager(void)
-{
-	if (afs_lock_manager)
-		destroy_workqueue(afs_lock_manager);
-}
-
 /*
  * if the callback is broken on this vnode, then the lock may now be available
  */
@@ -264,10 +233,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
 		return -EINVAL;
 
-	ret = afs_init_lock_manager();
-	if (ret < 0)
-		return ret;
-
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 19f76ae36982..ce6f0159e1d4 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -284,12 +284,13 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 			     bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
 	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -490,11 +491,12 @@ static int afs_fs_fetch_data64(struct afs_server *server,
 			       bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -531,6 +533,7 @@ int afs_fs_fetch_data(struct afs_server *server,
 		      bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	if (upper_32_bits(req->pos) ||
@@ -540,7 +543,7 @@ int afs_fs_fetch_data(struct afs_server *server,
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -590,7 +593,8 @@ static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
  * give up a set of callbacks
  * - the callbacks are held in the server->cb_break ring
  */
-int afs_fs_give_up_callbacks(struct afs_server *server,
+int afs_fs_give_up_callbacks(struct afs_net *net,
+			     struct afs_server *server,
 			     bool async)
 {
 	struct afs_call *call;
@@ -610,7 +614,7 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
 
 	_debug("break %zu callbacks", ncallbacks);
 
-	call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
+	call = afs_alloc_flat_call(net, &afs_RXFSGiveUpCallBacks,
 				   12 + ncallbacks * 6 * 4, 0);
 	if (!call)
 		return -ENOMEM;
@@ -699,6 +703,7 @@ int afs_fs_create(struct afs_server *server,
 		  bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
@@ -708,7 +713,7 @@ int afs_fs_create(struct afs_server *server,
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
+	call = afs_alloc_flat_call(net, &afs_RXFSCreateXXXX, reqsz,
 				   (3 + 21 + 21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
@@ -789,6 +794,7 @@ int afs_fs_remove(struct afs_server *server,
 		  bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
@@ -798,7 +804,7 @@ int afs_fs_remove(struct afs_server *server,
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz;
 
-	call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -870,6 +876,7 @@ int afs_fs_link(struct afs_server *server,
 		bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
@@ -879,7 +886,7 @@ int afs_fs_link(struct afs_server *server,
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz + (3 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -958,6 +965,7 @@ int afs_fs_symlink(struct afs_server *server,
 		   bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz, c_namesz, c_padsz;
 	__be32 *bp;
 
@@ -971,7 +979,7 @@ int afs_fs_symlink(struct afs_server *server,
 
 	reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
+	call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz,
 				   (3 + 21 + 21 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
@@ -1062,6 +1070,7 @@ int afs_fs_rename(struct afs_server *server,
 		  bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(orig_dvnode);
 	size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
 	__be32 *bp;
 
@@ -1078,7 +1087,7 @@ int afs_fs_rename(struct afs_server *server,
 		(3 * 4) +
 		4 + n_namesz + n_padsz;
 
-	call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -1172,12 +1181,13 @@ static int afs_fs_store_data64(struct afs_server *server,
 {
 	struct afs_vnode *vnode = wb->vnode;
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
 	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData64,
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
 				   (4 + 6 + 3 * 2) * 4,
 				   (21 + 6) * 4);
 	if (!call)
@@ -1230,6 +1240,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 {
 	struct afs_vnode *vnode = wb->vnode;
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	loff_t size, pos, i_size;
 	__be32 *bp;
 
@@ -1254,7 +1265,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 		return afs_fs_store_data64(server, wb, first, last, offset, to,
 					   size, pos, i_size, async);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData,
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
 				   (4 + 6 + 3) * 4,
 				   (21 + 6) * 4);
 	if (!call)
@@ -1356,6 +1367,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 				 bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
@@ -1363,7 +1375,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status,
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status,
 				   (4 + 6 + 3 * 2) * 4,
 				   (21 + 6) * 4);
 	if (!call)
@@ -1404,6 +1416,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 			       bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
@@ -1414,7 +1427,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 		return afs_fs_setattr_size64(server, key, vnode, attr,
 					     async);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
 				   (4 + 6 + 3) * 4,
 				   (21 + 6) * 4);
 	if (!call)
@@ -1452,6 +1465,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 		   bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	if (attr->ia_valid & ATTR_SIZE)
@@ -1461,7 +1475,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 	_enter(",%x,{%x:%u},,",
 	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreStatus,
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
 				   (4 + 6) * 4,
 				   (21 + 6) * 4);
 	if (!call)
@@ -1687,6 +1701,7 @@ int afs_fs_get_volume_status(struct afs_server *server,
 			     bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 	void *tmpbuf;
 
@@ -1696,7 +1711,7 @@ int afs_fs_get_volume_status(struct afs_server *server,
 	if (!tmpbuf)
 		return -ENOMEM;
 
-	call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
 	if (!call) {
 		kfree(tmpbuf);
 		return -ENOMEM;
@@ -1779,11 +1794,12 @@ int afs_fs_set_lock(struct afs_server *server,
 		    bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -1812,11 +1828,12 @@ int afs_fs_extend_lock(struct afs_server *server,
 		       bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -1844,11 +1861,12 @@ int afs_fs_release_lock(struct afs_server *server,
 			bool async)
 {
 	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
 	if (!call)
 		return -ENOMEM;
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3f03f7888302..53bd11d73469 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
 #include <linux/fscache.h>
 #include <linux/backing-dev.h>
 #include <linux/uuid.h>
+#include <net/net_namespace.h>
 #include <net/af_rxrpc.h>
 
 #include "afs.h"
@@ -48,6 +49,7 @@ struct afs_mount_params {
 	afs_voltype_t		type;		/* type of volume requested */
 	int			volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
+	struct afs_net		*net;		/* Network namespace in effect */
 	struct afs_cell		*cell;		/* cell in which to find volume */
 	struct afs_volume	*volume;	/* volume record */
 	struct key		*key;		/* key to use for secure mounting */
@@ -62,6 +64,7 @@ enum afs_call_state {
 	AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
 	AFS_CALL_COMPLETE,	/* Completed or failed */
 };
+
 /*
  * a record of an in-progress RxRPC call
  */
@@ -72,6 +75,7 @@ struct afs_call {
 	struct work_struct	work;		/* actual work processor */
 	struct rxrpc_call	*rxcall;	/* RxRPC call handle */
 	struct key		*key;		/* security for this call */
+	struct afs_net		*net;		/* The network namespace */
 	struct afs_server	*server;	/* server affected by incoming CM call */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* page set */
@@ -173,6 +177,7 @@ struct afs_writeback {
  * - there's one superblock per volume
  */
 struct afs_super_info {
+	struct afs_net		*net;		/* Network namespace */
 	struct afs_volume	*volume;	/* volume record */
 	char			rwparent;	/* T if parent is R/W AFS volume */
 };
@@ -192,12 +197,62 @@ struct afs_cache_cell {
 	struct in_addr	vl_servers[15];		/* cached cell VL servers */
 };
 
+/*
+ * AFS network namespace record.
+ */
+struct afs_net {
+	struct afs_uuid		uuid;
+	bool			live;		/* F if this namespace is being removed */
+
+	/* AF_RXRPC I/O stuff */
+	struct socket		*socket;
+	struct afs_call		*spare_incoming_call;
+	struct work_struct	charge_preallocation_work;
+	struct mutex		socket_mutex;
+	atomic_t		nr_outstanding_calls;
+	atomic_t		nr_superblocks;
+
+	/* Cell database */
+	struct list_head	cells;
+	struct afs_cell		*ws_cell;
+	rwlock_t		cells_lock;
+	struct rw_semaphore	cells_sem;
+	wait_queue_head_t	cells_freeable_wq;
+
+	struct rw_semaphore	proc_cells_sem;
+	struct list_head	proc_cells;
+
+	/* Volume location database */
+	struct list_head	vl_updates;		/* VL records in need-update order */
+	struct list_head	vl_graveyard;		/* Inactive VL records */
+	struct delayed_work	vl_reaper;
+	struct delayed_work	vl_updater;
+	spinlock_t		vl_updates_lock;
+	spinlock_t		vl_graveyard_lock;
+
+	/* File locking renewal management */
+	struct mutex		lock_manager_mutex;
+
+	/* Server database */
+	struct rb_root		servers;		/* Active servers */
+	rwlock_t		servers_lock;
+	struct list_head	server_graveyard;	/* Inactive server LRU list */
+	spinlock_t		server_graveyard_lock;
+	struct delayed_work	server_reaper;
+
+	/* Misc */
+	struct proc_dir_entry	*proc_afs;		/* /proc/net/afs directory */
+};
+
+extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns
+
 /*
  * AFS cell record
  */
 struct afs_cell {
 	atomic_t		usage;
 	struct list_head	link;		/* main cell list link */
+	struct afs_net		*net;		/* The network namespace */
 	struct key		*anonymous_key;	/* anonymous user key for this cell */
 	struct list_head	proc_link;	/* /proc cell list link */
 #ifdef CONFIG_AFS_FSCACHE
@@ -411,15 +466,6 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
-struct afs_uuid {
-	__be32		time_low;			/* low part of timestamp */
-	__be16		time_mid;			/* mid part of timestamp */
-	__be16		time_hi_and_version;		/* high part of timestamp and version  */
-	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-	__u8		clock_seq_low;			/* clock seq low */
-	__u8		node[6];			/* spatially unique node ID (MAC addr) */
-};
-
 /*****************************************************************************/
 /*
  * cache.c
@@ -440,6 +486,8 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
 /*
  * callback.c
  */
+extern struct workqueue_struct *afs_callback_update_worker;
+
 extern void afs_init_callback_state(struct afs_server *);
 extern void afs_broken_callback_work(struct work_struct *);
 extern void afs_break_callbacks(struct afs_server *, size_t,
@@ -448,22 +496,17 @@ extern void afs_discard_callback_on_delete(struct afs_vnode *);
 extern void afs_give_up_callback(struct afs_vnode *);
 extern void afs_dispatch_give_up_callbacks(struct work_struct *);
 extern void afs_flush_callback_breaks(struct afs_server *);
-extern int __init afs_callback_update_init(void);
-extern void afs_callback_update_kill(void);
 
 /*
  * cell.c
  */
-extern struct rw_semaphore afs_proc_cells_sem;
-extern struct list_head afs_proc_cells;
-
 #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
-extern int afs_cell_init(char *);
-extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool);
-extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool);
+extern int afs_cell_init(struct afs_net *, char *);
+extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool);
+extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool);
 extern struct afs_cell *afs_grab_cell(struct afs_cell *);
 extern void afs_put_cell(struct afs_cell *);
-extern void afs_cell_purge(void);
+extern void __net_exit afs_cell_purge(struct afs_net *);
 
 /*
  * cmservice.c
@@ -492,7 +535,8 @@ extern void afs_put_read(struct afs_read *);
 /*
  * flock.c
  */
-extern void __exit afs_kill_lock_manager(void);
+extern struct workqueue_struct *afs_lock_manager;
+
 extern void afs_lock_work(struct work_struct *);
 extern void afs_lock_may_be_available(struct afs_vnode *);
 extern int afs_lock(struct file *, int, struct file_lock *);
@@ -504,7 +548,7 @@ extern int afs_flock(struct file *, int, struct file_lock *);
 extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
 				    struct afs_vnode *, struct afs_volsync *,
 				    bool);
-extern int afs_fs_give_up_callbacks(struct afs_server *, bool);
+extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool);
 extern int afs_fs_fetch_data(struct afs_server *, struct key *,
 			     struct afs_vnode *, struct afs_read *, bool);
 extern int afs_fs_create(struct afs_server *, struct key *,
@@ -554,7 +598,35 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct afs_uuid afs_uuid;
+
+static inline struct afs_net *afs_d2net(struct dentry *dentry)
+{
+	return &__afs_net;
+}
+
+static inline struct afs_net *afs_i2net(struct inode *inode)
+{
+	return &__afs_net;
+}
+
+static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
+{
+	return &__afs_net;
+}
+
+static inline struct afs_net *afs_sock2net(struct sock *sk)
+{
+	return &__afs_net;
+}
+
+static inline struct afs_net *afs_get_net(struct afs_net *net)
+{
+	return net;
+}
+
+static inline void afs_put_net(struct afs_net *net)
+{
+}
 
 /*
  * misc.c
@@ -579,23 +651,24 @@ extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
 /*
  * proc.c
  */
-extern int afs_proc_init(void);
-extern void afs_proc_cleanup(void);
-extern int afs_proc_cell_setup(struct afs_cell *);
-extern void afs_proc_cell_remove(struct afs_cell *);
+extern int __net_init afs_proc_init(struct afs_net *);
+extern void __net_exit afs_proc_cleanup(struct afs_net *);
+extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
+extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
 
 /*
  * rxrpc.c
  */
-extern struct socket *afs_socket;
-extern atomic_t afs_outstanding_calls;
+extern struct workqueue_struct *afs_async_calls;
 
-extern int afs_open_socket(void);
-extern void afs_close_socket(void);
+extern int __net_init afs_open_socket(struct afs_net *);
+extern void __net_exit afs_close_socket(struct afs_net *);
+extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
 extern int afs_queue_call_work(struct afs_call *);
 extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool);
-extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
+extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
+					    const struct afs_call_type *,
 					    size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
@@ -629,37 +702,45 @@ do {								\
 
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
 					    const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
+extern struct afs_server *afs_find_server(struct afs_net *,
+					  const struct sockaddr_rxrpc *);
 extern void afs_put_server(struct afs_server *);
-extern void __exit afs_purge_servers(void);
+extern void afs_reap_server(struct work_struct *);
+extern void __net_exit afs_purge_servers(struct afs_net *);
 
 /*
  * super.c
  */
-extern int afs_fs_init(void);
-extern void afs_fs_exit(void);
+extern int __init afs_fs_init(void);
+extern void __exit afs_fs_exit(void);
 
 /*
  * vlclient.c
  */
-extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
+extern int afs_vl_get_entry_by_name(struct afs_net *,
+				    struct in_addr *, struct key *,
 				    const char *, struct afs_cache_vlocation *,
 				    bool);
-extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
+extern int afs_vl_get_entry_by_id(struct afs_net *,
+				  struct in_addr *, struct key *,
 				  afs_volid_t, afs_voltype_t,
 				  struct afs_cache_vlocation *, bool);
 
 /*
  * vlocation.c
  */
+extern struct workqueue_struct *afs_vlocation_update_worker;
+
 #define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
 
-extern int __init afs_vlocation_update_init(void);
-extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
+extern struct afs_vlocation *afs_vlocation_lookup(struct afs_net *,
+						  struct afs_cell *,
 						  struct key *,
 						  const char *, size_t);
-extern void afs_put_vlocation(struct afs_vlocation *);
-extern void afs_vlocation_purge(void);
+extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *);
+extern void afs_vlocation_updater(struct work_struct *);
+extern void afs_vlocation_reaper(struct work_struct *);
+extern void __net_exit afs_vlocation_purge(struct afs_net *);
 
 /*
  * vnode.c
@@ -707,7 +788,7 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
  */
 #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
 
-extern void afs_put_volume(struct afs_volume *);
+extern void afs_put_volume(struct afs_net *, struct afs_volume *);
 extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
 extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
 extern int afs_volume_release_fileserver(struct afs_vnode *,
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 9944770849da..87b1a9c8000d 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,30 +31,104 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
+struct afs_net __afs_net;
+
+/*
+ * Initialise an AFS network namespace record.
+ */
+static int __net_init afs_net_init(struct afs_net *net)
+{
+	int ret;
+
+	net->live = true;
+	generate_random_uuid((unsigned char *)&net->uuid);
+
+	INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+	mutex_init(&net->socket_mutex);
+	INIT_LIST_HEAD(&net->cells);
+	rwlock_init(&net->cells_lock);
+	init_rwsem(&net->cells_sem);
+	init_waitqueue_head(&net->cells_freeable_wq);
+	init_rwsem(&net->proc_cells_sem);
+	INIT_LIST_HEAD(&net->proc_cells);
+	INIT_LIST_HEAD(&net->vl_updates);
+	INIT_LIST_HEAD(&net->vl_graveyard);
+	INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper);
+	INIT_DELAYED_WORK(&net->vl_updater, afs_vlocation_updater);
+	spin_lock_init(&net->vl_updates_lock);
+	spin_lock_init(&net->vl_graveyard_lock);
+	net->servers = RB_ROOT;
+	rwlock_init(&net->servers_lock);
+	INIT_LIST_HEAD(&net->server_graveyard);
+	spin_lock_init(&net->server_graveyard_lock);
+	INIT_DELAYED_WORK(&net->server_reaper, afs_reap_server);
+
+	/* Register the /proc stuff */
+	ret = afs_proc_init(net);
+	if (ret < 0)
+		goto error_proc;
+
+	/* Initialise the cell DB */
+	ret = afs_cell_init(net, rootcell);
+	if (ret < 0)
+		goto error_cell_init;
+
+	/* Create the RxRPC transport */
+	ret = afs_open_socket(net);
+	if (ret < 0)
+		goto error_open_socket;
+
+	return 0;
+
+error_open_socket:
+	afs_vlocation_purge(net);
+	afs_cell_purge(net);
+error_cell_init:
+	afs_proc_cleanup(net);
+error_proc:
+	return ret;
+}
+
+/*
+ * Clean up and destroy an AFS network namespace record.
+ */
+static void __net_exit afs_net_exit(struct afs_net *net)
+{
+	net->live = false;
+	afs_close_socket(net);
+	afs_purge_servers(net);
+	afs_vlocation_purge(net);
+	afs_cell_purge(net);
+	afs_proc_cleanup(net);
+}
 
 /*
  * initialise the AFS client FS module
  */
 static int __init afs_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
 
-	generate_random_uuid((unsigned char *)&afs_uuid);
-
-	/* create workqueue */
-	ret = -ENOMEM;
 	afs_wq = alloc_workqueue("afs", 0, 0);
 	if (!afs_wq)
-		return ret;
-
-	/* register the /proc stuff */
-	ret = afs_proc_init();
-	if (ret < 0)
-		goto error_proc;
+		goto error_afs_wq;
+	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+	if (!afs_async_calls)
+		goto error_async;
+	afs_vlocation_update_worker =
+		alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0);
+	if (!afs_vlocation_update_worker)
+		goto error_vl_up;
+	afs_callback_update_worker =
+		alloc_ordered_workqueue("kafs_callbackd", WQ_MEM_RECLAIM);
+	if (!afs_callback_update_worker)
+		goto error_callback;
+	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+	if (!afs_lock_manager)
+		goto error_lockmgr;
 
 #ifdef CONFIG_AFS_FSCACHE
 	/* we want to be able to cache */
@@ -63,25 +137,9 @@ static int __init afs_init(void)
 		goto error_cache;
 #endif
 
-	/* initialise the cell DB */
-	ret = afs_cell_init(rootcell);
-	if (ret < 0)
-		goto error_cell_init;
-
-	/* initialise the VL update process */
-	ret = afs_vlocation_update_init();
-	if (ret < 0)
-		goto error_vl_update_init;
-
-	/* initialise the callback update process */
-	ret = afs_callback_update_init();
+	ret = afs_net_init(&__afs_net);
 	if (ret < 0)
-		goto error_callback_update_init;
-
-	/* create the RxRPC transport */
-	ret = afs_open_socket();
-	if (ret < 0)
-		goto error_open_socket;
+		goto error_net;
 
 	/* register the filesystems */
 	ret = afs_fs_init();
@@ -91,21 +149,22 @@ static int __init afs_init(void)
 	return ret;
 
 error_fs:
-	afs_close_socket();
-error_open_socket:
-	afs_callback_update_kill();
-error_callback_update_init:
-	afs_vlocation_purge();
-error_vl_update_init:
-	afs_cell_purge();
-error_cell_init:
+	afs_net_exit(&__afs_net);
+error_net:
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_unregister_netfs(&afs_cache_netfs);
 error_cache:
 #endif
-	afs_proc_cleanup();
-error_proc:
+	destroy_workqueue(afs_lock_manager);
+error_lockmgr:
+	destroy_workqueue(afs_callback_update_worker);
+error_callback:
+	destroy_workqueue(afs_vlocation_update_worker);
+error_vl_up:
+	destroy_workqueue(afs_async_calls);
+error_async:
 	destroy_workqueue(afs_wq);
+error_afs_wq:
 	rcu_barrier();
 	printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
 	return ret;
@@ -124,17 +183,15 @@ static void __exit afs_exit(void)
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
 
 	afs_fs_exit();
-	afs_kill_lock_manager();
-	afs_close_socket();
-	afs_purge_servers();
-	afs_callback_update_kill();
-	afs_vlocation_purge();
-	destroy_workqueue(afs_wq);
-	afs_cell_purge();
+	afs_net_exit(&__afs_net);
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_unregister_netfs(&afs_cache_netfs);
 #endif
-	afs_proc_cleanup();
+	destroy_workqueue(afs_lock_manager);
+	destroy_workqueue(afs_callback_update_worker);
+	destroy_workqueue(afs_vlocation_update_worker);
+	destroy_workqueue(afs_async_calls);
+	destroy_workqueue(afs_wq);
 	rcu_barrier();
 }
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 35efb9a31dd7..c93433460348 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,8 +17,15 @@
 #include <linux/uaccess.h>
 #include "internal.h"
 
-static struct proc_dir_entry *proc_afs;
+static inline struct afs_net *afs_proc2net(struct file *f)
+{
+	return &__afs_net;
+}
 
+static inline struct afs_net *afs_seq2net(struct seq_file *m)
+{
+	return &__afs_net; // TODO: use seq_file_net(m)
+}
 
 static int afs_proc_cells_open(struct inode *inode, struct file *file);
 static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
@@ -122,23 +129,23 @@ static const struct file_operations afs_proc_cell_servers_fops = {
 /*
  * initialise the /proc/fs/afs/ directory
  */
-int afs_proc_init(void)
+int afs_proc_init(struct afs_net *net)
 {
 	_enter("");
 
-	proc_afs = proc_mkdir("fs/afs", NULL);
-	if (!proc_afs)
+	net->proc_afs = proc_mkdir("fs/afs", NULL);
+	if (!net->proc_afs)
 		goto error_dir;
 
-	if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) ||
-	    !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops))
+	if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
+	    !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops))
 		goto error_tree;
 
 	_leave(" = 0");
 	return 0;
 
 error_tree:
-	remove_proc_subtree("fs/afs", NULL);
+	proc_remove(net->proc_afs);
 error_dir:
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
@@ -147,9 +154,10 @@ error_dir:
 /*
  * clean up the /proc/fs/afs/ directory
  */
-void afs_proc_cleanup(void)
+void afs_proc_cleanup(struct afs_net *net)
 {
-	remove_proc_subtree("fs/afs", NULL);
+	proc_remove(net->proc_afs);
+	net->proc_afs = NULL;
 }
 
 /*
@@ -176,25 +184,30 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
  */
 static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
 {
-	/* lock the list against modification */
-	down_read(&afs_proc_cells_sem);
-	return seq_list_start_head(&afs_proc_cells, *_pos);
+	struct afs_net *net = afs_seq2net(m);
+
+	down_read(&net->proc_cells_sem);
+	return seq_list_start_head(&net->proc_cells, *_pos);
 }
 
 /*
  * move to next cell in cells list
  */
-static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
+static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	return seq_list_next(v, &afs_proc_cells, pos);
+	struct afs_net *net = afs_seq2net(m);
+
+	return seq_list_next(v, &net->proc_cells, pos);
 }
 
 /*
  * clean up after reading from the cells list
  */
-static void afs_proc_cells_stop(struct seq_file *p, void *v)
+static void afs_proc_cells_stop(struct seq_file *m, void *v)
 {
-	up_read(&afs_proc_cells_sem);
+	struct afs_net *net = afs_seq2net(m);
+
+	up_read(&net->proc_cells_sem);
 }
 
 /*
@@ -203,8 +216,9 @@ static void afs_proc_cells_stop(struct seq_file *p, void *v)
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
 	struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+	struct afs_net *net = afs_seq2net(m);
 
-	if (v == &afs_proc_cells) {
+	if (v == &net->proc_cells) {
 		/* display header on line 1 */
 		seq_puts(m, "USE NAME\n");
 		return 0;
@@ -223,6 +237,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
 static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 				    size_t size, loff_t *_pos)
 {
+	struct afs_net *net = afs_proc2net(file);
 	char *kbuf, *name, *args;
 	int ret;
 
@@ -264,7 +279,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 	if (strcmp(kbuf, "add") == 0) {
 		struct afs_cell *cell;
 
-		cell = afs_cell_create(name, strlen(name), args, false);
+		cell = afs_cell_create(net, name, strlen(name), args, false);
 		if (IS_ERR(cell)) {
 			ret = PTR_ERR(cell);
 			goto done;
@@ -303,6 +318,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 				       const char __user *buf,
 				       size_t size, loff_t *_pos)
 {
+	struct afs_net *net = afs_proc2net(file);
 	char *kbuf, *s;
 	int ret;
 
@@ -322,7 +338,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 	/* determine command to perform */
 	_debug("rootcell=%s", kbuf);
 
-	ret = afs_cell_init(kbuf);
+	ret = afs_cell_init(net, kbuf);
 	if (ret >= 0)
 		ret = size;	/* consume everything, always */
 
@@ -334,13 +350,13 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 /*
  * initialise /proc/fs/afs/<cell>/
  */
-int afs_proc_cell_setup(struct afs_cell *cell)
+int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
 {
 	struct proc_dir_entry *dir;
 
 	_enter("%p{%s}", cell, cell->name);
 
-	dir = proc_mkdir(cell->name, proc_afs);
+	dir = proc_mkdir(cell->name, net->proc_afs);
 	if (!dir)
 		goto error_dir;
 
@@ -356,7 +372,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 	return 0;
 
 error_tree:
-	remove_proc_subtree(cell->name, proc_afs);
+	remove_proc_subtree(cell->name, net->proc_afs);
 error_dir:
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
@@ -365,11 +381,11 @@ error_dir:
 /*
  * remove /proc/fs/afs/<cell>/
  */
-void afs_proc_cell_remove(struct afs_cell *cell)
+void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
 {
 	_enter("");
 
-	remove_proc_subtree(cell->name, proc_afs);
+	remove_proc_subtree(cell->name, net->proc_afs);
 
 	_leave("");
 }
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 77f5420a1a24..656ceb285b85 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -17,10 +17,7 @@
 #include "internal.h"
 #include "afs_cm.h"
 
-struct socket *afs_socket; /* my RxRPC socket */
-static struct workqueue_struct *afs_async_calls;
-static struct afs_call *afs_spare_incoming_call;
-atomic_t afs_outstanding_calls;
+struct workqueue_struct *afs_async_calls;
 
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
 static int afs_wait_for_call_to_complete(struct afs_call *);
@@ -37,15 +34,11 @@ static const struct afs_call_type afs_RXCMxxxx = {
 	.abort_to_error	= afs_abort_to_error,
 };
 
-static void afs_charge_preallocation(struct work_struct *);
-
-static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
-
 /*
  * open an RxRPC socket and bind it to be a server for callback notifications
  * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
  */
-int afs_open_socket(void)
+int afs_open_socket(struct afs_net *net)
 {
 	struct sockaddr_rxrpc srx;
 	struct socket *socket;
@@ -53,11 +46,6 @@ int afs_open_socket(void)
 
 	_enter("");
 
-	ret = -ENOMEM;
-	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
-	if (!afs_async_calls)
-		goto error_0;
-
 	ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
 	if (ret < 0)
 		goto error_1;
@@ -85,16 +73,14 @@ int afs_open_socket(void)
 	if (ret < 0)
 		goto error_2;
 
-	afs_socket = socket;
-	afs_charge_preallocation(NULL);
+	net->socket = socket;
+	afs_charge_preallocation(&net->charge_preallocation_work);
 	_leave(" = 0");
 	return 0;
 
 error_2:
 	sock_release(socket);
 error_1:
-	destroy_workqueue(afs_async_calls);
-error_0:
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -102,36 +88,36 @@ error_0:
 /*
  * close the RxRPC socket AFS was using
  */
-void afs_close_socket(void)
+void afs_close_socket(struct afs_net *net)
 {
 	_enter("");
 
-	kernel_listen(afs_socket, 0);
+	kernel_listen(net->socket, 0);
 	flush_workqueue(afs_async_calls);
 
-	if (afs_spare_incoming_call) {
-		afs_put_call(afs_spare_incoming_call);
-		afs_spare_incoming_call = NULL;
+	if (net->spare_incoming_call) {
+		afs_put_call(net->spare_incoming_call);
+		net->spare_incoming_call = NULL;
 	}
 
-	_debug("outstanding %u", atomic_read(&afs_outstanding_calls));
-	wait_on_atomic_t(&afs_outstanding_calls, atomic_t_wait,
+	_debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
+	wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait,
 			 TASK_UNINTERRUPTIBLE);
 	_debug("no outstanding calls");
 
-	kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+	kernel_sock_shutdown(net->socket, SHUT_RDWR);
 	flush_workqueue(afs_async_calls);
-	sock_release(afs_socket);
+	sock_release(net->socket);
 
 	_debug("dework");
-	destroy_workqueue(afs_async_calls);
 	_leave("");
 }
 
 /*
  * Allocate a call.
  */
-static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
+static struct afs_call *afs_alloc_call(struct afs_net *net,
+				       const struct afs_call_type *type,
 				       gfp_t gfp)
 {
 	struct afs_call *call;
@@ -142,11 +128,12 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
 		return NULL;
 
 	call->type = type;
+	call->net = net;
 	atomic_set(&call->usage, 1);
 	INIT_WORK(&call->async_work, afs_process_async_call);
 	init_waitqueue_head(&call->waitq);
 
-	o = atomic_inc_return(&afs_outstanding_calls);
+	o = atomic_inc_return(&net->nr_outstanding_calls);
 	trace_afs_call(call, afs_call_trace_alloc, 1, o,
 		       __builtin_return_address(0));
 	return call;
@@ -157,8 +144,9 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
  */
 void afs_put_call(struct afs_call *call)
 {
+	struct afs_net *net = call->net;
 	int n = atomic_dec_return(&call->usage);
-	int o = atomic_read(&afs_outstanding_calls);
+	int o = atomic_read(&net->nr_outstanding_calls);
 
 	trace_afs_call(call, afs_call_trace_put, n + 1, o,
 		       __builtin_return_address(0));
@@ -169,7 +157,7 @@ void afs_put_call(struct afs_call *call)
 		ASSERT(call->type->name != NULL);
 
 		if (call->rxcall) {
-			rxrpc_kernel_end_call(afs_socket, call->rxcall);
+			rxrpc_kernel_end_call(net->socket, call->rxcall);
 			call->rxcall = NULL;
 		}
 		if (call->type->destructor)
@@ -178,11 +166,11 @@ void afs_put_call(struct afs_call *call)
 		kfree(call->request);
 		kfree(call);
 
-		o = atomic_dec_return(&afs_outstanding_calls);
+		o = atomic_dec_return(&net->nr_outstanding_calls);
 		trace_afs_call(call, afs_call_trace_free, 0, o,
 			       __builtin_return_address(0));
 		if (o == 0)
-			wake_up_atomic_t(&afs_outstanding_calls);
+			wake_up_atomic_t(&net->nr_outstanding_calls);
 	}
 }
 
@@ -194,7 +182,7 @@ int afs_queue_call_work(struct afs_call *call)
 	int u = atomic_inc_return(&call->usage);
 
 	trace_afs_call(call, afs_call_trace_work, u,
-		       atomic_read(&afs_outstanding_calls),
+		       atomic_read(&call->net->nr_outstanding_calls),
 		       __builtin_return_address(0));
 
 	INIT_WORK(&call->work, call->type->work);
@@ -207,12 +195,13 @@ int afs_queue_call_work(struct afs_call *call)
 /*
  * allocate a call with flat request and reply buffers
  */
-struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
+struct afs_call *afs_alloc_flat_call(struct afs_net *net,
+				     const struct afs_call_type *type,
 				     size_t request_size, size_t reply_max)
 {
 	struct afs_call *call;
 
-	call = afs_alloc_call(type, GFP_NOFS);
+	call = afs_alloc_call(net, type, GFP_NOFS);
 	if (!call)
 		goto nomem_call;
 
@@ -317,7 +306,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 		bytes = msg->msg_iter.count;
 		nr = msg->msg_iter.nr_segs;
 
-		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg,
+		ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg,
 					     bytes, afs_notify_end_request_tx);
 		for (loop = 0; loop < nr; loop++)
 			put_page(bv[loop].bv_page);
@@ -352,7 +341,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 	_debug("____MAKE %p{%s,%x} [%d]____",
 	       call, call->type->name, key_serial(call->key),
-	       atomic_read(&afs_outstanding_calls));
+	       atomic_read(&call->net->nr_outstanding_calls));
 
 	call->async = async;
 
@@ -376,7 +365,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	}
 
 	/* create a call */
-	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
+	rxcall = rxrpc_kernel_begin_call(call->net->socket, &srx, call->key,
 					 (unsigned long)call,
 					 tx_total_len, gfp,
 					 (async ?
@@ -410,7 +399,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	 */
 	if (!call->send_pages)
 		call->state = AFS_CALL_AWAIT_REPLY;
-	ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+	ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
 				     &msg, call->request_size,
 				     afs_notify_end_request_tx);
 	if (ret < 0)
@@ -432,13 +421,14 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 error_do_abort:
 	call->state = AFS_CALL_COMPLETE;
 	if (ret != -ECONNABORTED) {
-		rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
-					ret, "KSD");
+		rxrpc_kernel_abort_call(call->net->socket, rxcall,
+					RX_USER_ABORT, ret, "KSD");
 	} else {
 		abort_code = 0;
 		offset = 0;
-		rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
-				       false, &abort_code, &call->service_id);
+		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
+				       0, &offset, false, &abort_code,
+				       &call->service_id);
 		ret = call->type->abort_to_error(abort_code);
 	}
 error_kill_call:
@@ -464,7 +454,8 @@ static void afs_deliver_to_call(struct afs_call *call)
 	       ) {
 		if (call->state == AFS_CALL_AWAIT_ACK) {
 			size_t offset = 0;
-			ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+			ret = rxrpc_kernel_recv_data(call->net->socket,
+						     call->rxcall,
 						     NULL, 0, &offset, false,
 						     &call->abort_code,
 						     &call->service_id);
@@ -492,12 +483,12 @@ static void afs_deliver_to_call(struct afs_call *call)
 			goto call_complete;
 		case -ENOTCONN:
 			abort_code = RX_CALL_DEAD;
-			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, ret, "KNC");
 			goto save_error;
 		case -ENOTSUPP:
 			abort_code = RXGEN_OPCODE;
-			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, ret, "KIV");
 			goto save_error;
 		case -ENODATA:
@@ -507,7 +498,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 			abort_code = RXGEN_CC_UNMARSHAL;
 			if (call->state != AFS_CALL_AWAIT_REPLY)
 				abort_code = RXGEN_SS_UNMARSHAL;
-			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, -EBADMSG, "KUM");
 			goto save_error;
 		}
@@ -541,13 +532,13 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 
 	_enter("");
 
-	rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall);
+	rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
 	rtt2 = nsecs_to_jiffies64(rtt) * 2;
 	if (rtt2 < 2)
 		rtt2 = 2;
 
 	timeout = rtt2;
-	last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+	last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 
 	add_wait_queue(&call->waitq, &myself);
 	for (;;) {
@@ -564,7 +555,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 		if (call->state == AFS_CALL_COMPLETE)
 			break;
 
-		life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 		if (timeout == 0 &&
 		    life == last_life && signal_pending(current))
 				break;
@@ -583,7 +574,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 	/* Kill off the call if it's still live. */
 	if (call->state < AFS_CALL_COMPLETE) {
 		_debug("call interrupted");
-		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+		rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 					RX_USER_ABORT, -EINTR, "KWI");
 	}
 
@@ -621,7 +612,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
 	u = __atomic_add_unless(&call->usage, 1, 0);
 	if (u != 0) {
 		trace_afs_call(call, afs_call_trace_wake, u,
-			       atomic_read(&afs_outstanding_calls),
+			       atomic_read(&call->net->nr_outstanding_calls),
 			       __builtin_return_address(0));
 
 		if (!queue_work(afs_async_calls, &call->async_work))
@@ -685,13 +676,15 @@ static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
 /*
  * Charge the incoming call preallocation.
  */
-static void afs_charge_preallocation(struct work_struct *work)
+void afs_charge_preallocation(struct work_struct *work)
 {
-	struct afs_call *call = afs_spare_incoming_call;
+	struct afs_net *net =
+		container_of(work, struct afs_net, charge_preallocation_work);
+	struct afs_call *call = net->spare_incoming_call;
 
 	for (;;) {
 		if (!call) {
-			call = afs_alloc_call(&afs_RXCMxxxx, GFP_KERNEL);
+			call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL);
 			if (!call)
 				break;
 
@@ -700,7 +693,7 @@ static void afs_charge_preallocation(struct work_struct *work)
 			init_waitqueue_head(&call->waitq);
 		}
 
-		if (rxrpc_kernel_charge_accept(afs_socket,
+		if (rxrpc_kernel_charge_accept(net->socket,
 					       afs_wake_up_async_call,
 					       afs_rx_attach,
 					       (unsigned long)call,
@@ -708,7 +701,7 @@ static void afs_charge_preallocation(struct work_struct *work)
 			break;
 		call = NULL;
 	}
-	afs_spare_incoming_call = call;
+	net->spare_incoming_call = call;
 }
 
 /*
@@ -729,7 +722,9 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
 static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
 			    unsigned long user_call_ID)
 {
-	queue_work(afs_wq, &afs_charge_preallocation_work);
+	struct afs_net *net = afs_sock2net(sk);
+
+	queue_work(afs_wq, &net->charge_preallocation_work);
 }
 
 /*
@@ -784,11 +779,12 @@ static void afs_notify_end_reply_tx(struct sock *sock,
  */
 void afs_send_empty_reply(struct afs_call *call)
 {
+	struct afs_net *net = call->net;
 	struct msghdr msg;
 
 	_enter("");
 
-	rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0);
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0);
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
@@ -798,7 +794,7 @@ void afs_send_empty_reply(struct afs_call *call)
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0,
+	switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
 				       afs_notify_end_reply_tx)) {
 	case 0:
 		_leave(" [replied]");
@@ -806,7 +802,7 @@ void afs_send_empty_reply(struct afs_call *call)
 
 	case -ENOMEM:
 		_debug("oom");
-		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+		rxrpc_kernel_abort_call(net->socket, call->rxcall,
 					RX_USER_ABORT, -ENOMEM, "KOO");
 	default:
 		_leave(" [error]");
@@ -819,13 +815,14 @@ void afs_send_empty_reply(struct afs_call *call)
  */
 void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 {
+	struct afs_net *net = call->net;
 	struct msghdr msg;
 	struct kvec iov[1];
 	int n;
 
 	_enter("");
 
-	rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len);
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len);
 
 	iov[0].iov_base		= (void *) buf;
 	iov[0].iov_len		= len;
@@ -837,7 +834,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len,
+	n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
 				   afs_notify_end_reply_tx);
 	if (n >= 0) {
 		/* Success */
@@ -847,7 +844,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 
 	if (n == -ENOMEM) {
 		_debug("oom");
-		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+		rxrpc_kernel_abort_call(net->socket, call->rxcall,
 					RX_USER_ABORT, -ENOMEM, "KOO");
 	}
 	_leave(" [error]");
@@ -859,6 +856,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		     bool want_more)
 {
+	struct afs_net *net = call->net;
 	int ret;
 
 	_enter("{%s,%zu},,%zu,%d",
@@ -866,7 +864,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	ASSERTCMP(call->offset, <=, count);
 
-	ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
 				     buf, count, &call->offset,
 				     want_more, &call->abort_code,
 				     &call->service_id);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c001b1f2455f..e47fd9bc0ddc 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -15,32 +15,22 @@
 
 static unsigned afs_server_timeout = 10;	/* server timeout in seconds */
 
-static void afs_reap_server(struct work_struct *);
-
-/* tree of all the servers, indexed by IP address */
-static struct rb_root afs_servers = RB_ROOT;
-static DEFINE_RWLOCK(afs_servers_lock);
-
-/* LRU list of all the servers not currently in use */
-static LIST_HEAD(afs_server_graveyard);
-static DEFINE_SPINLOCK(afs_server_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
-
 /*
  * install a server record in the master tree
  */
 static int afs_install_server(struct afs_server *server)
 {
 	struct afs_server *xserver;
+	struct afs_net *net = server->cell->net;
 	struct rb_node **pp, *p;
 	int ret;
 
 	_enter("%p", server);
 
-	write_lock(&afs_servers_lock);
+	write_lock(&net->servers_lock);
 
 	ret = -EEXIST;
-	pp = &afs_servers.rb_node;
+	pp = &net->servers.rb_node;
 	p = NULL;
 	while (*pp) {
 		p = *pp;
@@ -55,11 +45,11 @@ static int afs_install_server(struct afs_server *server)
 	}
 
 	rb_link_node(&server->master_rb, p, pp);
-	rb_insert_color(&server->master_rb, &afs_servers);
+	rb_insert_color(&server->master_rb, &net->servers);
 	ret = 0;
 
 error:
-	write_unlock(&afs_servers_lock);
+	write_unlock(&net->servers_lock);
 	return ret;
 }
 
@@ -150,9 +140,9 @@ found_server_quickly:
 	read_unlock(&cell->servers_lock);
 no_longer_unused:
 	if (!list_empty(&server->grave)) {
-		spin_lock(&afs_server_graveyard_lock);
+		spin_lock(&cell->net->server_graveyard_lock);
 		list_del_init(&server->grave);
-		spin_unlock(&afs_server_graveyard_lock);
+		spin_unlock(&cell->net->server_graveyard_lock);
 	}
 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	return server;
@@ -178,7 +168,8 @@ server_in_two_cells:
 /*
  * look up a server by its IP address
  */
-struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
+struct afs_server *afs_find_server(struct afs_net *net,
+				   const struct sockaddr_rxrpc *srx)
 {
 	struct afs_server *server = NULL;
 	struct rb_node *p;
@@ -191,9 +182,9 @@ struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
 		return NULL;
 	}
 
-	read_lock(&afs_servers_lock);
+	read_lock(&net->servers_lock);
 
-	p = afs_servers.rb_node;
+	p = net->servers.rb_node;
 	while (p) {
 		server = rb_entry(p, struct afs_server, master_rb);
 
@@ -211,7 +202,7 @@ struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
 
 	server = NULL;
 found:
-	read_unlock(&afs_servers_lock);
+	read_unlock(&net->servers_lock);
 	ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
 	_leave(" = %p", server);
 	return server;
@@ -223,6 +214,8 @@ found:
  */
 void afs_put_server(struct afs_server *server)
 {
+	struct afs_net *net = server->cell->net;
+
 	if (!server)
 		return;
 
@@ -239,14 +232,14 @@ void afs_put_server(struct afs_server *server)
 
 	afs_flush_callback_breaks(server);
 
-	spin_lock(&afs_server_graveyard_lock);
+	spin_lock(&net->server_graveyard_lock);
 	if (atomic_read(&server->usage) == 0) {
-		list_move_tail(&server->grave, &afs_server_graveyard);
+		list_move_tail(&server->grave, &net->server_graveyard);
 		server->time_of_death = ktime_get_real_seconds();
-		queue_delayed_work(afs_wq, &afs_server_reaper,
-				   afs_server_timeout * HZ);
+		queue_delayed_work(afs_wq, &net->server_reaper,
+				   net->live ? afs_server_timeout * HZ : 0);
 	}
-	spin_unlock(&afs_server_graveyard_lock);
+	spin_unlock(&net->server_graveyard_lock);
 	_leave(" [dead]");
 }
 
@@ -272,42 +265,45 @@ static void afs_destroy_server(struct afs_server *server)
 /*
  * reap dead server records
  */
-static void afs_reap_server(struct work_struct *work)
+void afs_reap_server(struct work_struct *work)
 {
 	LIST_HEAD(corpses);
 	struct afs_server *server;
+	struct afs_net *net = container_of(work, struct afs_net, server_reaper.work);
 	unsigned long delay, expiry;
 	time64_t now;
 
 	now = ktime_get_real_seconds();
-	spin_lock(&afs_server_graveyard_lock);
+	spin_lock(&net->server_graveyard_lock);
 
-	while (!list_empty(&afs_server_graveyard)) {
-		server = list_entry(afs_server_graveyard.next,
+	while (!list_empty(&net->server_graveyard)) {
+		server = list_entry(net->server_graveyard.next,
 				    struct afs_server, grave);
 
 		/* the queue is ordered most dead first */
-		expiry = server->time_of_death + afs_server_timeout;
-		if (expiry > now) {
-			delay = (expiry - now) * HZ;
-			mod_delayed_work(afs_wq, &afs_server_reaper, delay);
-			break;
+		if (net->live) {
+			expiry = server->time_of_death + afs_server_timeout;
+			if (expiry > now) {
+				delay = (expiry - now) * HZ;
+				mod_delayed_work(afs_wq, &net->server_reaper, delay);
+				break;
+			}
 		}
 
 		write_lock(&server->cell->servers_lock);
-		write_lock(&afs_servers_lock);
+		write_lock(&net->servers_lock);
 		if (atomic_read(&server->usage) > 0) {
 			list_del_init(&server->grave);
 		} else {
 			list_move_tail(&server->grave, &corpses);
 			list_del_init(&server->link);
-			rb_erase(&server->master_rb, &afs_servers);
+			rb_erase(&server->master_rb, &net->servers);
 		}
-		write_unlock(&afs_servers_lock);
+		write_unlock(&net->servers_lock);
 		write_unlock(&server->cell->servers_lock);
 	}
 
-	spin_unlock(&afs_server_graveyard_lock);
+	spin_unlock(&net->server_graveyard_lock);
 
 	/* now reap the corpses we've extracted */
 	while (!list_empty(&corpses)) {
@@ -318,10 +314,10 @@ static void afs_reap_server(struct work_struct *work)
 }
 
 /*
- * discard all the server records for rmmod
+ * Discard all the server records from a net namespace when it is destroyed or
+ * the afs module is removed.
  */
-void __exit afs_purge_servers(void)
+void __net_exit afs_purge_servers(struct afs_net *net)
 {
-	afs_server_timeout = 0;
-	mod_delayed_work(afs_wq, &afs_server_reaper, 0);
+	mod_delayed_work(afs_wq, &net->server_reaper, 0);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 689173c0a682..d47a9bc46a69 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -25,11 +25,10 @@
 #include <linux/statfs.h>
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
+#include <linux/magic.h>
 #include <net/net_namespace.h>
 #include "internal.h"
 
-#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-
 static void afs_i_init_once(void *foo);
 static struct dentry *afs_mount(struct file_system_type *fs_type,
 		      int flags, const char *dev_name, void *data);
@@ -201,7 +200,8 @@ static int afs_parse_options(struct afs_mount_params *params,
 		token = match_token(p, afs_options_list, args);
 		switch (token) {
 		case afs_opt_cell:
-			cell = afs_cell_lookup(args[0].from,
+			cell = afs_cell_lookup(params->net,
+					       args[0].from,
 					       args[0].to - args[0].from,
 					       false);
 			if (IS_ERR(cell))
@@ -308,7 +308,7 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 
 	/* lookup the cell record */
 	if (cellname || !params->cell) {
-		cell = afs_cell_lookup(cellname, cellnamesz, true);
+		cell = afs_cell_lookup(params->net, cellname, cellnamesz, true);
 		if (IS_ERR(cell)) {
 			printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
 			       cellnamesz, cellnamesz, cellname ?: "");
@@ -334,7 +334,7 @@ static int afs_test_super(struct super_block *sb, void *data)
 	struct afs_super_info *as1 = data;
 	struct afs_super_info *as = sb->s_fs_info;
 
-	return as->volume == as1->volume;
+	return as->net == as1->net && as->volume == as1->volume;
 }
 
 static int afs_set_super(struct super_block *sb, void *data)
@@ -411,6 +411,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	_enter(",,%s,%p", dev_name, options);
 
 	memset(&params, 0, sizeof(params));
+	params.net = &__afs_net;
 
 	ret = -EINVAL;
 	if (current->nsproxy->net_ns != &init_net)
@@ -444,36 +445,32 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	}
 
 	/* allocate a superblock info record */
+	ret = -ENOMEM;
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
-	if (!as) {
-		ret = -ENOMEM;
-		afs_put_volume(vol);
-		goto error;
-	}
+	if (!as)
+		goto error_vol;
+
+	as->net = afs_get_net(params.net);
 	as->volume = vol;
 
 	/* allocate a deviceless superblock */
 	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
-		afs_put_volume(vol);
-		kfree(as);
-		goto error;
+		goto error_as;
 	}
 
 	if (!sb->s_root) {
 		/* initial superblock/root creation */
 		_debug("create");
 		ret = afs_fill_super(sb, &params);
-		if (ret < 0) {
-			deactivate_locked_super(sb);
-			goto error;
-		}
+		if (ret < 0)
+			goto error_sb;
 		sb->s_flags |= MS_ACTIVE;
 	} else {
 		_debug("reuse");
 		ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
-		afs_put_volume(vol);
+		afs_put_volume(params.net, vol);
 		kfree(as);
 	}
 
@@ -482,6 +479,14 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	_leave(" = 0 [%p]", sb);
 	return dget(sb->s_root);
 
+error_sb:
+	deactivate_locked_super(sb);
+	goto error;
+error_as:
+	afs_put_net(as->net);
+	kfree(as);
+error_vol:
+	afs_put_volume(params.net, vol);
 error:
 	afs_put_cell(params.cell);
 	key_put(params.key);
@@ -493,8 +498,10 @@ error:
 static void afs_kill_super(struct super_block *sb)
 {
 	struct afs_super_info *as = sb->s_fs_info;
+	struct afs_net *net = as->net;
+
 	kill_anon_super(sb);
-	afs_put_volume(as->volume);
+	afs_put_volume(net, as->volume);
 	kfree(as);
 }
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index a5e4cc561b6c..f5a043a9ba61 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -143,7 +143,8 @@ static const struct afs_call_type afs_RXVLGetEntryById = {
 /*
  * dispatch a get volume entry by name operation
  */
-int afs_vl_get_entry_by_name(struct in_addr *addr,
+int afs_vl_get_entry_by_name(struct afs_net *net,
+			     struct in_addr *addr,
 			     struct key *key,
 			     const char *volname,
 			     struct afs_cache_vlocation *entry,
@@ -159,7 +160,7 @@ int afs_vl_get_entry_by_name(struct in_addr *addr,
 	padsz = (4 - (volnamesz & 3)) & 3;
 	reqsz = 8 + volnamesz + padsz;
 
-	call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
+	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384);
 	if (!call)
 		return -ENOMEM;
 
@@ -183,7 +184,8 @@ int afs_vl_get_entry_by_name(struct in_addr *addr,
 /*
  * dispatch a get volume entry by ID operation
  */
-int afs_vl_get_entry_by_id(struct in_addr *addr,
+int afs_vl_get_entry_by_id(struct afs_net *net,
+			   struct in_addr *addr,
 			   struct key *key,
 			   afs_volid_t volid,
 			   afs_voltype_t voltype,
@@ -195,7 +197,7 @@ int afs_vl_get_entry_by_id(struct in_addr *addr,
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
+	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384);
 	if (!call)
 		return -ENOMEM;
 
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 37b7c3b342a6..ccb7aacfbeca 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -16,20 +16,11 @@
 #include <linux/sched.h>
 #include "internal.h"
 
+struct workqueue_struct *afs_vlocation_update_worker;
+
 static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
 static unsigned afs_vlocation_update_timeout = 10 * 60;
 
-static void afs_vlocation_reaper(struct work_struct *);
-static void afs_vlocation_updater(struct work_struct *);
-
-static LIST_HEAD(afs_vlocation_updates);
-static LIST_HEAD(afs_vlocation_graveyard);
-static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
-static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
-static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
-static struct workqueue_struct *afs_vlocation_update_worker;
-
 /*
  * iterate through the VL servers in a cell until one of them admits knowing
  * about the volume in question
@@ -52,8 +43,8 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
-					       false);
+		ret = afs_vl_get_entry_by_name(cell->net, &addr, key,
+					       vl->vldb.name, vldb, false);
 		switch (ret) {
 		case 0:
 			goto out;
@@ -110,8 +101,8 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
-					     false);
+		ret = afs_vl_get_entry_by_id(cell->net, &addr, key, volid,
+					     voltype, vldb, false);
 		switch (ret) {
 		case 0:
 			goto out;
@@ -335,7 +326,8 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
 /*
  * queue a vlocation record for updates
  */
-static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
+static void afs_vlocation_queue_for_updates(struct afs_net *net,
+					    struct afs_vlocation *vl)
 {
 	struct afs_vlocation *xvl;
 
@@ -343,25 +335,25 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
 	vl->update_at = ktime_get_real_seconds() +
 			afs_vlocation_update_timeout;
 
-	spin_lock(&afs_vlocation_updates_lock);
+	spin_lock(&net->vl_updates_lock);
 
-	if (!list_empty(&afs_vlocation_updates)) {
+	if (!list_empty(&net->vl_updates)) {
 		/* ... but wait at least 1 second more than the newest record
 		 * already queued so that we don't spam the VL server suddenly
 		 * with lots of requests
 		 */
-		xvl = list_entry(afs_vlocation_updates.prev,
+		xvl = list_entry(net->vl_updates.prev,
 				 struct afs_vlocation, update);
 		if (vl->update_at <= xvl->update_at)
 			vl->update_at = xvl->update_at + 1;
-	} else {
+	} else if (net->live) {
 		queue_delayed_work(afs_vlocation_update_worker,
-				   &afs_vlocation_update,
+				   &net->vl_updater,
 				   afs_vlocation_update_timeout * HZ);
 	}
 
-	list_add_tail(&vl->update, &afs_vlocation_updates);
-	spin_unlock(&afs_vlocation_updates_lock);
+	list_add_tail(&vl->update, &net->vl_updates);
+	spin_unlock(&net->vl_updates_lock);
 }
 
 /*
@@ -371,7 +363,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
  * - lookup in the local cache if not able to find on the VL server
  * - insert/update in the local cache if did get a VL response
  */
-struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
+struct afs_vlocation *afs_vlocation_lookup(struct afs_net *net,
+					   struct afs_cell *cell,
 					   struct key *key,
 					   const char *name,
 					   size_t namesz)
@@ -427,7 +420,7 @@ fill_in_record:
 #endif
 
 	/* schedule for regular updates */
-	afs_vlocation_queue_for_updates(vl);
+	afs_vlocation_queue_for_updates(net, vl);
 	goto success;
 
 found_in_memory:
@@ -436,9 +429,9 @@ found_in_memory:
 	atomic_inc(&vl->usage);
 	spin_unlock(&cell->vl_lock);
 	if (!list_empty(&vl->grave)) {
-		spin_lock(&afs_vlocation_graveyard_lock);
+		spin_lock(&net->vl_graveyard_lock);
 		list_del_init(&vl->grave);
-		spin_unlock(&afs_vlocation_graveyard_lock);
+		spin_unlock(&net->vl_graveyard_lock);
 	}
 	up_write(&cell->vl_sem);
 
@@ -481,7 +474,7 @@ error_abandon:
 	wake_up(&vl->waitq);
 error:
 	ASSERT(vl != NULL);
-	afs_put_vlocation(vl);
+	afs_put_vlocation(net, vl);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
@@ -489,7 +482,7 @@ error:
 /*
  * finish using a volume location record
  */
-void afs_put_vlocation(struct afs_vlocation *vl)
+void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl)
 {
 	if (!vl)
 		return;
@@ -503,22 +496,22 @@ void afs_put_vlocation(struct afs_vlocation *vl)
 		return;
 	}
 
-	spin_lock(&afs_vlocation_graveyard_lock);
+	spin_lock(&net->vl_graveyard_lock);
 	if (atomic_read(&vl->usage) == 0) {
 		_debug("buried");
-		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
+		list_move_tail(&vl->grave, &net->vl_graveyard);
 		vl->time_of_death = ktime_get_real_seconds();
-		queue_delayed_work(afs_wq, &afs_vlocation_reap,
+		queue_delayed_work(afs_wq, &net->vl_reaper,
 				   afs_vlocation_timeout * HZ);
 
 		/* suspend updates on this record */
 		if (!list_empty(&vl->update)) {
-			spin_lock(&afs_vlocation_updates_lock);
+			spin_lock(&net->vl_updates_lock);
 			list_del_init(&vl->update);
-			spin_unlock(&afs_vlocation_updates_lock);
+			spin_unlock(&net->vl_updates_lock);
 		}
 	}
-	spin_unlock(&afs_vlocation_graveyard_lock);
+	spin_unlock(&net->vl_graveyard_lock);
 	_leave(" [killed?]");
 }
 
@@ -539,31 +532,34 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl)
 /*
  * reap dead volume location records
  */
-static void afs_vlocation_reaper(struct work_struct *work)
+void afs_vlocation_reaper(struct work_struct *work)
 {
 	LIST_HEAD(corpses);
 	struct afs_vlocation *vl;
+	struct afs_net *net = container_of(work, struct afs_net, vl_reaper.work);
 	unsigned long delay, expiry;
 	time64_t now;
 
 	_enter("");
 
 	now = ktime_get_real_seconds();
-	spin_lock(&afs_vlocation_graveyard_lock);
+	spin_lock(&net->vl_graveyard_lock);
 
-	while (!list_empty(&afs_vlocation_graveyard)) {
-		vl = list_entry(afs_vlocation_graveyard.next,
+	while (!list_empty(&net->vl_graveyard)) {
+		vl = list_entry(net->vl_graveyard.next,
 				struct afs_vlocation, grave);
 
 		_debug("check %p", vl);
 
 		/* the queue is ordered most dead first */
-		expiry = vl->time_of_death + afs_vlocation_timeout;
-		if (expiry > now) {
-			delay = (expiry - now) * HZ;
-			_debug("delay %lu", delay);
-			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
-			break;
+		if (net->live) {
+			expiry = vl->time_of_death + afs_vlocation_timeout;
+			if (expiry > now) {
+				delay = (expiry - now) * HZ;
+				_debug("delay %lu", delay);
+				mod_delayed_work(afs_wq, &net->vl_reaper, delay);
+				break;
+			}
 		}
 
 		spin_lock(&vl->cell->vl_lock);
@@ -578,7 +574,7 @@ static void afs_vlocation_reaper(struct work_struct *work)
 		spin_unlock(&vl->cell->vl_lock);
 	}
 
-	spin_unlock(&afs_vlocation_graveyard_lock);
+	spin_unlock(&net->vl_graveyard_lock);
 
 	/* now reap the corpses we've extracted */
 	while (!list_empty(&corpses)) {
@@ -590,57 +586,47 @@ static void afs_vlocation_reaper(struct work_struct *work)
 	_leave("");
 }
 
-/*
- * initialise the VL update process
- */
-int __init afs_vlocation_update_init(void)
-{
-	afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
-						      WQ_MEM_RECLAIM, 0);
-	return afs_vlocation_update_worker ? 0 : -ENOMEM;
-}
-
 /*
  * discard all the volume location records for rmmod
  */
-void afs_vlocation_purge(void)
+void __net_exit afs_vlocation_purge(struct afs_net *net)
 {
-	afs_vlocation_timeout = 0;
-
-	spin_lock(&afs_vlocation_updates_lock);
-	list_del_init(&afs_vlocation_updates);
-	spin_unlock(&afs_vlocation_updates_lock);
-	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
-	destroy_workqueue(afs_vlocation_update_worker);
-
-	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
+	spin_lock(&net->vl_updates_lock);
+	list_del_init(&net->vl_updates);
+	spin_unlock(&net->vl_updates_lock);
+	mod_delayed_work(afs_vlocation_update_worker, &net->vl_updater, 0);
+	mod_delayed_work(afs_wq, &net->vl_reaper, 0);
 }
 
 /*
  * update a volume location
  */
-static void afs_vlocation_updater(struct work_struct *work)
+void afs_vlocation_updater(struct work_struct *work)
 {
 	struct afs_cache_vlocation vldb;
 	struct afs_vlocation *vl, *xvl;
+	struct afs_net *net = container_of(work, struct afs_net, vl_updater.work);
 	time64_t now;
 	long timeout;
 	int ret;
 
+	if (!net->live)
+		return;
+
 	_enter("");
 
 	now = ktime_get_real_seconds();
 
 	/* find a record to update */
-	spin_lock(&afs_vlocation_updates_lock);
+	spin_lock(&net->vl_updates_lock);
 	for (;;) {
-		if (list_empty(&afs_vlocation_updates)) {
-			spin_unlock(&afs_vlocation_updates_lock);
+		if (list_empty(&net->vl_updates) || !net->live) {
+			spin_unlock(&net->vl_updates_lock);
 			_leave(" [nothing]");
 			return;
 		}
 
-		vl = list_entry(afs_vlocation_updates.next,
+		vl = list_entry(net->vl_updates.next,
 				struct afs_vlocation, update);
 		if (atomic_read(&vl->usage) > 0)
 			break;
@@ -650,15 +636,15 @@ static void afs_vlocation_updater(struct work_struct *work)
 	timeout = vl->update_at - now;
 	if (timeout > 0) {
 		queue_delayed_work(afs_vlocation_update_worker,
-				   &afs_vlocation_update, timeout * HZ);
-		spin_unlock(&afs_vlocation_updates_lock);
+				   &net->vl_updater, timeout * HZ);
+		spin_unlock(&net->vl_updates_lock);
 		_leave(" [nothing]");
 		return;
 	}
 
 	list_del_init(&vl->update);
 	atomic_inc(&vl->usage);
-	spin_unlock(&afs_vlocation_updates_lock);
+	spin_unlock(&net->vl_updates_lock);
 
 	/* we can now perform the update */
 	_debug("update %s", vl->vldb.name);
@@ -688,18 +674,18 @@ static void afs_vlocation_updater(struct work_struct *work)
 	vl->update_at = ktime_get_real_seconds() +
 			afs_vlocation_update_timeout;
 
-	spin_lock(&afs_vlocation_updates_lock);
+	spin_lock(&net->vl_updates_lock);
 
-	if (!list_empty(&afs_vlocation_updates)) {
+	if (!list_empty(&net->vl_updates)) {
 		/* next update in 10 minutes, but wait at least 1 second more
 		 * than the newest record already queued so that we don't spam
 		 * the VL server suddenly with lots of requests
 		 */
-		xvl = list_entry(afs_vlocation_updates.prev,
+		xvl = list_entry(net->vl_updates.prev,
 				 struct afs_vlocation, update);
 		if (vl->update_at <= xvl->update_at)
 			vl->update_at = xvl->update_at + 1;
-		xvl = list_entry(afs_vlocation_updates.next,
+		xvl = list_entry(net->vl_updates.next,
 				 struct afs_vlocation, update);
 		timeout = xvl->update_at - now;
 		if (timeout < 0)
@@ -710,11 +696,10 @@ static void afs_vlocation_updater(struct work_struct *work)
 
 	ASSERT(list_empty(&vl->update));
 
-	list_add_tail(&vl->update, &afs_vlocation_updates);
+	list_add_tail(&vl->update, &net->vl_updates);
 
 	_debug("timeout %ld", timeout);
-	queue_delayed_work(afs_vlocation_update_worker,
-			   &afs_vlocation_update, timeout * HZ);
-	spin_unlock(&afs_vlocation_updates_lock);
-	afs_put_vlocation(vl);
+	queue_delayed_work(afs_vlocation_update_worker, &net->vl_updater, timeout * HZ);
+	spin_unlock(&net->vl_updates_lock);
+	afs_put_vlocation(net, vl);
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index db73d6dad02b..3d5363e0b7e1 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -54,7 +54,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 	       params->volnamesz, params->volnamesz, params->volname, params->rwpath);
 
 	/* lookup the volume location record */
-	vlocation = afs_vlocation_lookup(params->cell, params->key,
+	vlocation = afs_vlocation_lookup(params->net, params->cell, params->key,
 					 params->volname, params->volnamesz);
 	if (IS_ERR(vlocation)) {
 		ret = PTR_ERR(vlocation);
@@ -138,7 +138,7 @@ success:
 	_debug("kAFS selected %s volume %08x",
 	       afs_voltypes[volume->type], volume->vid);
 	up_write(&params->cell->vl_sem);
-	afs_put_vlocation(vlocation);
+	afs_put_vlocation(params->net, vlocation);
 	_leave(" = %p", volume);
 	return volume;
 
@@ -146,7 +146,7 @@ success:
 error_up:
 	up_write(&params->cell->vl_sem);
 error:
-	afs_put_vlocation(vlocation);
+	afs_put_vlocation(params->net, vlocation);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 
@@ -163,7 +163,7 @@ error_discard:
 /*
  * destroy a volume record
  */
-void afs_put_volume(struct afs_volume *volume)
+void afs_put_volume(struct afs_net *net, struct afs_volume *volume)
 {
 	struct afs_vlocation *vlocation;
 	int loop;
@@ -195,7 +195,7 @@ void afs_put_volume(struct afs_volume *volume)
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(volume->cache, 0);
 #endif
-	afs_put_vlocation(vlocation);
+	afs_put_vlocation(net, vlocation);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
 		afs_put_server(volume->servers[loop]);
-- 
cgit v1.2.3


From e3b2ffe0f0e1471854dc53bb69ff452e65cc88f2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:45 +0000
Subject: afs: Close the rxrpc socket only after purging the servers

Close the rxrpc socket only after we've purged the server records (and also
cell and volume records which might refer to servers) so that we can give
up the callbacks on each server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/afs')

diff --git a/fs/afs/main.c b/fs/afs/main.c
index 87b1a9c8000d..6bd2f3a426de 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -96,10 +96,10 @@ error_proc:
 static void __net_exit afs_net_exit(struct afs_net *net)
 {
 	net->live = false;
-	afs_close_socket(net);
 	afs_purge_servers(net);
 	afs_vlocation_purge(net);
 	afs_cell_purge(net);
+	afs_close_socket(net);
 	afs_proc_cleanup(net);
 }
 
-- 
cgit v1.2.3


From 59fa1c4a9f528c2a1556f4b2cd4e055b560c1c0a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:45 +0000
Subject: afs: Fix server reaping

Fix server reaping and make sure it's all done before we start trying to
purge cells, given that servers currently pin cells.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h |  5 ++++-
 fs/afs/main.c     |  3 ++-
 fs/afs/server.c   | 59 +++++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 57 insertions(+), 10 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 53bd11d73469..bc9ded443b11 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -238,7 +238,9 @@ struct afs_net {
 	rwlock_t		servers_lock;
 	struct list_head	server_graveyard;	/* Inactive server LRU list */
 	spinlock_t		server_graveyard_lock;
-	struct delayed_work	server_reaper;
+	struct timer_list	server_timer;
+	struct work_struct	server_reaper;
+	atomic_t		servers_outstanding;
 
 	/* Misc */
 	struct proc_dir_entry	*proc_afs;		/* /proc/net/afs directory */
@@ -700,6 +702,7 @@ do {								\
 	atomic_inc(&(S)->usage);				\
 } while(0)
 
+extern void afs_server_timer(struct timer_list *);
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
 					    const struct in_addr *);
 extern struct afs_server *afs_find_server(struct afs_net *,
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 6bd2f3a426de..38e15b1f0eec 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -62,7 +62,8 @@ static int __net_init afs_net_init(struct afs_net *net)
 	rwlock_init(&net->servers_lock);
 	INIT_LIST_HEAD(&net->server_graveyard);
 	spin_lock_init(&net->server_graveyard_lock);
-	INIT_DELAYED_WORK(&net->server_reaper, afs_reap_server);
+	INIT_WORK(&net->server_reaper, afs_reap_server);
+	timer_setup(&net->server_timer, afs_server_timer, 0);
 
 	/* Register the /proc stuff */
 	ret = afs_proc_init(net);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e47fd9bc0ddc..33aeb527ac7e 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -15,6 +15,25 @@
 
 static unsigned afs_server_timeout = 10;	/* server timeout in seconds */
 
+static void afs_inc_servers_outstanding(struct afs_net *net)
+{
+	atomic_inc(&net->servers_outstanding);
+}
+
+static void afs_dec_servers_outstanding(struct afs_net *net)
+{
+	if (atomic_dec_and_test(&net->servers_outstanding))
+		wake_up_atomic_t(&net->servers_outstanding);
+}
+
+void afs_server_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, server_timer);
+
+	if (!queue_work(afs_wq, &net->server_reaper))
+		afs_dec_servers_outstanding(net);
+}
+
 /*
  * install a server record in the master tree
  */
@@ -81,6 +100,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 
 		memcpy(&server->addr, addr, sizeof(struct in_addr));
 		server->addr.s_addr = addr->s_addr;
+		afs_inc_servers_outstanding(cell->net);
 		_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	} else {
 		_leave(" = NULL [nomem]");
@@ -159,6 +179,7 @@ found_server:
 server_in_two_cells:
 	write_unlock(&cell->servers_lock);
 	kfree(candidate);
+	afs_dec_servers_outstanding(cell->net);
 	printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
 	       addr);
 	_leave(" = -EEXIST");
@@ -208,6 +229,18 @@ found:
 	return server;
 }
 
+static void afs_set_server_timer(struct afs_net *net, time64_t delay)
+{
+	afs_inc_servers_outstanding(net);
+	if (net->live) {
+		if (timer_reduce(&net->server_timer, jiffies + delay * HZ))
+			afs_dec_servers_outstanding(net);
+	} else {
+		if (!queue_work(afs_wq, &net->server_reaper))
+			afs_dec_servers_outstanding(net);
+	}
+}
+
 /*
  * destroy a server record
  * - removes from the cell list
@@ -236,8 +269,7 @@ void afs_put_server(struct afs_server *server)
 	if (atomic_read(&server->usage) == 0) {
 		list_move_tail(&server->grave, &net->server_graveyard);
 		server->time_of_death = ktime_get_real_seconds();
-		queue_delayed_work(afs_wq, &net->server_reaper,
-				   net->live ? afs_server_timeout * HZ : 0);
+		afs_set_server_timer(net, afs_server_timeout);
 	}
 	spin_unlock(&net->server_graveyard_lock);
 	_leave(" [dead]");
@@ -246,7 +278,7 @@ void afs_put_server(struct afs_server *server)
 /*
  * destroy a dead server
  */
-static void afs_destroy_server(struct afs_server *server)
+static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 {
 	_enter("%p", server);
 
@@ -260,6 +292,7 @@ static void afs_destroy_server(struct afs_server *server)
 
 	afs_put_cell(server->cell);
 	kfree(server);
+	afs_dec_servers_outstanding(net);
 }
 
 /*
@@ -269,7 +302,7 @@ void afs_reap_server(struct work_struct *work)
 {
 	LIST_HEAD(corpses);
 	struct afs_server *server;
-	struct afs_net *net = container_of(work, struct afs_net, server_reaper.work);
+	struct afs_net *net = container_of(work, struct afs_net, server_reaper);
 	unsigned long delay, expiry;
 	time64_t now;
 
@@ -284,8 +317,8 @@ void afs_reap_server(struct work_struct *work)
 		if (net->live) {
 			expiry = server->time_of_death + afs_server_timeout;
 			if (expiry > now) {
-				delay = (expiry - now) * HZ;
-				mod_delayed_work(afs_wq, &net->server_reaper, delay);
+				delay = (expiry - now);
+				afs_set_server_timer(net, delay);
 				break;
 			}
 		}
@@ -309,8 +342,10 @@ void afs_reap_server(struct work_struct *work)
 	while (!list_empty(&corpses)) {
 		server = list_entry(corpses.next, struct afs_server, grave);
 		list_del(&server->grave);
-		afs_destroy_server(server);
+		afs_destroy_server(net, server);
 	}
+
+	afs_dec_servers_outstanding(net);
 }
 
 /*
@@ -319,5 +354,13 @@ void afs_reap_server(struct work_struct *work)
  */
 void __net_exit afs_purge_servers(struct afs_net *net)
 {
-	mod_delayed_work(afs_wq, &net->server_reaper, 0);
+	if (del_timer_sync(&net->server_timer))
+		atomic_dec(&net->servers_outstanding);
+
+	afs_inc_servers_outstanding(net);
+	if (!queue_work(afs_wq, &net->server_reaper))
+		afs_dec_servers_outstanding(net);
+
+	wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
+			 TASK_UNINTERRUPTIBLE);
 }
-- 
cgit v1.2.3


From 49566f6f06b38d7c1a5c7eacc8a38c6ea2e36549 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:46 +0000
Subject: afs: Note the cell in the superblock info also

Keep a reference to the cell in the superblock info structure in addition
to the volume and net pointers.  This will make it easier to clean up in a
future patch in which afs_put_volume() will need the cell pointer.

Whilst we're at it, make the cell and volume getting functions return a
pointer to the object got to make the call sites look neater.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h | 15 +++++++++++++--
 fs/afs/super.c    | 57 ++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 48 insertions(+), 24 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bc9ded443b11..2d90cb7605f3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -178,6 +178,7 @@ struct afs_writeback {
  */
 struct afs_super_info {
 	struct afs_net		*net;		/* Network namespace */
+	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
 	char			rwparent;	/* T if parent is R/W AFS volume */
 };
@@ -502,7 +503,12 @@ extern void afs_flush_callback_breaks(struct afs_server *);
 /*
  * cell.c
  */
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
+static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
+{
+	if (cell)
+		atomic_inc(&cell->usage);
+	return cell;
+}
 extern int afs_cell_init(struct afs_net *, char *);
 extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool);
 extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool);
@@ -789,7 +795,12 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
 /*
  * volume.c
  */
-#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
+{
+	if (volume)
+		atomic_inc(&volume->usage);
+	return volume;
+}
 
 extern void afs_put_volume(struct afs_net *, struct afs_volume *);
 extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index d47a9bc46a69..e43f94ecc391 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -394,17 +394,38 @@ error:
 	return ret;
 }
 
+static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
+{
+	struct afs_super_info *as;
+
+	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+	if (as) {
+		as->net = afs_get_net(params->net);
+		as->cell = afs_get_cell(params->cell);
+	}
+	return as;
+}
+
+static void afs_destroy_sbi(struct afs_super_info *as)
+{
+	if (as) {
+		afs_put_volume(as->net, as->volume);
+		afs_put_cell(as->cell);
+		afs_put_net(as->net);
+		kfree(as);
+	}
+}
+
 /*
  * get an AFS superblock
  */
 static struct dentry *afs_mount(struct file_system_type *fs_type,
-		      int flags, const char *dev_name, void *options)
+				int flags, const char *dev_name, void *options)
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
 	struct afs_volume *vol;
 	struct key *key;
-	char *new_opts = kstrdup(options, GFP_KERNEL);
 	struct afs_super_info *as;
 	int ret;
 
@@ -437,20 +458,18 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	}
 	params.key = key;
 
+	/* allocate a superblock info record */
+	ret = -ENOMEM;
+	as = afs_alloc_sbi(&params);
+	if (!as)
+		goto error;
+
 	/* parse the device name */
 	vol = afs_volume_lookup(&params);
 	if (IS_ERR(vol)) {
 		ret = PTR_ERR(vol);
 		goto error;
 	}
-
-	/* allocate a superblock info record */
-	ret = -ENOMEM;
-	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
-	if (!as)
-		goto error_vol;
-
-	as->net = afs_get_net(params.net);
 	as->volume = vol;
 
 	/* allocate a deviceless superblock */
@@ -466,31 +485,27 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 		ret = afs_fill_super(sb, &params);
 		if (ret < 0)
 			goto error_sb;
+		as = NULL;
 		sb->s_flags |= MS_ACTIVE;
 	} else {
 		_debug("reuse");
 		ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
-		afs_put_volume(params.net, vol);
-		kfree(as);
+		afs_destroy_sbi(as);
+		as = NULL;
 	}
 
 	afs_put_cell(params.cell);
-	kfree(new_opts);
+	key_put(params.key);
 	_leave(" = 0 [%p]", sb);
 	return dget(sb->s_root);
 
 error_sb:
 	deactivate_locked_super(sb);
-	goto error;
 error_as:
-	afs_put_net(as->net);
-	kfree(as);
-error_vol:
-	afs_put_volume(params.net, vol);
+	afs_destroy_sbi(as);
 error:
 	afs_put_cell(params.cell);
 	key_put(params.key);
-	kfree(new_opts);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
@@ -498,11 +513,9 @@ error:
 static void afs_kill_super(struct super_block *sb)
 {
 	struct afs_super_info *as = sb->s_fs_info;
-	struct afs_net *net = as->net;
 
 	kill_anon_super(sb);
-	afs_put_volume(net, as->volume);
-	kfree(as);
+	afs_destroy_sbi(as);
 }
 
 /*
-- 
cgit v1.2.3


From 9ed900b1160ef306bc74ad0228d7ab199234c758 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:46 +0000
Subject: afs: Push the net ns pointer to more places

Push the network namespace pointer to more places in AFS, including the
afs_server structure (which doesn't hold a ref on the netns).

In particular, afs_put_cell() now takes requires a net ns parameter so that
it can safely alter the netns after decrementing the cell usage count - the
cell will be deallocated by a background thread after being cached for a
period, which means that it's not safe to access it after reducing its
usage count.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cell.c      | 14 +++++++-------
 fs/afs/cmservice.c |  2 +-
 fs/afs/dir.c       | 12 ++++++------
 fs/afs/inode.c     |  2 +-
 fs/afs/internal.h  |  7 ++++---
 fs/afs/proc.c      |  2 +-
 fs/afs/server.c    |  7 +++----
 fs/afs/super.c     | 12 ++++++------
 fs/afs/vlocation.c |  6 +++---
 fs/afs/vnode.c     | 28 ++++++++++++++--------------
 fs/afs/volume.c    | 20 ++++++++++----------
 11 files changed, 56 insertions(+), 56 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index bd570fa539a0..2224e335eed7 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -251,7 +251,7 @@ int afs_cell_init(struct afs_net *net, char *rootcell)
 	old_root = net->ws_cell;
 	net->ws_cell = new_root;
 	write_unlock(&net->cells_lock);
-	afs_put_cell(old_root);
+	afs_put_cell(net, old_root);
 
 	_leave(" = 0");
 	return 0;
@@ -336,7 +336,7 @@ struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
 /*
  * destroy a cell record
  */
-void afs_put_cell(struct afs_cell *cell)
+void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
 {
 	if (!cell)
 		return;
@@ -347,10 +347,10 @@ void afs_put_cell(struct afs_cell *cell)
 
 	/* to prevent a race, the decrement and the dequeue must be effectively
 	 * atomic */
-	write_lock(&cell->net->cells_lock);
+	write_lock(&net->cells_lock);
 
 	if (likely(!atomic_dec_and_test(&cell->usage))) {
-		write_unlock(&cell->net->cells_lock);
+		write_unlock(&net->cells_lock);
 		_leave("");
 		return;
 	}
@@ -358,9 +358,9 @@ void afs_put_cell(struct afs_cell *cell)
 	ASSERT(list_empty(&cell->servers));
 	ASSERT(list_empty(&cell->vl_list));
 
-	wake_up(&cell->net->cells_freeable_wq);
+	wake_up(&net->cells_freeable_wq);
 
-	write_unlock(&cell->net->cells_lock);
+	write_unlock(&net->cells_lock);
 
 	_leave(" [unused]");
 }
@@ -424,7 +424,7 @@ void afs_cell_purge(struct afs_net *net)
 
 	_enter("");
 
-	afs_put_cell(net->ws_cell);
+	afs_put_cell(net, net->ws_cell);
 
 	down_write(&net->cells_sem);
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 30ce4be4165f..9ad39f8a7e87 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -151,7 +151,7 @@ static void afs_cm_destructor(struct afs_call *call)
 		afs_break_callbacks(call->server, call->count, call->request);
 	}
 
-	afs_put_server(call->server);
+	afs_put_server(call->net, call->server);
 	call->server = NULL;
 	kfree(call->buffer);
 	call->buffer = NULL;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 613a77058263..97ec6a74589e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -771,7 +771,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 
 	d_instantiate(dentry, inode);
 	if (d_unhashed(dentry)) {
@@ -783,7 +783,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return 0;
 
 iget_error:
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 mkdir_error:
 	key_put(key);
 error:
@@ -948,7 +948,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 
 	d_instantiate(dentry, inode);
 	if (d_unhashed(dentry)) {
@@ -960,7 +960,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	return 0;
 
 iget_error:
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 create_error:
 	key_put(key);
 error:
@@ -1060,7 +1060,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 
 	d_instantiate(dentry, inode);
 	if (d_unhashed(dentry)) {
@@ -1072,7 +1072,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	return 0;
 
 iget_error:
-	afs_put_server(server);
+	afs_put_server(afs_i2net(dir), server);
 create_error:
 	key_put(key);
 error:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 342316a9e3e0..fbb441d25022 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -437,7 +437,7 @@ void afs_evict_inode(struct inode *inode)
 		spin_lock(&vnode->server->fs_lock);
 		rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
 		spin_unlock(&vnode->server->fs_lock);
-		afs_put_server(vnode->server);
+		afs_put_server(afs_i2net(inode), vnode->server);
 		vnode->server = NULL;
 	}
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 2d90cb7605f3..7cd30ae71f91 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -335,6 +335,7 @@ struct afs_server {
 	atomic_t		usage;
 	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
 	struct in_addr		addr;		/* server address */
+	struct afs_net		*net;		/* Network namespace in which the server resides */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
 	struct list_head	grave;		/* link in master graveyard list */
@@ -513,7 +514,7 @@ extern int afs_cell_init(struct afs_net *, char *);
 extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool);
 extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool);
 extern struct afs_cell *afs_grab_cell(struct afs_cell *);
-extern void afs_put_cell(struct afs_cell *);
+extern void afs_put_cell(struct afs_net *, struct afs_cell *);
 extern void __net_exit afs_cell_purge(struct afs_net *);
 
 /*
@@ -713,7 +714,7 @@ extern struct afs_server *afs_lookup_server(struct afs_cell *,
 					    const struct in_addr *);
 extern struct afs_server *afs_find_server(struct afs_net *,
 					  const struct sockaddr_rxrpc *);
-extern void afs_put_server(struct afs_server *);
+extern void afs_put_server(struct afs_net *, struct afs_server *);
 extern void afs_reap_server(struct work_struct *);
 extern void __net_exit afs_purge_servers(struct afs_net *);
 
@@ -802,7 +803,7 @@ static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
 	return volume;
 }
 
-extern void afs_put_volume(struct afs_net *, struct afs_volume *);
+extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
 extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
 extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
 extern int afs_volume_release_fileserver(struct afs_vnode *,
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index c93433460348..677a453b08bf 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -285,7 +285,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 			goto done;
 		}
 
-		afs_put_cell(cell);
+		afs_put_cell(net, cell);
 		printk("kAFS: Added new cell '%s'\n", name);
 	} else {
 		goto inval;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 33aeb527ac7e..d8044be913f0 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -85,6 +85,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
 	if (server) {
 		atomic_set(&server->usage, 1);
+		server->net = cell->net;
 		server->cell = cell;
 
 		INIT_LIST_HEAD(&server->link);
@@ -245,10 +246,8 @@ static void afs_set_server_timer(struct afs_net *net, time64_t delay)
  * destroy a server record
  * - removes from the cell list
  */
-void afs_put_server(struct afs_server *server)
+void afs_put_server(struct afs_net *net, struct afs_server *server)
 {
-	struct afs_net *net = server->cell->net;
-
 	if (!server)
 		return;
 
@@ -290,7 +289,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 	ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
 	ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
 
-	afs_put_cell(server->cell);
+	afs_put_cell(server->net, server->cell);
 	kfree(server);
 	afs_dec_servers_outstanding(net);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e43f94ecc391..dd218f370359 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -206,7 +206,7 @@ static int afs_parse_options(struct afs_mount_params *params,
 					       false);
 			if (IS_ERR(cell))
 				return PTR_ERR(cell);
-			afs_put_cell(params->cell);
+			afs_put_cell(params->net, params->cell);
 			params->cell = cell;
 			break;
 
@@ -314,7 +314,7 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 			       cellnamesz, cellnamesz, cellname ?: "");
 			return PTR_ERR(cell);
 		}
-		afs_put_cell(params->cell);
+		afs_put_cell(params->net, params->cell);
 		params->cell = cell;
 	}
 
@@ -409,8 +409,8 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
 static void afs_destroy_sbi(struct afs_super_info *as)
 {
 	if (as) {
-		afs_put_volume(as->net, as->volume);
-		afs_put_cell(as->cell);
+		afs_put_volume(as->cell, as->volume);
+		afs_put_cell(as->net, as->cell);
 		afs_put_net(as->net);
 		kfree(as);
 	}
@@ -494,7 +494,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 		as = NULL;
 	}
 
-	afs_put_cell(params.cell);
+	afs_put_cell(params.net, params.cell);
 	key_put(params.key);
 	_leave(" = 0 [%p]", sb);
 	return dget(sb->s_root);
@@ -504,7 +504,7 @@ error_sb:
 error_as:
 	afs_destroy_sbi(as);
 error:
-	afs_put_cell(params.cell);
+	afs_put_cell(params.net, params.cell);
 	key_put(params.key);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index ccb7aacfbeca..cf7e02d5fa3f 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -518,14 +518,14 @@ void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl)
 /*
  * destroy a dead volume location record
  */
-static void afs_vlocation_destroy(struct afs_vlocation *vl)
+static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl)
 {
 	_enter("%p", vl);
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(vl->cache, 0);
 #endif
-	afs_put_cell(vl->cell);
+	afs_put_cell(net, vl->cell);
 	kfree(vl);
 }
 
@@ -580,7 +580,7 @@ void afs_vlocation_reaper(struct work_struct *work)
 	while (!list_empty(&corpses)) {
 		vl = list_entry(corpses.next, struct afs_vlocation, grave);
 		list_del(&vl->grave);
-		afs_vlocation_destroy(vl);
+		afs_vlocation_destroy(net, vl);
 	}
 
 	_leave("");
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index dcb956143c86..d5ef834ba4ac 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -73,7 +73,7 @@ static void afs_install_vnode(struct afs_vnode *vnode,
 
 	afs_get_server(server);
 	vnode->server = server;
-	afs_put_server(old_server);
+	afs_put_server(afs_v2net(vnode), old_server);
 
 	/* insert into the server's vnode tree in FID order */
 	spin_lock(&server->fs_lock);
@@ -196,7 +196,7 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
 		spin_unlock(&server->fs_lock);
 
 		vnode->server = NULL;
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		ASSERT(!vnode->cb_promised);
 	}
@@ -225,7 +225,7 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 	spin_unlock(&vnode->lock);
 
 	wake_up_all(&vnode->update_waitq);
-	afs_put_server(oldserver);
+	afs_put_server(afs_v2net(vnode), oldserver);
 	_leave("");
 }
 
@@ -368,7 +368,7 @@ get_anyway:
 		if (auth_vnode)
 			afs_cache_permit(vnode, key, acl_order);
 		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		_debug("failed [%d]", ret);
 		afs_vnode_status_update_failed(vnode, ret);
@@ -428,7 +428,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 	/* adjust the flags */
 	if (ret == 0) {
 		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		afs_vnode_status_update_failed(vnode, ret);
 	}
@@ -540,7 +540,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
 	/* adjust the flags */
 	if (ret == 0) {
 		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		afs_vnode_status_update_failed(vnode, ret);
 	}
@@ -603,7 +603,7 @@ int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
 	if (ret == 0) {
 		afs_vnode_finalise_status_update(vnode, server);
 		afs_vnode_finalise_status_update(dvnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(dvnode), server);
 	} else {
 		afs_vnode_status_update_failed(vnode, ret);
 		afs_vnode_status_update_failed(dvnode, ret);
@@ -738,7 +738,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
 		afs_vnode_finalise_status_update(orig_dvnode, server);
 		if (new_dvnode != orig_dvnode)
 			afs_vnode_finalise_status_update(new_dvnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(orig_dvnode), server);
 	} else {
 		afs_vnode_status_update_failed(orig_dvnode, ret);
 		if (new_dvnode != orig_dvnode)
@@ -802,7 +802,7 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
 	/* adjust the flags */
 	if (ret == 0) {
 		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		afs_vnode_status_update_failed(vnode, ret);
 	}
@@ -854,7 +854,7 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
 	/* adjust the flags */
 	if (ret == 0) {
 		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 	} else {
 		afs_vnode_status_update_failed(vnode, ret);
 	}
@@ -900,7 +900,7 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 
 	/* adjust the flags */
 	if (ret == 0)
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 
 	_leave(" = %d", ret);
 	return ret;
@@ -939,7 +939,7 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
 
 	/* adjust the flags */
 	if (ret == 0)
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 
 	_leave(" = %d", ret);
 	return ret;
@@ -977,7 +977,7 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
 
 	/* adjust the flags */
 	if (ret == 0)
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 
 	_leave(" = %d", ret);
 	return ret;
@@ -1015,7 +1015,7 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
 
 	/* adjust the flags */
 	if (ret == 0)
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 
 	_leave(" = %d", ret);
 	return ret;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 3d5363e0b7e1..e2f0e8ec527d 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -154,7 +154,7 @@ error_discard:
 	up_write(&params->cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
-		afs_put_server(volume->servers[loop]);
+		afs_put_server(params->net, volume->servers[loop]);
 
 	kfree(volume);
 	goto error;
@@ -163,7 +163,7 @@ error_discard:
 /*
  * destroy a volume record
  */
-void afs_put_volume(struct afs_net *net, struct afs_volume *volume)
+void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
 {
 	struct afs_vlocation *vlocation;
 	int loop;
@@ -179,7 +179,7 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume)
 
 	/* to prevent a race, the decrement and the dequeue must be effectively
 	 * atomic */
-	down_write(&vlocation->cell->vl_sem);
+	down_write(&cell->vl_sem);
 
 	if (likely(!atomic_dec_and_test(&volume->usage))) {
 		up_write(&vlocation->cell->vl_sem);
@@ -189,16 +189,16 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume)
 
 	vlocation->vols[volume->type] = NULL;
 
-	up_write(&vlocation->cell->vl_sem);
+	up_write(&cell->vl_sem);
 
 	/* finish cleaning up the volume */
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(volume->cache, 0);
 #endif
-	afs_put_vlocation(net, vlocation);
+	afs_put_vlocation(cell->net, vlocation);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
-		afs_put_server(volume->servers[loop]);
+		afs_put_server(cell->net, volume->servers[loop]);
 
 	kfree(volume);
 
@@ -336,7 +336,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 			sizeof(volume->servers[loop]) *
 			(volume->nservers - loop));
 		volume->servers[volume->nservers] = NULL;
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 		volume->rjservers++;
 
 		if (volume->nservers > 0)
@@ -350,7 +350,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 		 *         no longer registered
 		 */
 		up_write(&volume->server_sem);
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 		_leave(" [completely rejected]");
 		return 1;
 
@@ -379,7 +379,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 	case -ENOMEM:
 	case -ENONET:
 		/* tell the caller to accept the result */
-		afs_put_server(server);
+		afs_put_server(afs_v2net(vnode), server);
 		_leave(" [local failure]");
 		return 1;
 	}
@@ -388,7 +388,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 try_next_server_upw:
 	up_write(&volume->server_sem);
 try_next_server:
-	afs_put_server(server);
+	afs_put_server(afs_v2net(vnode), server);
 	_leave(" [try next server]");
 	return 0;
 }
-- 
cgit v1.2.3


From 91a90380efbc896eb129878553202c97213d0861 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:46 +0000
Subject: afs: Add some protocol defs

Add some protocol definitions, including max field lengths, flag defs, an
XDR-encoded UUID def, more VL operation IDs and more fileserver abort
codes.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h    | 25 ++++++++++++++++++++-----
 fs/afs/afs_fs.h |  4 ++++
 fs/afs/afs_vl.h | 15 +++++++++++----
 3 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 93053115bcfc..0d837bddbf7d 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -14,11 +14,14 @@
 
 #include <linux/in.h>
 
-#define AFS_MAXCELLNAME	64		/* maximum length of a cell name */
-#define AFS_MAXVOLNAME	64		/* maximum length of a volume name */
-#define AFSNAMEMAX	256		/* maximum length of a filename plus NUL */
-#define AFSPATHMAX	1024		/* maximum length of a pathname plus NUL */
-#define AFSOPAQUEMAX	1024		/* maximum length of an opaque field */
+#define AFS_MAXCELLNAME		64  	/* Maximum length of a cell name */
+#define AFS_MAXVOLNAME		64  	/* Maximum length of a volume name */
+#define AFS_MAXNSERVERS		8   	/* Maximum servers in a basic volume record */
+#define AFS_NMAXNSERVERS	13  	/* Maximum servers in a N/U-class volume record */
+#define AFS_MAXTYPES		3	/* Maximum number of volume types */
+#define AFSNAMEMAX		256 	/* Maximum length of a filename plus NUL */
+#define AFSPATHMAX		1024	/* Maximum length of a pathname plus NUL */
+#define AFSOPAQUEMAX		1024	/* Maximum length of an opaque field */
 
 typedef unsigned			afs_volid_t;
 typedef unsigned			afs_vnodeid_t;
@@ -176,4 +179,16 @@ struct afs_volume_status {
 
 #define AFS_BLOCK_SIZE	1024
 
+/*
+ * XDR encoding of UUID in AFS.
+ */
+struct afs_uuid__xdr {
+	__be32		time_low;
+	__be32		time_mid;
+	__be32		time_hi_and_version;
+	__be32		clock_seq_hi_and_reserved;
+	__be32		clock_seq_low;
+	__be32		node[6];
+};
+
 #endif /* AFS_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index eb647323d8f0..accd886b4372 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -40,6 +40,7 @@ enum AFS_FS_Operations {
 };
 
 enum AFS_FS_Errors {
+	VRESTARTING	= -100,	/* Server is restarting */
 	VSALVAGE	= 101,	/* volume needs salvaging */
 	VNOVNODE	= 102,	/* no such file/dir (vnode) */
 	VNOVOL		= 103,	/* no such volume or volume unavailable */
@@ -51,6 +52,9 @@ enum AFS_FS_Errors {
 	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
 	VBUSY		= 110,	/* volume is temporarily unavailable */
 	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
+	VIO		= 112,	/* I/O error in volume */
+	VSALVAGING	= 113,	/* Volume is being salvaged */
+	VRESTRICTED	= 120,	/* Volume is restricted from using  */
 };
 
 #endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 800f607ffaf5..4eaa620992c8 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -18,9 +18,12 @@
 #define VL_SERVICE		52	/* RxRPC service ID for the Volume Location service */
 
 enum AFSVL_Operations {
-	VLGETENTRYBYID		= 503,	/* AFS Get Cache Entry By ID operation ID */
-	VLGETENTRYBYNAME	= 504,	/* AFS Get Cache Entry By Name operation ID */
-	VLPROBE			= 514,	/* AFS Probe Volume Location Service operation ID */
+	VLGETENTRYBYID		= 503,	/* AFS Get VLDB entry by ID */
+	VLGETENTRYBYNAME	= 504,	/* AFS Get VLDB entry by name */
+	VLPROBE			= 514,	/* AFS probe VL service */
+	VLGETENTRYBYIDU		= 526,	/* AFS Get VLDB entry by ID (UUID-variant) */
+	VLGETENTRYBYNAMEU	= 527,	/* AFS Get VLDB entry by name (UUID-variant) */
+	VLGETADDRSU		= 533,	/* AFS Get addrs for fileserver */
 };
 
 enum AFSVL_Errors {
@@ -74,11 +77,15 @@ struct afs_vldbentry {
 		struct in_addr	addr;		/* server address */
 		unsigned	partition;	/* partition ID on this server */
 		unsigned	flags;		/* server specific flags */
-#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
+#define AFS_VLSF_NEWREPSITE	0x0001	/* Ignore all 'non-new' servers */
 #define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
 #define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
 #define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
+#define AFS_VLSF_UUID		0x0010	/* This server is referred to by its UUID */
+#define AFS_VLSF_DONTUSE	0x0020	/* This server ref should be ignored */
 	} servers[8];
 };
 
+#define AFS_VLDB_MAXNAMELEN 65
+
 #endif /* AFS_VL_H */
-- 
cgit v1.2.3


From ad6a942a9e74edea8a4a126a1e434feff6a6d5c2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:47 +0000
Subject: afs: Update the cache index structure

Update the cache index structure in the following ways:

 (1) Don't use the volume name followed by the volume type as levels in the
     cache index.  Volumes can be renamed.  Use the volume ID instead.

 (2) Don't store the VLDB data for a volume in the tree.  If the volume
     database should be cached locally, then it should be done in a separate
     tree.

 (3) Expand the volume ID stored in the cache to 64 bits.

 (4) Expand the file/vnode ID stored in the cache to 96 bits.

 (5) Increment the cache structure version number to 1.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cache.c     | 239 ++++++++++-------------------------------------------
 fs/afs/internal.h  |  21 -----
 fs/afs/vlocation.c |  39 ++-------
 fs/afs/volume.c    |   2 +-
 4 files changed, 50 insertions(+), 251 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 1fe855191261..f62ff71d28c9 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -14,19 +14,6 @@
 
 static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
 				       void *buffer, uint16_t buflen);
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
-				       void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
-						      const void *buffer,
-						      uint16_t buflen);
-
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t buflen);
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_vlocation_cache_check_aux(
-	void *cookie_netfs_data, const void *buffer, uint16_t buflen);
-
 static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
 					 void *buffer, uint16_t buflen);
 
@@ -42,23 +29,13 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 
 struct fscache_netfs afs_cache_netfs = {
 	.name			= "afs",
-	.version		= 0,
+	.version		= 1,
 };
 
 struct fscache_cookie_def afs_cell_cache_index_def = {
 	.name		= "AFS.cell",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
 	.get_key	= afs_cell_cache_get_key,
-	.get_aux	= afs_cell_cache_get_aux,
-	.check_aux	= afs_cell_cache_check_aux,
-};
-
-struct fscache_cookie_def afs_vlocation_cache_index_def = {
-	.name			= "AFS.vldb",
-	.type			= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key		= afs_vlocation_cache_get_key,
-	.get_aux		= afs_vlocation_cache_get_aux,
-	.check_aux		= afs_vlocation_cache_check_aux,
 };
 
 struct fscache_cookie_def afs_volume_cache_index_def = {
@@ -95,150 +72,26 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
 	return klen;
 }
 
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
-				       void *buffer, uint16_t bufmax)
-{
-	const struct afs_cell *cell = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("%p,%p,%u", cell, buffer, bufmax);
-
-	dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
-	dlen = min(dlen, bufmax);
-	dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
-
-	memcpy(buffer, cell->vl_addrs, dlen);
-	return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
-						      const void *buffer,
-						      uint16_t buflen)
-{
-	_leave(" = OKAY");
-	return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
-	klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, vlocation->vldb.name, klen);
-
-	_leave(" = %u", klen);
-	return klen;
-}
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
-	dlen = sizeof(struct afs_cache_vlocation);
-	dlen -= offsetof(struct afs_cache_vlocation, nservers);
-	if (dlen > bufmax)
-		return 0;
-
-	memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
-
-	_leave(" = %u", dlen);
-	return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static
-enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
-						    const void *buffer,
-						    uint16_t buflen)
-{
-	const struct afs_cache_vlocation *cvldb;
-	struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
-
-	/* check the size of the data is what we're expecting */
-	dlen = sizeof(struct afs_cache_vlocation);
-	dlen -= offsetof(struct afs_cache_vlocation, nservers);
-	if (dlen != buflen)
-		return FSCACHE_CHECKAUX_OBSOLETE;
-
-	cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
-
-	/* if what's on disk is more valid than what's in memory, then use the
-	 * VL record from the cache */
-	if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
-		memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
-		vlocation->valid = 1;
-		_leave(" = SUCCESS [c->m]");
-		return FSCACHE_CHECKAUX_OKAY;
-	}
-
-	/* need to update the cache if the cached info differs */
-	if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
-		/* delete if the volume IDs for this name differ */
-		if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
-			   sizeof(cvldb->vid)) != 0
-		    ) {
-			_leave(" = OBSOLETE");
-			return FSCACHE_CHECKAUX_OBSOLETE;
-		}
-
-		_leave(" = UPDATE");
-		return FSCACHE_CHECKAUX_NEEDS_UPDATE;
-	}
-
-	_leave(" = OKAY");
-	return FSCACHE_CHECKAUX_OKAY;
-}
-
 /*****************************************************************************/
 /*
  * set the key for the volume index entry
  */
 static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
+					 void *buffer, uint16_t bufmax)
 {
 	const struct afs_volume *volume = cookie_netfs_data;
-	uint16_t klen;
+	struct {
+		u64 volid;
+	} __packed key;
 
 	_enter("{%u},%p,%u", volume->type, buffer, bufmax);
 
-	klen = sizeof(volume->type);
-	if (klen > bufmax)
+	if (bufmax < sizeof(key))
 		return 0;
 
-	memcpy(buffer, &volume->type, sizeof(volume->type));
-
-	_leave(" = %u", klen);
-	return klen;
-
+	key.volid = volume->vid;
+	memcpy(buffer, &key, sizeof(key));
+	return sizeof(key);
 }
 
 /*****************************************************************************/
@@ -249,20 +102,25 @@ static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
 					void *buffer, uint16_t bufmax)
 {
 	const struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t klen;
+	struct {
+		u32 vnode_id[3];
+	} __packed key;
 
 	_enter("{%x,%x,%llx},%p,%u",
 	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
 	       buffer, bufmax);
 
-	klen = sizeof(vnode->fid.vnode);
-	if (klen > bufmax)
-		return 0;
+	/* Allow for a 96-bit key */
+	memset(&key, 0, sizeof(key));
+	key.vnode_id[0] = vnode->fid.vnode;
+	key.vnode_id[1] = 0;
+	key.vnode_id[2] = 0;
 
-	memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
+	if (sizeof(key) > bufmax)
+		return 0;
 
-	_leave(" = %u", klen);
-	return klen;
+	memcpy(buffer, &key, sizeof(key));
+	return sizeof(key);
 }
 
 /*
@@ -280,6 +138,11 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
 	*size = vnode->status.size;
 }
 
+struct afs_vnode_cache_aux {
+	u64 data_version;
+	u32 fid_unique;
+} __packed;
+
 /*
  * provide new auxiliary cache data
  */
@@ -287,23 +150,21 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
 					void *buffer, uint16_t bufmax)
 {
 	const struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t dlen;
+	struct afs_vnode_cache_aux aux;
 
 	_enter("{%x,%x,%Lx},%p,%u",
 	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
 	       buffer, bufmax);
 
-	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
-	if (dlen > bufmax)
-		return 0;
+	memset(&aux, 0, sizeof(aux));
+	aux.data_version = vnode->status.data_version;
+	aux.fid_unique = vnode->fid.unique;
 
-	memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
-	buffer += sizeof(vnode->fid.unique);
-	memcpy(buffer, &vnode->status.data_version,
-	       sizeof(vnode->status.data_version));
+	if (bufmax < sizeof(aux))
+		return 0;
 
-	_leave(" = %u", dlen);
-	return dlen;
+	memcpy(buffer, &aux, sizeof(aux));
+	return sizeof(aux);
 }
 
 /*
@@ -314,43 +175,29 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 						       uint16_t buflen)
 {
 	struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t dlen;
+	struct afs_vnode_cache_aux aux;
 
 	_enter("{%x,%x,%llx},%p,%u",
 	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
 	       buffer, buflen);
 
+	memcpy(&aux, buffer, sizeof(aux));
+
 	/* check the size of the data is what we're expecting */
-	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
-	if (dlen != buflen) {
-		_leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
+	if (buflen != sizeof(aux)) {
+		_leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux));
 		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
-	if (memcmp(buffer,
-		   &vnode->fid.unique,
-		   sizeof(vnode->fid.unique)
-		   ) != 0) {
-		unsigned unique;
-
-		memcpy(&unique, buffer, sizeof(unique));
-
+	if (vnode->fid.unique != aux.fid_unique) {
 		_leave(" = OBSOLETE [uniq %x != %x]",
-		       unique, vnode->fid.unique);
+		       aux.fid_unique, vnode->fid.unique);
 		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
-	if (memcmp(buffer + sizeof(vnode->fid.unique),
-		   &vnode->status.data_version,
-		   sizeof(vnode->status.data_version)
-		   ) != 0) {
-		afs_dataversion_t version;
-
-		memcpy(&version, buffer + sizeof(vnode->fid.unique),
-		       sizeof(version));
-
+	if (vnode->status.data_version != aux.data_version) {
 		_leave(" = OBSOLETE [vers %llx != %llx]",
-		       version, vnode->status.data_version);
+		       aux.data_version, vnode->status.data_version);
 		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7cd30ae71f91..b16181b2fa43 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -190,14 +190,6 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
 
 extern struct file_system_type afs_fs_type;
 
-/*
- * entry in the cached cell catalogue
- */
-struct afs_cache_cell {
-	char		name[AFS_MAXCELLNAME];	/* cell name (padded with NULs) */
-	struct in_addr	vl_servers[15];		/* cached cell VL servers */
-};
-
 /*
  * AFS network namespace record.
  */
@@ -296,14 +288,6 @@ struct afs_cache_vlocation {
 	time_t			rtime;		/* last retrieval time */
 };
 
-/*
- * volume -> vnode hash table entry
- */
-struct afs_cache_vhash {
-	afs_voltype_t		vtype;		/* which volume variation */
-	uint8_t			hash_bucket;	/* which hash bucket this represents */
-} __attribute__((packed));
-
 /*
  * AFS volume location record
  */
@@ -314,9 +298,6 @@ struct afs_vlocation {
 	struct list_head	grave;		/* link in master graveyard list */
 	struct list_head	update;		/* link in master update list */
 	struct afs_cell		*cell;		/* cell to which volume belongs */
-#ifdef CONFIG_AFS_FSCACHE
-	struct fscache_cookie	*cache;		/* caching cookie */
-#endif
 	struct afs_cache_vlocation vldb;	/* volume information DB record */
 	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
 	wait_queue_head_t	waitq;		/* status change waitqueue */
@@ -477,12 +458,10 @@ struct afs_interface {
 #ifdef CONFIG_AFS_FSCACHE
 extern struct fscache_netfs afs_cache_netfs;
 extern struct fscache_cookie_def afs_cell_cache_index_def;
-extern struct fscache_cookie_def afs_vlocation_cache_index_def;
 extern struct fscache_cookie_def afs_volume_cache_index_def;
 extern struct fscache_cookie_def afs_vnode_cache_index_def;
 #else
 #define afs_cell_cache_index_def	(*(struct fscache_cookie_def *) NULL)
-#define afs_vlocation_cache_index_def	(*(struct fscache_cookie_def *) NULL)
 #define afs_volume_cache_index_def	(*(struct fscache_cookie_def *) NULL)
 #define afs_vnode_cache_index_def	(*(struct fscache_cookie_def *) NULL)
 #endif
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index cf7e02d5fa3f..745921171058 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -273,10 +273,6 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl,
 		       vl->vldb.name, vldb->name);
 
 	vl->vldb = *vldb;
-
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_update_cookie(vl->cache);
-#endif
 }
 
 /*
@@ -295,27 +291,12 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
 
 	memset(&vldb, 0, sizeof(vldb));
 
-	/* see if we have an in-cache copy (will set vl->valid if there is) */
-#ifdef CONFIG_AFS_FSCACHE
-	vl->cache = fscache_acquire_cookie(vl->cell->cache,
-					   &afs_vlocation_cache_index_def, vl,
-					   true);
-#endif
-
-	if (vl->valid) {
-		/* try to update a known volume in the cell VL databases by
-		 * ID as the name may have changed */
-		_debug("found in cache");
-		ret = afs_vlocation_update_record(vl, key, &vldb);
-	} else {
-		/* try to look up an unknown volume in the cell VL databases by
-		 * name */
-		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
-		if (ret < 0) {
-			printk("kAFS: failed to locate '%s' in cell '%s'\n",
-			       vl->vldb.name, vl->cell->name);
-			return ret;
-		}
+	/* Try to look up an unknown volume in the cell VL databases by name */
+	ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
+	if (ret < 0) {
+		printk("kAFS: failed to locate '%s' in cell '%s'\n",
+		       vl->vldb.name, vl->cell->name);
+		return ret;
 	}
 
 	afs_vlocation_apply_update(vl, &vldb);
@@ -414,11 +395,6 @@ fill_in_record:
 	spin_unlock(&vl->lock);
 	wake_up(&vl->waitq);
 
-	/* update volume entry in local cache */
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_update_cookie(vl->cache);
-#endif
-
 	/* schedule for regular updates */
 	afs_vlocation_queue_for_updates(net, vl);
 	goto success;
@@ -522,9 +498,6 @@ static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl)
 {
 	_enter("%p", vl);
 
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vl->cache, 0);
-#endif
 	afs_put_cell(net, vl->cell);
 	kfree(vl);
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index e2f0e8ec527d..ccac5b1e079d 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -125,7 +125,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 
 	/* attach the cache and volume location */
 #ifdef CONFIG_AFS_FSCACHE
-	volume->cache = fscache_acquire_cookie(vlocation->cache,
+	volume->cache = fscache_acquire_cookie(volume->cell->cache,
 					       &afs_volume_cache_index_def,
 					       volume, true);
 #endif
-- 
cgit v1.2.3


From 4d9df9868f31df6725481135c10ac6419ce58d44 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:47 +0000
Subject: afs: Keep and pass sockaddr_rxrpc addresses rather than in_addr

Keep and pass sockaddr_rxrpc addresses around rather than keeping and
passing in_addr addresses to allow for the use of IPv6 and non-standard
port numbers in future.

This also allows the port and service_id fields to be removed from the
afs_call struct.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cell.c      | 18 +++++++++++++++---
 fs/afs/fsclient.c  | 36 ------------------------------------
 fs/afs/internal.h  | 15 +++++++--------
 fs/afs/proc.c      | 10 +++++-----
 fs/afs/rxrpc.c     | 18 ++++--------------
 fs/afs/server.c    | 31 ++++++++++++++++---------------
 fs/afs/vlclient.c  | 20 ++++++++++++--------
 fs/afs/vlocation.c | 30 ++++++++----------------------
 fs/afs/vnode.c     | 28 ++++++++++++++--------------
 fs/afs/volume.c    |  9 ++++-----
 10 files changed, 85 insertions(+), 130 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 2224e335eed7..5523fa3c05d9 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -31,7 +31,7 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 	char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
 	char  *dvllist = NULL, *_vllist = NULL;
 	char  delimiter = ':';
-	int ret;
+	int ret, i;
 
 	_enter("%*.*s,%s", namelen, namelen, name ?: "", vllist);
 
@@ -61,6 +61,14 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 	INIT_LIST_HEAD(&cell->vl_list);
 	spin_lock_init(&cell->vl_lock);
 
+	for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) {
+		struct sockaddr_rxrpc *srx = &cell->vl_addrs[i];
+		srx->srx_family			= AF_RXRPC;
+		srx->srx_service		= VL_SERVICE;
+		srx->transport_type		= SOCK_DGRAM;
+		srx->transport.sin.sin_port	= htons(AFS_VL_PORT);
+	}
+
 	/* if the ip address is invalid, try dns query */
 	if (!vllist || strlen(vllist) < 7) {
 		ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
@@ -83,6 +91,7 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 
 	/* fill in the VL server list from the rest of the string */
 	do {
+		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
 		unsigned a, b, c, d;
 
 		next = strchr(_vllist, delimiter);
@@ -95,10 +104,13 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 		if (a > 255 || b > 255 || c > 255 || d > 255)
 			goto bad_address;
 
-		cell->vl_addrs[cell->vl_naddrs++].s_addr =
+		srx->transport_len		= sizeof(struct sockaddr_in);
+		srx->transport.sin.sin_family	= AF_INET;
+		srx->transport.sin.sin_addr.s_addr =
 			htonl((a << 24) | (b << 16) | (c << 8) | d);
 
-	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
+	} while (cell->vl_naddrs++,
+		 cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
 
 	/* create a key to represent an anonymous user */
 	memcpy(keyname, "afs@", 4);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ce6f0159e1d4..bac2e8db6e75 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -297,8 +297,6 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 	call->key = key;
 	call->reply = vnode;
 	call->reply2 = volsync;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -504,8 +502,6 @@ static int afs_fs_fetch_data64(struct afs_server *server,
 	call->reply = vnode;
 	call->reply2 = NULL; /* volsync */
 	call->reply3 = req;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->operation_ID = FSFETCHDATA64;
 
 	/* marshall the parameters */
@@ -551,8 +547,6 @@ int afs_fs_fetch_data(struct afs_server *server,
 	call->reply = vnode;
 	call->reply2 = NULL; /* volsync */
 	call->reply3 = req;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->operation_ID = FSFETCHDATA;
 
 	/* marshall the parameters */
@@ -619,8 +613,6 @@ int afs_fs_give_up_callbacks(struct afs_net *net,
 	if (!call)
 		return -ENOMEM;
 
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -723,8 +715,6 @@ int afs_fs_create(struct afs_server *server,
 	call->reply2 = newfid;
 	call->reply3 = newstatus;
 	call->reply4 = newcb;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -810,8 +800,6 @@ int afs_fs_remove(struct afs_server *server,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -893,8 +881,6 @@ int afs_fs_link(struct afs_server *server,
 	call->key = key;
 	call->reply = dvnode;
 	call->reply2 = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -988,8 +974,6 @@ int afs_fs_symlink(struct afs_server *server,
 	call->reply = vnode;
 	call->reply2 = newfid;
 	call->reply3 = newstatus;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1094,8 +1078,6 @@ int afs_fs_rename(struct afs_server *server,
 	call->key = key;
 	call->reply = orig_dvnode;
 	call->reply2 = new_dvnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1196,8 +1178,6 @@ static int afs_fs_store_data64(struct afs_server *server,
 	call->wb = wb;
 	call->key = wb->key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
@@ -1274,8 +1254,6 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	call->wb = wb;
 	call->key = wb->key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
@@ -1383,8 +1361,6 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
 
@@ -1435,8 +1411,6 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
 
@@ -1483,8 +1457,6 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 	call->operation_ID = FSSTORESTATUS;
 
 	/* marshall the parameters */
@@ -1721,8 +1693,6 @@ int afs_fs_get_volume_status(struct afs_server *server,
 	call->reply = vnode;
 	call->reply2 = vs;
 	call->reply3 = tmpbuf;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1805,8 +1775,6 @@ int afs_fs_set_lock(struct afs_server *server,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1839,8 +1807,6 @@ int afs_fs_extend_lock(struct afs_server *server,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1872,8 +1838,6 @@ int afs_fs_release_lock(struct afs_server *server,
 
 	call->key = key;
 	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b16181b2fa43..4fe26bd7bfb0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -106,7 +106,6 @@ struct afs_call {
 	bool			async;		/* T if asynchronous */
 	bool			upgrade;	/* T to request service upgrade */
 	u16			service_id;	/* RxRPC service ID to call */
-	__be16			port;		/* target UDP port */
 	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
 	__be32			tmp;		/* place to extract temporary data */
@@ -264,7 +263,7 @@ struct afs_cell {
 	spinlock_t		vl_lock;	/* vl_list lock */
 	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
 	unsigned short		vl_curr_svix;	/* current server index */
-	struct in_addr		vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
+	struct sockaddr_rxrpc	vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
 
 	char			name[0];	/* cell name - must go last */
 };
@@ -284,7 +283,7 @@ struct afs_cache_vlocation {
 #define AFS_VOL_VTM_BAK	0x04 /* backup version of the volume is available (on this server) */
 
 	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
-	struct in_addr		servers[8];	/* fileserver addresses */
+	struct sockaddr_rxrpc	servers[8];	/* fileserver addresses */
 	time_t			rtime;		/* last retrieval time */
 };
 
@@ -315,7 +314,7 @@ struct afs_vlocation {
 struct afs_server {
 	atomic_t		usage;
 	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
-	struct in_addr		addr;		/* server address */
+	struct sockaddr_rxrpc	addr;		/* server address */
 	struct afs_net		*net;		/* Network namespace in which the server resides */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
@@ -654,7 +653,7 @@ extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
 extern int afs_queue_call_work(struct afs_call *);
-extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool);
+extern int afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
 					    size_t, size_t);
@@ -690,7 +689,7 @@ do {								\
 
 extern void afs_server_timer(struct timer_list *);
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
-					    const struct in_addr *);
+					    struct sockaddr_rxrpc *);
 extern struct afs_server *afs_find_server(struct afs_net *,
 					  const struct sockaddr_rxrpc *);
 extern void afs_put_server(struct afs_net *, struct afs_server *);
@@ -707,11 +706,11 @@ extern void __exit afs_fs_exit(void);
  * vlclient.c
  */
 extern int afs_vl_get_entry_by_name(struct afs_net *,
-				    struct in_addr *, struct key *,
+				    struct sockaddr_rxrpc *, struct key *,
 				    const char *, struct afs_cache_vlocation *,
 				    bool);
 extern int afs_vl_get_entry_by_id(struct afs_net *,
-				  struct in_addr *, struct key *,
+				  struct sockaddr_rxrpc *, struct key *,
 				  afs_volid_t, afs_voltype_t,
 				  struct afs_cache_vlocation *, bool);
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 677a453b08bf..f76018104ae0 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -570,16 +570,16 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
  */
 static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 {
-	struct in_addr *addr = v;
+	struct sockaddr_rxrpc *addr = v;
 
 	/* display header on line 1 */
-	if (v == (struct in_addr *) 1) {
+	if (v == (void *)1) {
 		seq_puts(m, "ADDRESS\n");
 		return 0;
 	}
 
 	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%pI4\n", &addr->s_addr);
+	seq_printf(m, "%pISp\n", &addr->transport);
 	return 0;
 }
 
@@ -652,7 +652,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 {
 	struct afs_cell *cell = m->private;
 	struct afs_server *server = list_entry(v, struct afs_server, link);
-	char ipaddr[20];
+	char ipaddr[64];
 
 	/* display header on line 1 */
 	if (v == &cell->servers) {
@@ -661,7 +661,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 	}
 
 	/* display one cell per line on subsequent lines */
-	sprintf(ipaddr, "%pI4", &server->addr);
+	sprintf(ipaddr, "%pISp", &server->addr.transport);
 	seq_printf(m, "%3d %-15.15s %5d\n",
 		   atomic_read(&server->usage), ipaddr, server->fs_state);
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 656ceb285b85..c108effb54be 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -322,10 +322,9 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 /*
  * initiate a call
  */
-int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
-		  bool async)
+int afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
+		  gfp_t gfp, bool async)
 {
-	struct sockaddr_rxrpc srx;
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
@@ -334,7 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	u32 abort_code;
 	int ret;
 
-	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
+	_enter(",{%pISp},", &srx->transport);
 
 	ASSERT(call->type != NULL);
 	ASSERT(call->type->name != NULL);
@@ -345,15 +344,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 	call->async = async;
 
-	memset(&srx, 0, sizeof(srx));
-	srx.srx_family = AF_RXRPC;
-	srx.srx_service = call->service_id;
-	srx.transport_type = SOCK_DGRAM;
-	srx.transport_len = sizeof(srx.transport.sin);
-	srx.transport.sin.sin_family = AF_INET;
-	srx.transport.sin.sin_port = call->port;
-	memcpy(&srx.transport.sin.sin_addr, addr, 4);
-
 	/* Work out the length we're going to transmit.  This is awkward for
 	 * calls such as FS.StoreData where there's an extra injection of data
 	 * after the initial fixed part.
@@ -365,7 +355,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	}
 
 	/* create a call */
-	rxcall = rxrpc_kernel_begin_call(call->net->socket, &srx, call->key,
+	rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
 					 (unsigned long)call,
 					 tx_total_len, gfp,
 					 (async ?
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d8044be913f0..662f7fbf5d05 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -11,6 +11,7 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include "afs_fs.h"
 #include "internal.h"
 
 static unsigned afs_server_timeout = 10;	/* server timeout in seconds */
@@ -42,7 +43,7 @@ static int afs_install_server(struct afs_server *server)
 	struct afs_server *xserver;
 	struct afs_net *net = server->cell->net;
 	struct rb_node **pp, *p;
-	int ret;
+	int ret, diff;
 
 	_enter("%p", server);
 
@@ -55,9 +56,10 @@ static int afs_install_server(struct afs_server *server)
 		p = *pp;
 		_debug("- consider %p", p);
 		xserver = rb_entry(p, struct afs_server, master_rb);
-		if (server->addr.s_addr < xserver->addr.s_addr)
+		diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr));
+		if (diff < 0)
 			pp = &(*pp)->rb_left;
-		else if (server->addr.s_addr > xserver->addr.s_addr)
+		else if (diff > 0)
 			pp = &(*pp)->rb_right;
 		else
 			goto error;
@@ -76,7 +78,7 @@ error:
  * allocate a new server record
  */
 static struct afs_server *afs_alloc_server(struct afs_cell *cell,
-					   const struct in_addr *addr)
+					   const struct sockaddr_rxrpc *addr)
 {
 	struct afs_server *server;
 
@@ -99,8 +101,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 		INIT_DELAYED_WORK(&server->cb_break_work,
 				  afs_dispatch_give_up_callbacks);
 
-		memcpy(&server->addr, addr, sizeof(struct in_addr));
-		server->addr.s_addr = addr->s_addr;
+		server->addr = *addr;
 		afs_inc_servers_outstanding(cell->net);
 		_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	} else {
@@ -113,17 +114,17 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
  * get an FS-server record for a cell
  */
 struct afs_server *afs_lookup_server(struct afs_cell *cell,
-				     const struct in_addr *addr)
+				     struct sockaddr_rxrpc *addr)
 {
 	struct afs_server *server, *candidate;
 
-	_enter("%p,%pI4", cell, &addr->s_addr);
+	_enter("%p,%pIS", cell, &addr->transport);
 
 	/* quick scan of the list to see if we already have the server */
 	read_lock(&cell->servers_lock);
 
 	list_for_each_entry(server, &cell->servers, link) {
-		if (server->addr.s_addr == addr->s_addr)
+		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
 			goto found_server_quickly;
 	}
 	read_unlock(&cell->servers_lock);
@@ -138,7 +139,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
 
 	/* check the cell's server list again */
 	list_for_each_entry(server, &cell->servers, link) {
-		if (server->addr.s_addr == addr->s_addr)
+		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
 			goto found_server;
 	}
 
@@ -195,9 +196,9 @@ struct afs_server *afs_find_server(struct afs_net *net,
 {
 	struct afs_server *server = NULL;
 	struct rb_node *p;
-	struct in_addr addr = srx->transport.sin.sin_addr;
+	int diff;
 
-	_enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+	_enter("{%d,%pIS}", srx->transport.family, &srx->transport);
 
 	if (srx->transport.family != AF_INET) {
 		WARN(true, "AFS does not yes support non-IPv4 addresses\n");
@@ -212,9 +213,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
 
 		_debug("- consider %p", p);
 
-		if (addr.s_addr < server->addr.s_addr) {
+		diff = memcmp(srx, &server->addr, sizeof(*srx));
+		if (diff < 0) {
 			p = p->rb_left;
-		} else if (addr.s_addr > server->addr.s_addr) {
+		} else if (diff > 0) {
 			p = p->rb_right;
 		} else {
 			afs_get_server(server);
@@ -225,7 +227,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
 	server = NULL;
 found:
 	read_unlock(&net->servers_lock);
-	ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
 	_leave(" = %p", server);
 	return server;
 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index f5a043a9ba61..48d137628d6a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -12,6 +12,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include "afs_fs.h"
 #include "internal.h"
 
 /*
@@ -83,8 +84,15 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 	bp++; /* type */
 	entry->nservers = ntohl(*bp++);
 
-	for (loop = 0; loop < 8; loop++)
-		entry->servers[loop].s_addr = *bp++;
+	for (loop = 0; loop < 8; loop++) {
+		entry->servers[loop].srx_family = AF_RXRPC;
+		entry->servers[loop].srx_service = FS_SERVICE;
+		entry->servers[loop].transport_type = SOCK_DGRAM;
+		entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin);
+		entry->servers[loop].transport.sin.sin_family = AF_INET;
+		entry->servers[loop].transport.sin.sin_port = htons(AFS_FS_PORT);
+		entry->servers[loop].transport.sin.sin_addr.s_addr = *bp++;
+	}
 
 	bp += 8; /* partition IDs */
 
@@ -144,7 +152,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = {
  * dispatch a get volume entry by name operation
  */
 int afs_vl_get_entry_by_name(struct afs_net *net,
-			     struct in_addr *addr,
+			     struct sockaddr_rxrpc *addr,
 			     struct key *key,
 			     const char *volname,
 			     struct afs_cache_vlocation *entry,
@@ -166,8 +174,6 @@ int afs_vl_get_entry_by_name(struct afs_net *net,
 
 	call->key = key;
 	call->reply = entry;
-	call->service_id = VL_SERVICE;
-	call->port = htons(AFS_VL_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -185,7 +191,7 @@ int afs_vl_get_entry_by_name(struct afs_net *net,
  * dispatch a get volume entry by ID operation
  */
 int afs_vl_get_entry_by_id(struct afs_net *net,
-			   struct in_addr *addr,
+			   struct sockaddr_rxrpc *addr,
 			   struct key *key,
 			   afs_volid_t volid,
 			   afs_voltype_t voltype,
@@ -203,8 +209,6 @@ int afs_vl_get_entry_by_id(struct afs_net *net,
 
 	call->key = key;
 	call->reply = entry;
-	call->service_id = VL_SERVICE;
-	call->port = htons(AFS_VL_PORT);
 
 	/* marshall the parameters */
 	bp = call->request;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 745921171058..ec5ab8dc9bc8 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -30,7 +30,6 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 					   struct afs_cache_vlocation *vldb)
 {
 	struct afs_cell *cell = vl->cell;
-	struct in_addr addr;
 	int count, ret;
 
 	_enter("%s,%s", cell->name, vl->vldb.name);
@@ -38,12 +37,12 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 	down_write(&vl->cell->vl_sem);
 	ret = -ENOMEDIUM;
 	for (count = cell->vl_naddrs; count > 0; count--) {
-		addr = cell->vl_addrs[cell->vl_curr_svix];
+		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
 
-		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
+		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(cell->net, &addr, key,
+		ret = afs_vl_get_entry_by_name(cell->net, addr, key,
 					       vl->vldb.name, vldb, false);
 		switch (ret) {
 		case 0:
@@ -88,7 +87,6 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 					 struct afs_cache_vlocation *vldb)
 {
 	struct afs_cell *cell = vl->cell;
-	struct in_addr addr;
 	int count, ret;
 
 	_enter("%s,%x,%d,", cell->name, volid, voltype);
@@ -96,12 +94,12 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 	down_write(&vl->cell->vl_sem);
 	ret = -ENOMEDIUM;
 	for (count = cell->vl_naddrs; count > 0; count--) {
-		addr = cell->vl_addrs[cell->vl_curr_svix];
+		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
 
-		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
+		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(cell->net, &addr, key, volid,
+		ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid,
 					     voltype, vldb, false);
 		switch (ret) {
 		case 0:
@@ -192,15 +190,7 @@ static int afs_vlocation_update_record(struct afs_vlocation *vl,
 	int ret;
 
 	/* try to look up a cached volume in the cell VL databases by ID */
-	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       vl->vldb.name,
-	       vl->vldb.vidmask,
-	       ntohl(vl->vldb.servers[0].s_addr),
-	       vl->vldb.srvtmask[0],
-	       ntohl(vl->vldb.servers[1].s_addr),
-	       vl->vldb.srvtmask[1],
-	       ntohl(vl->vldb.servers[2].s_addr),
-	       vl->vldb.srvtmask[2]);
+	_debug("Locally Cached: %s %02x", vl->vldb.name, vl->vldb.vidmask);
 
 	_debug("Vids: %08x %08x %08x",
 	       vl->vldb.vid[0],
@@ -258,11 +248,7 @@ static int afs_vlocation_update_record(struct afs_vlocation *vl,
 static void afs_vlocation_apply_update(struct afs_vlocation *vl,
 				       struct afs_cache_vlocation *vldb)
 {
-	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       vldb->name, vldb->vidmask,
-	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
-	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
-	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
+	_debug("Done VL Lookup: %s %02x", vldb->name, vldb->vidmask);
 
 	_debug("Vids: %08x %08x %08x",
 	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index d5ef834ba4ac..b79d05374878 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -354,8 +354,8 @@ get_anyway:
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %p{%08x}",
-		       server, ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %p{%pIS}",
+		       server, &server->addr.transport);
 
 		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
 					       false);
@@ -418,7 +418,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_fetch_data(server, key, vnode, desc,
 					false);
@@ -474,7 +474,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
 				    newstatus, newcb, false);
@@ -530,7 +530,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_remove(server, key, vnode, name, isdir,
 				    false);
@@ -592,7 +592,7 @@ int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_link(server, key, dvnode, vnode, name,
 				  false);
@@ -656,7 +656,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_symlink(server, key, vnode, name, content,
 				     newfid, newstatus, false);
@@ -726,7 +726,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
 				    new_dvnode, new_name, false);
@@ -792,7 +792,7 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_store_data(server, wb, first, last, offset, to,
 					false);
@@ -845,7 +845,7 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_setattr(server, key, vnode, attr, false);
 
@@ -892,7 +892,7 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_get_volume_status(server, key, vnode, vs, false);
 
@@ -931,7 +931,7 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_set_lock(server, key, vnode, type, false);
 
@@ -969,7 +969,7 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_extend_lock(server, key, vnode, false);
 
@@ -1007,7 +1007,7 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %pIS\n", &server->addr.transport);
 
 		ret = afs_fs_release_lock(server, key, vnode, false);
 
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index ccac5b1e079d..52f0dc40732b 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -248,8 +248,8 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 		case 0:
 			afs_get_server(server);
 			up_read(&volume->server_sem);
-			_leave(" = %p (picked %08x)",
-			       server, ntohl(server->addr.s_addr));
+			_leave(" = %p (picked %pIS)",
+			       server, &server->addr.transport);
 			return server;
 
 		case -ENETUNREACH:
@@ -303,9 +303,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 	struct afs_volume *volume = vnode->volume;
 	unsigned loop;
 
-	_enter("%s,%08x,%d",
-	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
-	       result);
+	_enter("%s,%pIS,%d",
+	       volume->vlocation->vldb.name, &server->addr.transport, result);
 
 	switch (result) {
 		/* success */
-- 
cgit v1.2.3


From 3838d3ecdea496699a8c13c183d4df5dfe8e1a3e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:47 +0000
Subject: afs: Allow IPv6 address specification of VL servers

Allow VL server specifications to be given IPv6 addresses as well as IPv4
addresses, for example as:

	echo add foo.org 1111:2222:3333:0:4444:5555:6666:7777 >/proc/fs/afs/cells

Note that ':' is the expected separator for separating IPv4 addresses, but
if a ',' is detected or no '.' is detected in the string, the delimiter is
switched to ','.

This also works with DNS AFSDB or SRV record strings fetched by upcall from
userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cell.c     | 31 +++++++++++++++++++++----------
 fs/afs/proc.c     |  2 +-
 fs/afs/rxrpc.c    | 11 +++++------
 fs/afs/server.c   |  5 -----
 fs/afs/vlclient.c | 13 +++++++++----
 5 files changed, 36 insertions(+), 26 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 5523fa3c05d9..216821fd1a61 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <linux/dns_resolver.h>
 #include <linux/sched.h>
+#include <linux/inet.h>
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
@@ -86,28 +87,38 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 		delimiter = ',';
 
 	} else {
+		if (strchr(vllist, ',') || !strchr(vllist, '.'))
+			delimiter = ',';
 		_vllist = vllist;
 	}
 
 	/* fill in the VL server list from the rest of the string */
 	do {
 		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-		unsigned a, b, c, d;
+		const char *end;
 
 		next = strchr(_vllist, delimiter);
 		if (next)
 			*next++ = 0;
 
-		if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
-			goto bad_address;
-
-		if (a > 255 || b > 255 || c > 255 || d > 255)
+		if (in4_pton(_vllist, -1, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+			     -1, &end)) {
+			srx->transport_len		= sizeof(struct sockaddr_in6);
+			srx->transport.sin6.sin6_family	= AF_INET6;
+			srx->transport.sin6.sin6_flowinfo = 0;
+			srx->transport.sin6.sin6_scope_id = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+		} else if (in6_pton(_vllist, -1, srx->transport.sin6.sin6_addr.s6_addr,
+				    -1, &end)) {
+			srx->transport_len		= sizeof(struct sockaddr_in6);
+			srx->transport.sin6.sin6_family	= AF_INET6;
+			srx->transport.sin6.sin6_flowinfo = 0;
+			srx->transport.sin6.sin6_scope_id = 0;
+		} else {
 			goto bad_address;
-
-		srx->transport_len		= sizeof(struct sockaddr_in);
-		srx->transport.sin.sin_family	= AF_INET;
-		srx->transport.sin.sin_addr.s_addr =
-			htonl((a << 24) | (b << 16) | (c << 8) | d);
+		}
 
 	} while (cell->vl_naddrs++,
 		 cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index f76018104ae0..d00d550ff2ef 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -662,7 +662,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 
 	/* display one cell per line on subsequent lines */
 	sprintf(ipaddr, "%pISp", &server->addr.transport);
-	seq_printf(m, "%3d %-15.15s %5d\n",
+	seq_printf(m, "%3d %-15s %5d\n",
 		   atomic_read(&server->usage), ipaddr, server->fs_state);
 
 	return 0;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c108effb54be..5d2c1a34ffd5 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -46,21 +46,20 @@ int afs_open_socket(struct afs_net *net)
 
 	_enter("");
 
-	ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
+	ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket);
 	if (ret < 0)
 		goto error_1;
 
 	socket->sk->sk_allocation = GFP_NOFS;
 
 	/* bind the callback manager's address to make this a server socket */
+	memset(&srx, 0, sizeof(srx));
 	srx.srx_family			= AF_RXRPC;
 	srx.srx_service			= CM_SERVICE;
 	srx.transport_type		= SOCK_DGRAM;
-	srx.transport_len		= sizeof(srx.transport.sin);
-	srx.transport.sin.sin_family	= AF_INET;
-	srx.transport.sin.sin_port	= htons(AFS_CM_PORT);
-	memset(&srx.transport.sin.sin_addr, 0,
-	       sizeof(srx.transport.sin.sin_addr));
+	srx.transport_len		= sizeof(srx.transport.sin6);
+	srx.transport.sin6.sin6_family	= AF_INET6;
+	srx.transport.sin6.sin6_port	= htons(AFS_CM_PORT);
 
 	ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
 	if (ret < 0)
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 662f7fbf5d05..c63974f06385 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -200,11 +200,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
 
 	_enter("{%d,%pIS}", srx->transport.family, &srx->transport);
 
-	if (srx->transport.family != AF_INET) {
-		WARN(true, "AFS does not yes support non-IPv4 addresses\n");
-		return NULL;
-	}
-
 	read_lock(&net->servers_lock);
 
 	p = net->servers.rb_node;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 48d137628d6a..276319aa86d8 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -88,10 +88,15 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 		entry->servers[loop].srx_family = AF_RXRPC;
 		entry->servers[loop].srx_service = FS_SERVICE;
 		entry->servers[loop].transport_type = SOCK_DGRAM;
-		entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin);
-		entry->servers[loop].transport.sin.sin_family = AF_INET;
-		entry->servers[loop].transport.sin.sin_port = htons(AFS_FS_PORT);
-		entry->servers[loop].transport.sin.sin_addr.s_addr = *bp++;
+		entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin6);
+		entry->servers[loop].transport.sin6.sin6_family = AF_INET6;
+		entry->servers[loop].transport.sin6.sin6_port = htons(AFS_FS_PORT);
+		entry->servers[loop].transport.sin6.sin6_flowinfo = 0;
+		entry->servers[loop].transport.sin6.sin6_scope_id = 0;
+		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[0] = 0;
+		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[1] = 0;
+		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[3] = *bp++;
 	}
 
 	bp += 8; /* partition IDs */
-- 
cgit v1.2.3


From f780c8ea0e73c0d371ffbebff91bb7555697219f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:48 +0000
Subject: afs: Consolidate abort_to_error translators

The AFS abort code space is shared across all services, so there's no need
for separate abort_to_error translators for each service.

Consolidate them into a single function and remove the function pointers
for them.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c |  6 ------
 fs/afs/fsclient.c  | 18 ------------------
 fs/afs/internal.h  |  3 ---
 fs/afs/misc.c      | 38 ++++++++++++++++++++++++++++++++++----
 fs/afs/rxrpc.c     |  5 ++---
 fs/afs/vlclient.c  | 43 -------------------------------------------
 6 files changed, 36 insertions(+), 77 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 9ad39f8a7e87..24032916e0ca 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -41,7 +41,6 @@ static CM_NAME(CallBack);
 static const struct afs_call_type afs_SRXCBCallBack = {
 	.name		= afs_SRXCBCallBack_name,
 	.deliver	= afs_deliver_cb_callback,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_CallBack,
 };
@@ -53,7 +52,6 @@ static CM_NAME(InitCallBackState);
 static const struct afs_call_type afs_SRXCBInitCallBackState = {
 	.name		= afs_SRXCBInitCallBackState_name,
 	.deliver	= afs_deliver_cb_init_call_back_state,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_InitCallBackState,
 };
@@ -65,7 +63,6 @@ static CM_NAME(InitCallBackState3);
 static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
 	.name		= afs_SRXCBInitCallBackState3_name,
 	.deliver	= afs_deliver_cb_init_call_back_state3,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_InitCallBackState,
 };
@@ -77,7 +74,6 @@ static CM_NAME(Probe);
 static const struct afs_call_type afs_SRXCBProbe = {
 	.name		= afs_SRXCBProbe_name,
 	.deliver	= afs_deliver_cb_probe,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_Probe,
 };
@@ -89,7 +85,6 @@ static CM_NAME(ProbeUuid);
 static const struct afs_call_type afs_SRXCBProbeUuid = {
 	.name		= afs_SRXCBProbeUuid_name,
 	.deliver	= afs_deliver_cb_probe_uuid,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_ProbeUuid,
 };
@@ -101,7 +96,6 @@ static CM_NAME(TellMeAboutYourself);
 static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
 	.name		= afs_SRXCBTellMeAboutYourself_name,
 	.deliver	= afs_deliver_cb_tell_me_about_yourself,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
 	.work		= SRXAFSCB_TellMeAboutYourself,
 };
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index bac2e8db6e75..7acfbc6e1f20 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -270,7 +270,6 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 static const struct afs_call_type afs_RXFSFetchStatus = {
 	.name		= "FS.FetchStatus",
 	.deliver	= afs_deliver_fs_fetch_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -468,14 +467,12 @@ static void afs_fetch_data_destructor(struct afs_call *call)
 static const struct afs_call_type afs_RXFSFetchData = {
 	.name		= "FS.FetchData",
 	.deliver	= afs_deliver_fs_fetch_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_fetch_data_destructor,
 };
 
 static const struct afs_call_type afs_RXFSFetchData64 = {
 	.name		= "FS.FetchData64",
 	.deliver	= afs_deliver_fs_fetch_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_fetch_data_destructor,
 };
 
@@ -579,7 +576,6 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
 static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
 	.name		= "FS.GiveUpCallBacks",
 	.deliver	= afs_deliver_fs_give_up_callbacks,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -677,7 +673,6 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 static const struct afs_call_type afs_RXFSCreateXXXX = {
 	.name		= "FS.CreateXXXX",
 	.deliver	= afs_deliver_fs_create_vnode,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -769,7 +764,6 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 static const struct afs_call_type afs_RXFSRemoveXXXX = {
 	.name		= "FS.RemoveXXXX",
 	.deliver	= afs_deliver_fs_remove,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -849,7 +843,6 @@ static int afs_deliver_fs_link(struct afs_call *call)
 static const struct afs_call_type afs_RXFSLink = {
 	.name		= "FS.Link",
 	.deliver	= afs_deliver_fs_link,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -934,7 +927,6 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
 static const struct afs_call_type afs_RXFSSymlink = {
 	.name		= "FS.Symlink",
 	.deliver	= afs_deliver_fs_symlink,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1038,7 +1030,6 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 static const struct afs_call_type afs_RXFSRename = {
 	.name		= "FS.Rename",
 	.deliver	= afs_deliver_fs_rename,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1140,14 +1131,12 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 static const struct afs_call_type afs_RXFSStoreData = {
 	.name		= "FS.StoreData",
 	.deliver	= afs_deliver_fs_store_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64 = {
 	.name		= "FS.StoreData64",
 	.deliver	= afs_deliver_fs_store_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1318,21 +1307,18 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 static const struct afs_call_type afs_RXFSStoreStatus = {
 	.name		= "FS.StoreStatus",
 	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData_as_Status = {
 	.name		= "FS.StoreData",
 	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
 	.name		= "FS.StoreData64",
 	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1659,7 +1645,6 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call)
 static const struct afs_call_type afs_RXFSGetVolumeStatus = {
 	.name		= "FS.GetVolumeStatus",
 	.deliver	= afs_deliver_fs_get_volume_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_get_volume_status_call_destructor,
 };
 
@@ -1730,7 +1715,6 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
 static const struct afs_call_type afs_RXFSSetLock = {
 	.name		= "FS.SetLock",
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1740,7 +1724,6 @@ static const struct afs_call_type afs_RXFSSetLock = {
 static const struct afs_call_type afs_RXFSExtendLock = {
 	.name		= "FS.ExtendLock",
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1750,7 +1733,6 @@ static const struct afs_call_type afs_RXFSExtendLock = {
 static const struct afs_call_type afs_RXFSReleaseLock = {
 	.name		= "FS.ReleaseLock",
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 4fe26bd7bfb0..ba31a386bb2d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -120,9 +120,6 @@ struct afs_call_type {
 	 */
 	int (*deliver)(struct afs_call *call);
 
-	/* map an abort code to an error number */
-	int (*abort_to_error)(u32 abort_code);
-
 	/* clean up a call */
 	void (*destructor)(struct afs_call *call);
 
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index c05f1f1c0d41..700a5fa7f4ec 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -21,12 +21,12 @@
 int afs_abort_to_error(u32 abort_code)
 {
 	switch (abort_code) {
-	/* low errno codes inserted into abort namespace */
+		/* Low errno codes inserted into abort namespace */
 	case 13:		return -EACCES;
 	case 27:		return -EFBIG;
 	case 30:		return -EROFS;
 
-	/* VICE "special error" codes; 101 - 111 */
+		/* VICE "special error" codes; 101 - 111 */
 	case VSALVAGE:		return -EIO;
 	case VNOVNODE:		return -ENOENT;
 	case VNOVOL:		return -ENOMEDIUM;
@@ -39,7 +39,37 @@ int afs_abort_to_error(u32 abort_code)
 	case VBUSY:		return -EBUSY;
 	case VMOVED:		return -ENXIO;
 
-	/* Unified AFS error table; ET "uae" == 0x2f6df00 */
+		/* Volume Location server errors */
+	case AFSVL_IDEXIST:		return -EEXIST;
+	case AFSVL_IO:			return -EREMOTEIO;
+	case AFSVL_NAMEEXIST:		return -EEXIST;
+	case AFSVL_CREATEFAIL:		return -EREMOTEIO;
+	case AFSVL_NOENT:		return -ENOMEDIUM;
+	case AFSVL_EMPTY:		return -ENOMEDIUM;
+	case AFSVL_ENTDELETED:		return -ENOMEDIUM;
+	case AFSVL_BADNAME:		return -EINVAL;
+	case AFSVL_BADINDEX:		return -EINVAL;
+	case AFSVL_BADVOLTYPE:		return -EINVAL;
+	case AFSVL_BADSERVER:		return -EINVAL;
+	case AFSVL_BADPARTITION:	return -EINVAL;
+	case AFSVL_REPSFULL:		return -EFBIG;
+	case AFSVL_NOREPSERVER:		return -ENOENT;
+	case AFSVL_DUPREPSERVER:	return -EEXIST;
+	case AFSVL_RWNOTFOUND:		return -ENOENT;
+	case AFSVL_BADREFCOUNT:		return -EINVAL;
+	case AFSVL_SIZEEXCEEDED:	return -EINVAL;
+	case AFSVL_BADENTRY:		return -EINVAL;
+	case AFSVL_BADVOLIDBUMP:	return -EINVAL;
+	case AFSVL_IDALREADYHASHED:	return -EINVAL;
+	case AFSVL_ENTRYLOCKED:		return -EBUSY;
+	case AFSVL_BADVOLOPER:		return -EBADRQC;
+	case AFSVL_BADRELLOCKTYPE:	return -EINVAL;
+	case AFSVL_RERELEASE:		return -EREMOTEIO;
+	case AFSVL_BADSERVERFLAG:	return -EINVAL;
+	case AFSVL_PERM:		return -EACCES;
+	case AFSVL_NOMEM:		return -EREMOTEIO;
+
+		/* Unified AFS error table; ET "uae" == 0x2f6df00 */
 	case 0x2f6df00:		return -EPERM;
 	case 0x2f6df01:		return -ENOENT;
 	case 0x2f6df04:		return -EIO;
@@ -68,7 +98,7 @@ int afs_abort_to_error(u32 abort_code)
 	case 0x2f6df6c:		return -ETIMEDOUT;
 	case 0x2f6df78:		return -EDQUOT;
 
-	/* RXKAD abort codes; from include/rxrpc/packet.h.  ET "RXK" == 0x1260B00 */
+		/* RXKAD abort codes; from include/rxrpc/packet.h.  ET "RXK" == 0x1260B00 */
 	case RXKADINCONSISTENCY: return -EPROTO;
 	case RXKADPACKETSHORT:	return -EPROTO;
 	case RXKADLEVELFAIL:	return -EKEYREJECTED;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5d2c1a34ffd5..5f06cf720340 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -31,7 +31,6 @@ static int afs_deliver_cm_op_id(struct afs_call *);
 static const struct afs_call_type afs_RXCMxxxx = {
 	.name		= "CB.xxxx",
 	.deliver	= afs_deliver_cm_op_id,
-	.abort_to_error	= afs_abort_to_error,
 };
 
 /*
@@ -418,7 +417,7 @@ error_do_abort:
 		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
 				       0, &offset, false, &abort_code,
 				       &call->service_id);
-		ret = call->type->abort_to_error(abort_code);
+		ret = afs_abort_to_error(abort_code);
 	}
 error_kill_call:
 	afs_put_call(call);
@@ -876,7 +875,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 	}
 
 	if (ret == -ECONNABORTED)
-		call->error = call->type->abort_to_error(call->abort_code);
+		call->error = afs_abort_to_error(call->abort_code);
 	else
 		call->error = ret;
 	call->state = AFS_CALL_COMPLETE;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 276319aa86d8..66e62be07b63 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -15,47 +15,6 @@
 #include "afs_fs.h"
 #include "internal.h"
 
-/*
- * map volume locator abort codes to error codes
- */
-static int afs_vl_abort_to_error(u32 abort_code)
-{
-	_enter("%u", abort_code);
-
-	switch (abort_code) {
-	case AFSVL_IDEXIST:		return -EEXIST;
-	case AFSVL_IO:			return -EREMOTEIO;
-	case AFSVL_NAMEEXIST:		return -EEXIST;
-	case AFSVL_CREATEFAIL:		return -EREMOTEIO;
-	case AFSVL_NOENT:		return -ENOMEDIUM;
-	case AFSVL_EMPTY:		return -ENOMEDIUM;
-	case AFSVL_ENTDELETED:		return -ENOMEDIUM;
-	case AFSVL_BADNAME:		return -EINVAL;
-	case AFSVL_BADINDEX:		return -EINVAL;
-	case AFSVL_BADVOLTYPE:		return -EINVAL;
-	case AFSVL_BADSERVER:		return -EINVAL;
-	case AFSVL_BADPARTITION:	return -EINVAL;
-	case AFSVL_REPSFULL:		return -EFBIG;
-	case AFSVL_NOREPSERVER:		return -ENOENT;
-	case AFSVL_DUPREPSERVER:	return -EEXIST;
-	case AFSVL_RWNOTFOUND:		return -ENOENT;
-	case AFSVL_BADREFCOUNT:		return -EINVAL;
-	case AFSVL_SIZEEXCEEDED:	return -EINVAL;
-	case AFSVL_BADENTRY:		return -EINVAL;
-	case AFSVL_BADVOLIDBUMP:	return -EINVAL;
-	case AFSVL_IDALREADYHASHED:	return -EINVAL;
-	case AFSVL_ENTRYLOCKED:		return -EBUSY;
-	case AFSVL_BADVOLOPER:		return -EBADRQC;
-	case AFSVL_BADRELLOCKTYPE:	return -EINVAL;
-	case AFSVL_RERELEASE:		return -EREMOTEIO;
-	case AFSVL_BADSERVERFLAG:	return -EINVAL;
-	case AFSVL_PERM:		return -EACCES;
-	case AFSVL_NOMEM:		return -EREMOTEIO;
-	default:
-		return afs_abort_to_error(abort_code);
-	}
-}
-
 /*
  * deliver reply data to a VL.GetEntryByXXX call
  */
@@ -139,7 +98,6 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 static const struct afs_call_type afs_RXVLGetEntryByName = {
 	.name		= "VL.GetEntryByName",
 	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.abort_to_error	= afs_vl_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -149,7 +107,6 @@ static const struct afs_call_type afs_RXVLGetEntryByName = {
 static const struct afs_call_type afs_RXVLGetEntryById = {
 	.name		= "VL.GetEntryById",
 	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.abort_to_error	= afs_vl_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
-- 
cgit v1.2.3


From 97e3043ad82c93b7c2e3c4bfc518f7401f175821 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:48 +0000
Subject: afs: Condense afs_call's reply{,2,3,4} into an array

Condense struct afs_call's reply anchor members - reply{,2,3,4} - into an
array.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c     |   2 +-
 fs/afs/fsclient.c | 136 +++++++++++++++++++++++++++---------------------------
 fs/afs/internal.h |   5 +-
 fs/afs/rxrpc.c    |   2 +-
 fs/afs/vlclient.c |   6 +--
 5 files changed, 74 insertions(+), 77 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 510cba15fa56..08f9f0c5dfac 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -281,7 +281,7 @@ static int afs_readpage(struct file *file, struct page *page)
 static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
 {
 #ifdef CONFIG_AFS_FSCACHE
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 #endif
 	struct page *page = req->pages[req->index];
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7acfbc6e1f20..36f58adde030 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -243,7 +243,7 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
  */
 static int afs_deliver_fs_fetch_status(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -257,8 +257,8 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
 	xdr_decode_AFSCallBack(&bp, vnode);
-	if (call->reply2)
-		xdr_decode_AFSVolSync(&bp, call->reply2);
+	if (call->reply[1])
+		xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -294,8 +294,8 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = volsync;
+	call->reply[0] = vnode;
+	call->reply[1] = volsync;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -312,8 +312,8 @@ int afs_fs_fetch_file_status(struct afs_server *server,
  */
 static int afs_deliver_fs_fetch_data(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
-	struct afs_read *req = call->reply3;
+	struct afs_vnode *vnode = call->reply[0];
+	struct afs_read *req = call->reply[2];
 	const __be32 *bp;
 	unsigned int size;
 	void *buffer;
@@ -430,8 +430,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		bp = call->buffer;
 		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
 		xdr_decode_AFSCallBack(&bp, vnode);
-		if (call->reply2)
-			xdr_decode_AFSVolSync(&bp, call->reply2);
+		if (call->reply[1])
+			xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
 		call->offset = 0;
 		call->unmarshall++;
@@ -455,7 +455,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 static void afs_fetch_data_destructor(struct afs_call *call)
 {
-	struct afs_read *req = call->reply3;
+	struct afs_read *req = call->reply[2];
 
 	afs_put_read(req);
 	afs_flat_call_destructor(call);
@@ -496,9 +496,9 @@ static int afs_fs_fetch_data64(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = NULL; /* volsync */
-	call->reply3 = req;
+	call->reply[0] = vnode;
+	call->reply[1] = NULL; /* volsync */
+	call->reply[2] = req;
 	call->operation_ID = FSFETCHDATA64;
 
 	/* marshall the parameters */
@@ -541,9 +541,9 @@ int afs_fs_fetch_data(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = NULL; /* volsync */
-	call->reply3 = req;
+	call->reply[0] = vnode;
+	call->reply[1] = NULL; /* volsync */
+	call->reply[2] = req;
 	call->operation_ID = FSFETCHDATA;
 
 	/* marshall the parameters */
@@ -645,7 +645,7 @@ int afs_fs_give_up_callbacks(struct afs_net *net,
  */
 static int afs_deliver_fs_create_vnode(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -657,11 +657,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFid(&bp, call->reply2);
-	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+	xdr_decode_AFSFid(&bp, call->reply[1]);
+	xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSCallBack_raw(&bp, call->reply4);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -706,10 +706,10 @@ int afs_fs_create(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = newfid;
-	call->reply3 = newstatus;
-	call->reply4 = newcb;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->reply[3] = newcb;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -739,7 +739,7 @@ int afs_fs_create(struct afs_server *server,
  */
 static int afs_deliver_fs_remove(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -752,7 +752,7 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -793,7 +793,7 @@ int afs_fs_remove(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -817,7 +817,7 @@ int afs_fs_remove(struct afs_server *server,
  */
 static int afs_deliver_fs_link(struct afs_call *call)
 {
-	struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
+	struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
 	const __be32 *bp;
 	int ret;
 
@@ -831,7 +831,7 @@ static int afs_deliver_fs_link(struct afs_call *call)
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
 	xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -872,8 +872,8 @@ int afs_fs_link(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = dvnode;
-	call->reply2 = vnode;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -900,7 +900,7 @@ int afs_fs_link(struct afs_server *server,
  */
 static int afs_deliver_fs_symlink(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -912,10 +912,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFid(&bp, call->reply2);
-	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+	xdr_decode_AFSFid(&bp, call->reply[1]);
+	xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -963,9 +963,9 @@ int afs_fs_symlink(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = newfid;
-	call->reply3 = newstatus;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1002,7 +1002,7 @@ int afs_fs_symlink(struct afs_server *server,
  */
 static int afs_deliver_fs_rename(struct afs_call *call)
 {
-	struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
+	struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1];
 	const __be32 *bp;
 	int ret;
 
@@ -1018,7 +1018,7 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 	if (new_dvnode != orig_dvnode)
 		xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
 					  NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1067,8 +1067,8 @@ int afs_fs_rename(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = orig_dvnode;
-	call->reply2 = new_dvnode;
+	call->reply[0] = orig_dvnode;
+	call->reply[1] = new_dvnode;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1103,7 +1103,7 @@ int afs_fs_rename(struct afs_server *server,
  */
 static int afs_deliver_fs_store_data(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -1117,7 +1117,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
 				  &call->store_version);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	afs_pages_written_back(vnode, call);
 
@@ -1166,7 +1166,7 @@ static int afs_fs_store_data64(struct afs_server *server,
 
 	call->wb = wb;
 	call->key = wb->key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
@@ -1242,7 +1242,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 
 	call->wb = wb;
 	call->key = wb->key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
@@ -1278,7 +1278,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 static int afs_deliver_fs_store_status(struct afs_call *call)
 {
 	afs_dataversion_t *store_version;
-	struct afs_vnode *vnode = call->reply;
+	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -1295,7 +1295,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1346,7 +1346,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
 
@@ -1396,7 +1396,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
 
@@ -1442,7 +1442,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 	call->operation_ID = FSSTORESTATUS;
 
 	/* marshall the parameters */
@@ -1482,7 +1482,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 			return ret;
 
 		bp = call->buffer;
-		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
+		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1503,13 +1503,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 	case 3:
 		_debug("extract volname");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply3,
+			ret = afs_extract_data(call, call->reply[2],
 					       call->count, true);
 			if (ret < 0)
 				return ret;
 		}
 
-		p = call->reply3;
+		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("volname '%s'", p);
 
@@ -1550,13 +1550,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 	case 6:
 		_debug("extract offline");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply3,
+			ret = afs_extract_data(call, call->reply[2],
 					       call->count, true);
 			if (ret < 0)
 				return ret;
 		}
 
-		p = call->reply3;
+		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("offline '%s'", p);
 
@@ -1597,13 +1597,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 	case 9:
 		_debug("extract motd");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply3,
+			ret = afs_extract_data(call, call->reply[2],
 					       call->count, true);
 			if (ret < 0)
 				return ret;
 		}
 
-		p = call->reply3;
+		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("motd '%s'", p);
 
@@ -1634,8 +1634,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
  */
 static void afs_get_volume_status_call_destructor(struct afs_call *call)
 {
-	kfree(call->reply3);
-	call->reply3 = NULL;
+	kfree(call->reply[2]);
+	call->reply[2] = NULL;
 	afs_flat_call_destructor(call);
 }
 
@@ -1675,9 +1675,9 @@ int afs_fs_get_volume_status(struct afs_server *server,
 	}
 
 	call->key = key;
-	call->reply = vnode;
-	call->reply2 = vs;
-	call->reply3 = tmpbuf;
+	call->reply[0] = vnode;
+	call->reply[1] = vs;
+	call->reply[2] = tmpbuf;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1703,7 +1703,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1756,7 +1756,7 @@ int afs_fs_set_lock(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1788,7 +1788,7 @@ int afs_fs_extend_lock(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1819,7 +1819,7 @@ int afs_fs_release_lock(struct afs_server *server,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = vnode;
+	call->reply[0] = vnode;
 
 	/* marshall the parameters */
 	bp = call->request;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ba31a386bb2d..77a83e1bf56f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -81,10 +81,7 @@ struct afs_call {
 	struct address_space	*mapping;	/* page set */
 	struct afs_writeback	*wb;		/* writeback being performed */
 	void			*buffer;	/* reply receive buffer */
-	void			*reply;		/* reply buffer (first part) */
-	void			*reply2;	/* reply buffer (second part) */
-	void			*reply3;	/* reply buffer (third part) */
-	void			*reply4;	/* reply buffer (fourth part) */
+	void			*reply[4];	/* Where to put the reply */
 	pgoff_t			first;		/* first page in mapping to deal with */
 	pgoff_t			last;		/* last page in mapping to deal with */
 	size_t			offset;		/* offset into received data store */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5f06cf720340..fc49193e12c4 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -639,7 +639,7 @@ static void afs_process_async_call(struct work_struct *work)
 	}
 
 	if (call->state == AFS_CALL_COMPLETE) {
-		call->reply = NULL;
+		call->reply[0] = NULL;
 
 		/* We have two refs to release - one from the alloc and one
 		 * queued with the work item - and we can't just deallocate the
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 66e62be07b63..aa79fe3f168b 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -32,7 +32,7 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 		return ret;
 
 	/* unmarshall the reply once we've received all of it */
-	entry = call->reply;
+	entry = call->reply[0];
 	bp = call->buffer;
 
 	for (loop = 0; loop < 64; loop++)
@@ -135,7 +135,7 @@ int afs_vl_get_entry_by_name(struct afs_net *net,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = entry;
+	call->reply[0] = entry;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -170,7 +170,7 @@ int afs_vl_get_entry_by_id(struct afs_net *net,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply = entry;
+	call->reply[0] = entry;
 
 	/* marshall the parameters */
 	bp = call->request;
-- 
cgit v1.2.3


From 33cd7f2b76717ac8dda566d8b4f518e803ae2618 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:48 +0000
Subject: afs: Potentially return call->reply[0] from afs_make_call()

If call->ret_reply0 is set, return call->reply[0] on success.  Change the
return type of afs_make_call() to long so that this can be passed back
without bit loss and then cast to a pointer if required.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h |  3 ++-
 fs/afs/rxrpc.c    | 26 ++++++++++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 77a83e1bf56f..94b676c433c7 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -101,6 +101,7 @@ struct afs_call {
 	bool			send_pages;	/* T if data from mapping should be sent */
 	bool			need_attention;	/* T if RxRPC poked us */
 	bool			async;		/* T if asynchronous */
+	bool			ret_reply0;	/* T if should return reply[0] on success */
 	bool			upgrade;	/* T to request service upgrade */
 	u16			service_id;	/* RxRPC service ID to call */
 	u32			operation_ID;	/* operation ID for an incoming call */
@@ -647,7 +648,7 @@ extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
 extern int afs_queue_call_work(struct afs_call *);
-extern int afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
+extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
 					    size_t, size_t);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index fc49193e12c4..d57f09b5d27b 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -20,7 +20,7 @@
 struct workqueue_struct *afs_async_calls;
 
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
-static int afs_wait_for_call_to_complete(struct afs_call *);
+static long afs_wait_for_call_to_complete(struct afs_call *);
 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_process_async_call(struct work_struct *);
 static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
@@ -320,8 +320,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 /*
  * initiate a call
  */
-int afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
-		  gfp_t gfp, bool async)
+long afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
+		   gfp_t gfp, bool async)
 {
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
@@ -415,9 +415,9 @@ error_do_abort:
 		abort_code = 0;
 		offset = 0;
 		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
-				       0, &offset, false, &abort_code,
+				       0, &offset, false, &call->abort_code,
 				       &call->service_id);
-		ret = afs_abort_to_error(abort_code);
+		ret = afs_abort_to_error(call->abort_code);
 	}
 error_kill_call:
 	afs_put_call(call);
@@ -468,7 +468,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 		case -EAGAIN:
 			goto out;
 		case -ECONNABORTED:
-			goto call_complete;
+			goto save_error;
 		case -ENOTCONN:
 			abort_code = RX_CALL_DEAD;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
@@ -501,7 +501,6 @@ out:
 
 save_error:
 	call->error = ret;
-call_complete:
 	call->state = AFS_CALL_COMPLETE;
 	goto done;
 }
@@ -509,10 +508,10 @@ call_complete:
 /*
  * wait synchronously for a call to complete
  */
-static int afs_wait_for_call_to_complete(struct afs_call *call)
+static long afs_wait_for_call_to_complete(struct afs_call *call)
 {
 	signed long rtt2, timeout;
-	int ret;
+	long ret;
 	u64 rtt;
 	u32 life, last_life;
 
@@ -567,9 +566,16 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 	}
 
 	ret = call->error;
+	if (ret < 0) {
+		ret = afs_abort_to_error(call->abort_code);
+	} else if (ret == 0 && call->ret_reply0) {
+		ret = (long)call->reply[0];
+		call->reply[0] = NULL;
+	}
+
 	_debug("call complete");
 	afs_put_call(call);
-	_leave(" = %d", ret);
+	_leave(" = %p", (void *)ret);
 	return ret;
 }
 
-- 
cgit v1.2.3


From f4b3526d83c40dd8bf5948b9d7a1b2c340f0dcc8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:48 +0000
Subject: afs: Connect up the CB.ProbeUuid

The handler for the CB.ProbeUuid operation in the cache manager is
implemented, but isn't listed in the switch-statement of operation
selection, so won't be used.  Fix this by adding it.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/afs')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 24032916e0ca..03c0a8572ef4 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -121,6 +121,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	case CBProbe:
 		call->type = &afs_SRXCBProbe;
 		return true;
+	case CBProbeUuid:
+		call->type = &afs_SRXCBProbeUuid;
+		return true;
 	case CBTellMeAboutYourself:
 		call->type = &afs_SRXCBTellMeAboutYourself;
 		return true;
-- 
cgit v1.2.3


From 03dc2cfca536df89f4b1747caad9324c9be482fa Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:49 +0000
Subject: afs: Fix the afs_uuid struct to make the char-sized fields signed

In AFS's encoding of a UUID, the eight 'char' fields are all signed, so
represent them with __s8 rather than __u8.  This makes the compiler
sign-extend them correctly when XDR-encoding them.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 0d837bddbf7d..2e2887a7d331 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -79,9 +79,9 @@ struct afs_uuid {
 	__be32		time_low;			/* low part of timestamp */
 	__be16		time_mid;			/* mid part of timestamp */
 	__be16		time_hi_and_version;		/* high part of timestamp and version  */
-	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-	__u8		clock_seq_low;			/* clock seq low */
-	__u8		node[6];			/* spatially unique node ID (MAC addr) */
+	__s8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__s8		clock_seq_low;			/* clock seq low */
+	__s8		node[6];			/* spatially unique node ID (MAC addr) */
 };
 
 /*
-- 
cgit v1.2.3


From d0676a16781d0972969dff8b3f3f819599cc4b07 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:49 +0000
Subject: afs: Rename struct afs_call server member to cm_server

Rename the server member of struct afs_call to cm_server as we're only
going to be using it for incoming calls for the Cache Manager service.
This makes it easier to differentiate from the pointer to the target server
for the client, which will point to a different structure to allow for
callback handling.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c | 18 ++++++++----------
 fs/afs/internal.h  |  2 +-
 fs/afs/rxrpc.c     |  1 +
 3 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 03c0a8572ef4..91e921553453 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -144,12 +144,10 @@ static void afs_cm_destructor(struct afs_call *call)
 	 * afs_deliver_cb_callback().
 	 */
 	if (call->unmarshall == 5) {
-		ASSERT(call->server && call->count && call->request);
-		afs_break_callbacks(call->server, call->count, call->request);
+		ASSERT(call->cm_server && call->count && call->request);
+		afs_break_callbacks(call->cm_server, call->count, call->request);
 	}
 
-	afs_put_server(call->net, call->server);
-	call->server = NULL;
 	kfree(call->buffer);
 	call->buffer = NULL;
 }
@@ -170,7 +168,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 	 * yet */
 	afs_send_empty_reply(call);
 
-	afs_break_callbacks(call->server, call->count, call->request);
+	afs_break_callbacks(call->cm_server, call->count, call->request);
 	afs_put_call(call);
 	_leave("");
 }
@@ -290,7 +288,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
-	call->server = server;
+	call->cm_server = server;
 
 	return afs_queue_call_work(call);
 }
@@ -302,9 +300,9 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 {
 	struct afs_call *call = container_of(work, struct afs_call, work);
 
-	_enter("{%p}", call->server);
+	_enter("{%p}", call->cm_server);
 
-	afs_init_callback_state(call->server);
+	afs_init_callback_state(call->cm_server);
 	afs_send_empty_reply(call);
 	afs_put_call(call);
 	_leave("");
@@ -335,7 +333,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
-	call->server = server;
+	call->cm_server = server;
 
 	return afs_queue_call_work(call);
 }
@@ -407,7 +405,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
-	call->server = server;
+	call->cm_server = server;
 
 	return afs_queue_call_work(call);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 94b676c433c7..6fa81e04aff3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -76,7 +76,7 @@ struct afs_call {
 	struct rxrpc_call	*rxcall;	/* RxRPC call handle */
 	struct key		*key;		/* security for this call */
 	struct afs_net		*net;		/* The network namespace */
-	struct afs_server	*server;	/* server affected by incoming CM call */
+	struct afs_server	*cm_server;	/* Server affected by incoming CM call */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* page set */
 	struct afs_writeback	*wb;		/* writeback being performed */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d57f09b5d27b..ac1e25f957b1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -161,6 +161,7 @@ void afs_put_call(struct afs_call *call)
 		if (call->type->destructor)
 			call->type->destructor(call);
 
+		afs_put_server(call->net, call->cm_server);
 		kfree(call->request);
 		kfree(call);
 
-- 
cgit v1.2.3


From c435ee34551e1f5a02a253ca8e235287efd2727c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:49 +0000
Subject: afs: Overhaul the callback handling

Overhaul the AFS callback handling by the following means:

 (1) Don't give up callback promises on vnodes that we are no longer using,
     rather let them just expire on the server or let the server break
     them.  This is actually more efficient for the server as the callback
     lookup is expensive if there are lots of extant callbacks.

 (2) Only give up the callback promises we have from a server when the
     server record is destroyed.  Then we can just give up *all* the
     callback promises on it in one go.

 (3) Servers can end up being shared between cells if cells are aliased, so
     don't add all the vnodes being backed by a particular server into a
     big FID-indexed tree on that server as there may be duplicates.

     Instead have each volume instance (~= superblock) register an interest
     in a server as it starts to make use of it and use this to allow the
     processor for callbacks from the server to find the superblock and
     thence the inode corresponding to the FID being broken by means of
     ilookup_nowait().

 (4) Rather than iterating over the entire callback list when a mass-break
     comes in from the server, maintain a counter of mass-breaks in
     afs_server (cb_seq) and make afs_validate() check it against the copy
     in afs_vnode.

     It would be nice not to have to take a read_lock whilst doing this,
     but that's tricky without using RCU.

 (5) Save a ref on the fileserver we're using for a call in the afs_call
     struct so that we can access its cb_s_break during call decoding.

 (6) Write-lock around callback and status storage in a vnode and read-lock
     around getattr so that we don't see the status mid-update.

This has the following consequences:

 (1) Data invalidation isn't seen until someone calls afs_validate() on a
     vnode.  Unfortunately, we need to use a key to query the server, but
     getting one from a background thread is tricky without caching loads
     of keys all over the place.

 (2) Mass invalidation isn't seen until someone calls afs_validate().

 (3) Callback breaking is going to hit the inode_hash_lock quite a bit.
     Could this be replaced with rcu_read_lock() since inodes are destroyed
     under RCU conditions.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs_fs.h    |   1 +
 fs/afs/callback.c  | 484 +++++++++++++++--------------------------------------
 fs/afs/cmservice.c |   2 +-
 fs/afs/dir.c       |  52 +++---
 fs/afs/flock.c     |   6 +-
 fs/afs/fsclient.c  | 190 +++++++++++----------
 fs/afs/inode.c     | 109 ++++++------
 fs/afs/internal.h  |  93 +++++-----
 fs/afs/main.c      |   7 -
 fs/afs/security.c  |  15 +-
 fs/afs/server.c    |  21 +--
 fs/afs/super.c     |  11 +-
 fs/afs/vnode.c     | 203 ++--------------------
 fs/afs/volume.c    |  43 +++--
 14 files changed, 443 insertions(+), 794 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index accd886b4372..05395d0f1941 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -37,6 +37,7 @@ enum AFS_FS_Operations {
 	FSLOOKUP		= 161,	/* AFS lookup file in directory */
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
 	FSSTOREDATA64		= 65538, /* AFS Store file data */
+	FSGIVEUPALLCALLBACKS	= 65539, /* AFS Give up all outstanding callbacks on a server */
 };
 
 enum AFS_FS_Errors {
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index d12dffb76b67..82f4c7a3b7b6 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -20,116 +20,151 @@
 #include <linux/sched.h>
 #include "internal.h"
 
-#if 0
-unsigned afs_vnode_update_timeout = 10;
-#endif  /*  0  */
-
-#define afs_breakring_space(server) \
-	CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail,	\
-		   ARRAY_SIZE((server)->cb_break))
-
-struct workqueue_struct *afs_callback_update_worker;
-
 /*
- * allow the fileserver to request callback state (re-)initialisation
+ * Set up an interest-in-callbacks record for a volume on a server and
+ * register it with the server.
+ * - Called with volume->server_sem held.
  */
-void afs_init_callback_state(struct afs_server *server)
+int afs_register_server_cb_interest(struct afs_vnode *vnode,
+				    struct afs_cb_interest **ppcbi,
+				    struct afs_server *server)
 {
-	struct afs_vnode *vnode;
-
-	_enter("{%p}", server);
+	struct afs_cb_interest *cbi = *ppcbi, *vcbi, *new, *x;
+
+again:
+	vcbi = vnode->cb_interest;
+	if (vcbi) {
+		if (vcbi == cbi)
+			return 0;
+
+		if (cbi && vcbi->server == cbi->server) {
+			write_seqlock(&vnode->cb_lock);
+			vnode->cb_interest = afs_get_cb_interest(cbi);
+			write_sequnlock(&vnode->cb_lock);
+			afs_put_cb_interest(afs_v2net(vnode), cbi);
+			return 0;
+		}
 
-	spin_lock(&server->cb_lock);
+		if (!cbi && vcbi->server == server) {
+			afs_get_cb_interest(vcbi);
+			x = cmpxchg(ppcbi, cbi, vcbi);
+			if (x != cbi) {
+				cbi = x;
+				afs_put_cb_interest(afs_v2net(vnode), vcbi);
+				goto again;
+			}
+			return 0;
+		}
+	}
 
-	/* kill all the promises on record from this server */
-	while (!RB_EMPTY_ROOT(&server->cb_promises)) {
-		vnode = rb_entry(server->cb_promises.rb_node,
-				 struct afs_vnode, cb_promise);
-		_debug("UNPROMISE { vid=%x:%u uq=%u}",
-		       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
+	if (!cbi) {
+		new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		refcount_set(&new->usage, 1);
+		new->sb = vnode->vfs_inode.i_sb;
+		new->vid = vnode->volume->vid;
+		new->server = afs_get_server(server);
+		INIT_LIST_HEAD(&new->cb_link);
+
+		write_lock(&server->cb_break_lock);
+		list_add_tail(&new->cb_link, &server->cb_interests);
+		write_unlock(&server->cb_break_lock);
+
+		x = cmpxchg(ppcbi, cbi, new);
+		if (x == cbi) {
+			cbi = new;
+		} else {
+			cbi = x;
+			afs_put_cb_interest(afs_v2net(vnode), new);
+		}
 	}
 
-	spin_unlock(&server->cb_lock);
-	_leave("");
+	ASSERT(cbi);
+
+	/* Change the server the vnode is using.  This entails scrubbing any
+	 * interest the vnode had in the previous server it was using.
+	 */
+	write_seqlock(&vnode->cb_lock);
+
+	vnode->cb_interest = afs_get_cb_interest(cbi);
+	vnode->cb_s_break = cbi->server->cb_s_break;
+	clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+	write_sequnlock(&vnode->cb_lock);
+	return 0;
 }
 
 /*
- * handle the data invalidation side of a callback being broken
+ * Set a vnode's interest on a server.
  */
-void afs_broken_callback_work(struct work_struct *work)
+void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi)
 {
-	struct afs_vnode *vnode =
-		container_of(work, struct afs_vnode, cb_broken_work);
-
-	_enter("");
+	struct afs_cb_interest *old_cbi = NULL;
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+	if (vnode->cb_interest == cbi)
 		return;
 
-	/* we're only interested in dealing with a broken callback on *this*
-	 * vnode and only if no-one else has dealt with it yet */
-	if (!mutex_trylock(&vnode->validate_lock))
-		return; /* someone else is dealing with it */
-
-	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		if (S_ISDIR(vnode->vfs_inode.i_mode))
-			afs_clear_permits(vnode);
-
-		if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
-			goto out;
-
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			goto out;
-
-		/* if the vnode's data version number changed then its contents
-		 * are different */
-		if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
-			afs_zap_data(vnode);
+	write_seqlock(&vnode->cb_lock);
+	if (vnode->cb_interest != cbi) {
+		afs_get_cb_interest(cbi);
+		old_cbi = vnode->cb_interest;
+		vnode->cb_interest = cbi;
 	}
+	write_sequnlock(&vnode->cb_lock);
+	afs_put_cb_interest(afs_v2net(vnode), cbi);
+}
 
-out:
-	mutex_unlock(&vnode->validate_lock);
-
-	/* avoid the potential race whereby the mutex_trylock() in this
-	 * function happens again between the clear_bit() and the
-	 * mutex_unlock() */
-	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		_debug("requeue");
-		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+/*
+ * Remove an interest on a server.
+ */
+void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
+{
+	if (cbi && refcount_dec_and_test(&cbi->usage)) {
+		if (!list_empty(&cbi->cb_link)) {
+			write_lock(&cbi->server->cb_break_lock);
+			list_del_init(&cbi->cb_link);
+			write_unlock(&cbi->server->cb_break_lock);
+			afs_put_server(net, cbi->server);
+		}
+		kfree(cbi);
 	}
-	_leave("");
+}
+
+/*
+ * allow the fileserver to request callback state (re-)initialisation
+ */
+void afs_init_callback_state(struct afs_server *server)
+{
+	if (!test_and_clear_bit(AFS_SERVER_NEW, &server->flags))
+		server->cb_s_break++;
 }
 
 /*
  * actually break a callback
  */
-static void afs_break_callback(struct afs_server *server,
-			       struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode)
 {
 	_enter("");
 
-	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+	write_seqlock(&vnode->cb_lock);
+
+	if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+		vnode->cb_break++;
+		afs_clear_permits(vnode);
 
-	if (vnode->cb_promised) {
 		spin_lock(&vnode->lock);
 
 		_debug("break callback");
 
-		spin_lock(&server->cb_lock);
-		if (vnode->cb_promised) {
-			rb_erase(&vnode->cb_promise, &server->cb_promises);
-			vnode->cb_promised = false;
-		}
-		spin_unlock(&server->cb_lock);
-
-		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
 		if (list_empty(&vnode->granted_locks) &&
 		    !list_empty(&vnode->pending_locks))
 			afs_lock_may_be_available(vnode);
 		spin_unlock(&vnode->lock);
 	}
+
+	write_sequnlock(&vnode->cb_lock);
 }
 
 /*
@@ -141,49 +176,31 @@ static void afs_break_callback(struct afs_server *server,
 static void afs_break_one_callback(struct afs_server *server,
 				   struct afs_fid *fid)
 {
+	struct afs_cb_interest *cbi;
+	struct afs_iget_data data;
 	struct afs_vnode *vnode;
-	struct rb_node *p;
-
-	_debug("find");
-	spin_lock(&server->fs_lock);
-	p = server->fs_vnodes.rb_node;
-	while (p) {
-		vnode = rb_entry(p, struct afs_vnode, server_rb);
-		if (fid->vid < vnode->fid.vid)
-			p = p->rb_left;
-		else if (fid->vid > vnode->fid.vid)
-			p = p->rb_right;
-		else if (fid->vnode < vnode->fid.vnode)
-			p = p->rb_left;
-		else if (fid->vnode > vnode->fid.vnode)
-			p = p->rb_right;
-		else if (fid->unique < vnode->fid.unique)
-			p = p->rb_left;
-		else if (fid->unique > vnode->fid.unique)
-			p = p->rb_right;
-		else
-			goto found;
-	}
+	struct inode *inode;
 
-	/* not found so we just ignore it (it may have moved to another
-	 * server) */
-not_available:
-	_debug("not avail");
-	spin_unlock(&server->fs_lock);
-	_leave("");
-	return;
-
-found:
-	_debug("found");
-	ASSERTCMP(server, ==, vnode->server);
+	read_lock(&server->cb_break_lock);
 
-	if (!igrab(AFS_VNODE_TO_I(vnode)))
-		goto not_available;
-	spin_unlock(&server->fs_lock);
+	/* Step through all interested superblocks.  There may be more than one
+	 * because of cell aliasing.
+	 */
+	list_for_each_entry(cbi, &server->cb_interests, cb_link) {
+		if (cbi->vid != fid->vid)
+			continue;
+
+		data.volume = NULL;
+		data.fid = *fid;
+		inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
+		if (inode) {
+			vnode = AFS_FS_I(inode);
+			afs_break_callback(vnode);
+			iput(inode);
+		}
+	}
 
-	afs_break_callback(server, vnode);
-	iput(&vnode->vfs_inode);
-	_leave("");
+	read_unlock(&server->cb_break_lock);
 }
 
 /*
@@ -214,243 +231,14 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 }
 
 /*
- * record the callback for breaking
- * - the caller must hold server->cb_lock
- */
-static void afs_do_give_up_callback(struct afs_server *server,
-				    struct afs_vnode *vnode)
-{
-	struct afs_callback *cb;
-
-	_enter("%p,%p", server, vnode);
-
-	cb = &server->cb_break[server->cb_break_head];
-	cb->fid		= vnode->fid;
-	cb->version	= vnode->cb_version;
-	cb->expiry	= vnode->cb_expiry;
-	cb->type	= vnode->cb_type;
-	smp_wmb();
-	server->cb_break_head =
-		(server->cb_break_head + 1) &
-		(ARRAY_SIZE(server->cb_break) - 1);
-
-	/* defer the breaking of callbacks to try and collect as many as
-	 * possible to ship in one operation */
-	switch (atomic_inc_return(&server->cb_break_n)) {
-	case 1 ... AFSCBMAX - 1:
-		queue_delayed_work(afs_callback_update_worker,
-				   &server->cb_break_work, HZ * 2);
-		break;
-	case AFSCBMAX:
-		afs_flush_callback_breaks(server);
-		break;
-	default:
-		break;
-	}
-
-	ASSERT(server->cb_promises.rb_node != NULL);
-	rb_erase(&vnode->cb_promise, &server->cb_promises);
-	vnode->cb_promised = false;
-	_leave("");
-}
-
-/*
- * discard the callback on a deleted item
- */
-void afs_discard_callback_on_delete(struct afs_vnode *vnode)
-{
-	struct afs_server *server = vnode->server;
-
-	_enter("%d", vnode->cb_promised);
-
-	if (!vnode->cb_promised) {
-		_leave(" [not promised]");
-		return;
-	}
-
-	ASSERT(server != NULL);
-
-	spin_lock(&server->cb_lock);
-	if (vnode->cb_promised) {
-		ASSERT(server->cb_promises.rb_node != NULL);
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
-	}
-	spin_unlock(&server->cb_lock);
-	_leave("");
-}
-
-/*
- * give up the callback registered for a vnode on the file server when the
- * inode is being cleared
+ * Clear the callback interests in a server list.
  */
-void afs_give_up_callback(struct afs_vnode *vnode)
+void afs_clear_callback_interests(struct afs_net *net, struct afs_volume *volume)
 {
-	struct afs_server *server = vnode->server;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	_enter("%d", vnode->cb_promised);
-
-	_debug("GIVE UP INODE %p", &vnode->vfs_inode);
-
-	if (!vnode->cb_promised) {
-		_leave(" [not promised]");
-		return;
-	}
-
-	ASSERT(server != NULL);
+	int i;
 
-	spin_lock(&server->cb_lock);
-	if (vnode->cb_promised && afs_breakring_space(server) == 0) {
-		add_wait_queue(&server->cb_break_waitq, &myself);
-		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			if (!vnode->cb_promised ||
-			    afs_breakring_space(server) != 0)
-				break;
-			spin_unlock(&server->cb_lock);
-			schedule();
-			spin_lock(&server->cb_lock);
-		}
-		remove_wait_queue(&server->cb_break_waitq, &myself);
-		__set_current_state(TASK_RUNNING);
+	for (i = 0; i < ARRAY_SIZE(volume->cb_interests); i++) {
+		afs_put_cb_interest(net, volume->cb_interests[i]);
+		volume->cb_interests[i] = NULL;
 	}
-
-	/* of course, it's always possible for the server to break this vnode's
-	 * callback first... */
-	if (vnode->cb_promised)
-		afs_do_give_up_callback(server, vnode);
-
-	spin_unlock(&server->cb_lock);
-	_leave("");
-}
-
-/*
- * dispatch a deferred give up callbacks operation
- */
-void afs_dispatch_give_up_callbacks(struct work_struct *work)
-{
-	struct afs_server *server =
-		container_of(work, struct afs_server, cb_break_work.work);
-
-	_enter("");
-
-	/* tell the fileserver to discard the callback promises it has
-	 * - in the event of ENOMEM or some other error, we just forget that we
-	 *   had callbacks entirely, and the server will call us later to break
-	 *   them
-	 */
-	afs_fs_give_up_callbacks(server->cell->net, server, true);
-}
-
-/*
- * flush the outstanding callback breaks on a server
- */
-void afs_flush_callback_breaks(struct afs_server *server)
-{
-	mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
-}
-
-#if 0
-/*
- * update a bunch of callbacks
- */
-static void afs_callback_updater(struct work_struct *work)
-{
-	struct afs_server *server;
-	struct afs_vnode *vnode, *xvnode;
-	time64_t now;
-	long timeout;
-	int ret;
-
-	server = container_of(work, struct afs_server, updater);
-
-	_enter("");
-
-	now = ktime_get_real_seconds();
-
-	/* find the first vnode to update */
-	spin_lock(&server->cb_lock);
-	for (;;) {
-		if (RB_EMPTY_ROOT(&server->cb_promises)) {
-			spin_unlock(&server->cb_lock);
-			_leave(" [nothing]");
-			return;
-		}
-
-		vnode = rb_entry(rb_first(&server->cb_promises),
-				 struct afs_vnode, cb_promise);
-		if (atomic_read(&vnode->usage) > 0)
-			break;
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
-	}
-
-	timeout = vnode->update_at - now;
-	if (timeout > 0) {
-		queue_delayed_work(afs_vnode_update_worker,
-				   &afs_vnode_update, timeout * HZ);
-		spin_unlock(&server->cb_lock);
-		_leave(" [nothing]");
-		return;
-	}
-
-	list_del_init(&vnode->update);
-	atomic_inc(&vnode->usage);
-	spin_unlock(&server->cb_lock);
-
-	/* we can now perform the update */
-	_debug("update %s", vnode->vldb.name);
-	vnode->state = AFS_VL_UPDATING;
-	vnode->upd_rej_cnt = 0;
-	vnode->upd_busy_cnt = 0;
-
-	ret = afs_vnode_update_record(vl, &vldb);
-	switch (ret) {
-	case 0:
-		afs_vnode_apply_update(vl, &vldb);
-		vnode->state = AFS_VL_UPDATING;
-		break;
-	case -ENOMEDIUM:
-		vnode->state = AFS_VL_VOLUME_DELETED;
-		break;
-	default:
-		vnode->state = AFS_VL_UNCERTAIN;
-		break;
-	}
-
-	/* and then reschedule */
-	_debug("reschedule");
-	vnode->update_at = ktime_get_real_seconds() +
-			afs_vnode_update_timeout;
-
-	spin_lock(&server->cb_lock);
-
-	if (!list_empty(&server->cb_promises)) {
-		/* next update in 10 minutes, but wait at least 1 second more
-		 * than the newest record already queued so that we don't spam
-		 * the VL server suddenly with lots of requests
-		 */
-		xvnode = list_entry(server->cb_promises.prev,
-				    struct afs_vnode, update);
-		if (vnode->update_at <= xvnode->update_at)
-			vnode->update_at = xvnode->update_at + 1;
-		xvnode = list_entry(server->cb_promises.next,
-				    struct afs_vnode, update);
-		timeout = xvnode->update_at - now;
-		if (timeout < 0)
-			timeout = 0;
-	} else {
-		timeout = afs_vnode_update_timeout;
-	}
-
-	list_add_tail(&vnode->update, &server->cb_promises);
-
-	_debug("timeout %ld", timeout);
-	queue_delayed_work(afs_vnode_update_worker,
-			   &afs_vnode_update, timeout * HZ);
-	spin_unlock(&server->cb_lock);
-	afs_put_vnode(vl);
 }
-#endif
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 91e921553453..5767f540e0e1 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -153,7 +153,7 @@ static void afs_cm_destructor(struct afs_call *call)
 }
 
 /*
- * allow the fileserver to see if the cache manager is still alive
+ * The server supplied a list of callbacks that it wanted to break.
  */
 static void SRXAFSCB_CallBack(struct work_struct *work)
 {
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 97ec6a74589e..37083699a0df 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -384,7 +384,7 @@ out:
  */
 static int afs_readdir(struct file *file, struct dir_context *ctx)
 {
-	return afs_dir_iterate(file_inode(file), 
+	return afs_dir_iterate(file_inode(file),
 			      ctx, file->private_data);
 }
 
@@ -581,6 +581,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	struct afs_vnode *vnode, *dir;
 	struct afs_fid uninitialized_var(fid);
 	struct dentry *parent;
+	struct inode *inode;
 	struct key *key;
 	void *dir_version;
 	int ret;
@@ -588,30 +589,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	vnode = AFS_FS_I(d_inode(dentry));
-
-	if (d_really_is_positive(dentry))
+	if (d_really_is_positive(dentry)) {
+		vnode = AFS_FS_I(d_inode(dentry));
 		_enter("{v={%x:%u} n=%pd fl=%lx},",
 		       vnode->fid.vid, vnode->fid.vnode, dentry,
 		       vnode->flags);
-	else
+	} else {
 		_enter("{neg n=%pd}", dentry);
+	}
 
 	key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
 	if (IS_ERR(key))
 		key = NULL;
 
+	if (d_really_is_positive(dentry)) {
+		inode = d_inode(dentry);
+		if (inode) {
+			vnode = AFS_FS_I(inode);
+			afs_validate(vnode, key);
+			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+				goto out_bad;
+		}
+	}
+
 	/* lock down the parent dentry so we can peer at it */
 	parent = dget_parent(dentry);
 	dir = AFS_FS_I(d_inode(parent));
 
 	/* validate the parent directory */
-	if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
-		afs_validate(dir, key);
+	afs_validate(dir, key);
 
 	if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
 		_debug("%pd: parent dir deleted", dentry);
-		goto out_bad;
+		goto out_bad_parent;
 	}
 
 	dir_version = (void *) (unsigned long) dir->status.data_version;
@@ -626,13 +636,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	case 0:
 		/* the filename maps to something */
 		if (d_really_is_negative(dentry))
-			goto out_bad;
-		if (is_bad_inode(d_inode(dentry))) {
+			goto out_bad_parent;
+		inode = d_inode(dentry);
+		if (is_bad_inode(inode)) {
 			printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
 			       dentry);
-			goto out_bad;
+			goto out_bad_parent;
 		}
 
+		vnode = AFS_FS_I(inode);
+
 		/* if the vnode ID has changed, then the dirent points to a
 		 * different file */
 		if (fid.vnode != vnode->fid.vnode) {
@@ -649,10 +662,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 			_debug("%pd: file deleted (uq %u -> %u I:%u)",
 			       dentry, fid.unique,
 			       vnode->fid.unique,
-			       d_inode(dentry)->i_generation);
-			spin_lock(&vnode->lock);
+			       vnode->vfs_inode.i_generation);
+			write_seqlock(&vnode->cb_lock);
 			set_bit(AFS_VNODE_DELETED, &vnode->flags);
-			spin_unlock(&vnode->lock);
+			write_sequnlock(&vnode->cb_lock);
 			goto not_found;
 		}
 		goto out_valid;
@@ -667,7 +680,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	default:
 		_debug("failed to iterate dir %pd: %d",
 		       parent, ret);
-		goto out_bad;
+		goto out_bad_parent;
 	}
 
 out_valid:
@@ -683,9 +696,10 @@ not_found:
 	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
 	spin_unlock(&dentry->d_lock);
 
-out_bad:
+out_bad_parent:
 	_debug("dropping dentry %pd2", dentry);
 	dput(parent);
+out_bad:
 	key_put(key);
 
 	_leave(" = 0 [bad]");
@@ -820,7 +834,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		vnode = AFS_FS_I(d_inode(dentry));
 		clear_nlink(&vnode->vfs_inode);
 		set_bit(AFS_VNODE_DELETED, &vnode->flags);
-		afs_discard_callback_on_delete(vnode);
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 	}
 
 	key_put(key);
@@ -884,9 +898,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 		vnode = AFS_FS_I(d_inode(dentry));
 		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
 			_debug("AFS_VNODE_DELETED");
-		if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
-			_debug("AFS_VNODE_CB_BROKEN");
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 		ret = afs_validate(vnode, key);
 		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
 	}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 559ac00af5f7..aba36e0b1460 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -243,7 +243,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 
 	/* make sure we've got a callback on this file and that our view of the
 	 * data version is up to date */
-	ret = afs_vnode_fetch_status(vnode, NULL, key);
+	ret = afs_validate(vnode, key);
 	if (ret < 0)
 		goto error;
 
@@ -383,7 +383,7 @@ given_lock:
 	/* again, make sure we've got a callback on this file and, again, make
 	 * sure that our view of the data version is up to date (we ignore
 	 * errors incurred here and deal with the consequences elsewhere) */
-	afs_vnode_fetch_status(vnode, NULL, key);
+	afs_vnode_fetch_status(vnode, NULL, key, false);
 
 error:
 	spin_unlock(&inode->i_lock);
@@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 	posix_test_lock(file, fl);
 	if (fl->fl_type == F_UNLCK) {
 		/* no local locks; consult the server */
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		ret = afs_vnode_fetch_status(vnode, NULL, key, true);
 		if (ret < 0)
 			goto error;
 		lock_count = vnode->status.lock_count;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 36f58adde030..c6658405fe91 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -22,6 +22,11 @@
  */
 static u8 afs_discard_buffer[64];
 
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_server *server)
+{
+	call->server = afs_get_server(server);
+}
+
 /*
  * decode an AFSFid block
  */
@@ -47,14 +52,17 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 	const __be32 *bp = *_bp;
 	umode_t mode;
 	u64 data_version, size;
-	u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
+	bool changed = false;
 	kuid_t owner;
 	kgid_t group;
 
+	write_seqlock(&vnode->cb_lock);
+
 #define EXTRACT(DST)				\
 	do {					\
 		u32 x = ntohl(*bp++);		\
-		changed |= DST - x;		\
+		if (DST != x)			\
+			changed |= true;	\
 		DST = x;			\
 	} while (0)
 
@@ -127,25 +135,39 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 			_debug("vnode modified %llx on {%x:%u}",
 			       (unsigned long long) data_version,
 			       vnode->fid.vid, vnode->fid.vnode);
-			set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+			set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
 			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
 		}
 	} else if (store_version) {
 		status->data_version = data_version;
 	}
+
+	write_sequnlock(&vnode->cb_lock);
 }
 
 /*
  * decode an AFSCallBack block
  */
-static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
+static void xdr_decode_AFSCallBack(struct afs_call *call,
+				   struct afs_vnode *vnode,
+				   const __be32 **_bp)
 {
 	const __be32 *bp = *_bp;
+	u32 cb_expiry;
+
+	write_seqlock(&vnode->cb_lock);
+
+	if (call->cb_break == (vnode->cb_break + call->server->cb_s_break)) {
+		vnode->cb_version	= ntohl(*bp++);
+		cb_expiry		= ntohl(*bp++);
+		vnode->cb_type		= ntohl(*bp++);
+		vnode->cb_expires_at	= cb_expiry + ktime_get_real_seconds();
+		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	} else {
+		bp += 3;
+	}
 
-	vnode->cb_version	= ntohl(*bp++);
-	vnode->cb_expiry	= ntohl(*bp++);
-	vnode->cb_type		= ntohl(*bp++);
-	vnode->cb_expires	= vnode->cb_expiry + ktime_get_real_seconds();
+	write_sequnlock(&vnode->cb_lock);
 	*_bp = bp;
 }
 
@@ -247,16 +269,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	const __be32 *bp;
 	int ret;
 
-	_enter("");
-
 	ret = afs_transfer_reply(call);
 	if (ret < 0)
 		return ret;
 
+	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSCallBack(&bp, vnode);
+	xdr_decode_AFSCallBack(call, vnode, &bp);
 	if (call->reply[1])
 		xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
@@ -304,6 +326,8 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 	bp[2] = htonl(vnode->fid.vnode);
 	bp[3] = htonl(vnode->fid.unique);
 
+	call->cb_break = vnode->cb_break + server->cb_s_break;
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -429,7 +453,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-		xdr_decode_AFSCallBack(&bp, vnode);
+		xdr_decode_AFSCallBack(call, vnode, &bp);
 		if (call->reply[1])
 			xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
@@ -513,6 +537,8 @@ static int afs_fs_fetch_data64(struct afs_server *server,
 	bp[7] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
+	call->cb_break = vnode->cb_break + server->cb_s_break;
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -556,87 +582,8 @@ int afs_fs_fetch_data(struct afs_server *server,
 	bp[5] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
-}
-
-/*
- * deliver reply data to an FS.GiveUpCallBacks
- */
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
-{
-	_enter("");
-
-	/* shouldn't be any reply data */
-	return afs_extract_data(call, NULL, 0, false);
-}
-
-/*
- * FS.GiveUpCallBacks operation type
- */
-static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
-	.name		= "FS.GiveUpCallBacks",
-	.deliver	= afs_deliver_fs_give_up_callbacks,
-	.destructor	= afs_flat_call_destructor,
-};
-
-/*
- * give up a set of callbacks
- * - the callbacks are held in the server->cb_break ring
- */
-int afs_fs_give_up_callbacks(struct afs_net *net,
-			     struct afs_server *server,
-			     bool async)
-{
-	struct afs_call *call;
-	size_t ncallbacks;
-	__be32 *bp, *tp;
-	int loop;
-
-	ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
-			      ARRAY_SIZE(server->cb_break));
-
-	_enter("{%zu},", ncallbacks);
-
-	if (ncallbacks == 0)
-		return 0;
-	if (ncallbacks > AFSCBMAX)
-		ncallbacks = AFSCBMAX;
-
-	_debug("break %zu callbacks", ncallbacks);
-
-	call = afs_alloc_flat_call(net, &afs_RXFSGiveUpCallBacks,
-				   12 + ncallbacks * 6 * 4, 0);
-	if (!call)
-		return -ENOMEM;
-
-
-	/* marshall the parameters */
-	bp = call->request;
-	tp = bp + 2 + ncallbacks * 3;
-	*bp++ = htonl(FSGIVEUPCALLBACKS);
-	*bp++ = htonl(ncallbacks);
-	*tp++ = htonl(ncallbacks);
-
-	atomic_sub(ncallbacks, &server->cb_break_n);
-	for (loop = ncallbacks; loop > 0; loop--) {
-		struct afs_callback *cb =
-			&server->cb_break[server->cb_break_tail];
-
-		*bp++ = htonl(cb->fid.vid);
-		*bp++ = htonl(cb->fid.vnode);
-		*bp++ = htonl(cb->fid.unique);
-		*tp++ = htonl(cb->version);
-		*tp++ = htonl(cb->expiry);
-		*tp++ = htonl(cb->type);
-		smp_mb();
-		server->cb_break_tail =
-			(server->cb_break_tail + 1) &
-			(ARRAY_SIZE(server->cb_break) - 1);
-	}
-
-	ASSERT(ncallbacks > 0);
-	wake_up_nr(&server->cb_break_waitq, ncallbacks);
-
+	call->cb_break = vnode->cb_break + server->cb_s_break;
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -731,6 +678,7 @@ int afs_fs_create(struct afs_server *server,
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -809,6 +757,7 @@ int afs_fs_remove(struct afs_server *server,
 		bp = (void *) bp + padsz;
 	}
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -892,6 +841,7 @@ int afs_fs_link(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -994,6 +944,7 @@ int afs_fs_symlink(struct afs_server *server,
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1095,6 +1046,7 @@ int afs_fs_rename(struct afs_server *server,
 		bp = (void *) bp + n_padsz;
 	}
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1196,6 +1148,7 @@ static int afs_fs_store_data64(struct afs_server *server,
 	*bp++ = htonl(i_size >> 32);
 	*bp++ = htonl((u32) i_size);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1269,6 +1222,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	*bp++ = htonl(size);
 	*bp++ = htonl(i_size);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1366,6 +1320,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 	*bp++ = htonl(attr->ia_size >> 32);	/* new file length */
 	*bp++ = htonl((u32) attr->ia_size);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1413,6 +1368,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 	*bp++ = 0;				/* size of write */
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1454,6 +1410,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1684,6 +1641,7 @@ int afs_fs_get_volume_status(struct afs_server *server,
 	bp[0] = htonl(FSGETVOLUMESTATUS);
 	bp[1] = htonl(vnode->fid.vid);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1766,6 +1724,7 @@ int afs_fs_set_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.unique);
 	*bp++ = htonl(type);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1797,6 +1756,7 @@ int afs_fs_extend_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
+	afs_use_fs_server(call, server);
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
 
@@ -1828,5 +1788,49 @@ int afs_fs_release_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
+	afs_use_fs_server(call, server);
+	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+}
+
+/*
+ * Deliver reply data to an FS.GiveUpAllCallBacks operation.
+ */
+static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
+{
+	return afs_transfer_reply(call);
+}
+
+/*
+ * FS.GiveUpAllCallBacks operation type
+ */
+static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
+	.name		= "FS.GiveUpAllCallBacks",
+	.deliver	= afs_deliver_fs_give_up_all_callbacks,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Flush all the callbacks we have on a server.
+ */
+int afs_fs_give_up_all_callbacks(struct afs_server *server,
+				 struct key *key,
+				 bool async)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(server->net, &afs_RXFSGiveUpAllCallBacks, 2 * 4, 0);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSGIVEUPALLCALLBACKS);
+
+	/* Can't take a ref on server */
 	return afs_make_call(&server->addr, call, GFP_NOFS, async);
 }
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index fbb441d25022..4822a2a50a61 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,11 +23,6 @@
 #include <linux/namei.h>
 #include "internal.h"
 
-struct afs_iget_data {
-	struct afs_fid		fid;
-	struct afs_volume	*volume;	/* volume on which resides */
-};
-
 static const struct inode_operations afs_symlink_inode_operations = {
 	.get_link	= page_get_link,
 	.listxattr	= afs_listxattr,
@@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = {
 static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 {
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
+	bool changed;
 
 	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
 	       vnode->status.type,
@@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	       vnode->status.data_version,
 	       vnode->status.mode);
 
+	read_seqlock_excl(&vnode->cb_lock);
+
 	switch (vnode->status.type) {
 	case AFS_FTYPE_FILE:
 		inode->i_mode	= S_IFREG | vnode->status.mode;
@@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 		if ((vnode->status.mode & 0777) == 0644) {
 			inode->i_flags |= S_AUTOMOUNT;
 
-			spin_lock(&vnode->lock);
 			set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-			spin_unlock(&vnode->lock);
 
 			inode->i_mode	= S_IFDIR | 0555;
 			inode->i_op	= &afs_mntpt_inode_operations;
@@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 		break;
 	default:
 		printk("kAFS: AFS vnode with undefined type\n");
+		read_sequnlock_excl(&vnode->cb_lock);
 		return -EBADMSG;
 	}
 
-#ifdef CONFIG_AFS_FSCACHE
-	if (vnode->status.size != inode->i_size)
-		fscache_attr_changed(vnode->cache);
-#endif
+	changed = (vnode->status.size != inode->i_size);
 
 	set_nlink(inode, vnode->status.nlink);
 	inode->i_uid		= vnode->status.owner;
@@ -97,13 +91,20 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	inode->i_generation	= vnode->fid.unique;
 	inode->i_version	= vnode->status.data_version;
 	inode->i_mapping->a_ops	= &afs_fs_aops;
+
+	read_sequnlock_excl(&vnode->cb_lock);
+
+#ifdef CONFIG_AFS_FSCACHE
+	if (changed)
+		fscache_attr_changed(vnode->cache);
+#endif
 	return 0;
 }
 
 /*
  * iget5() comparator
  */
-static int afs_iget5_test(struct inode *inode, void *opaque)
+int afs_iget5_test(struct inode *inode, void *opaque)
 {
 	struct afs_iget_data *data = opaque;
 
@@ -237,8 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	if (!status) {
 		/* it's a remotely extant inode */
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		ret = afs_vnode_fetch_status(vnode, NULL, key, true);
 		if (ret < 0)
 			goto bad_inode;
 	} else {
@@ -249,16 +249,16 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 			/* it's a symlink we just created (the fileserver
 			 * didn't give us a callback) */
 			vnode->cb_version = 0;
-			vnode->cb_expiry = 0;
 			vnode->cb_type = 0;
-			vnode->cb_expires = ktime_get_real_seconds();
+			vnode->cb_expires_at = 0;
 		} else {
 			vnode->cb_version = cb->version;
-			vnode->cb_expiry = cb->expiry;
 			vnode->cb_type = cb->type;
-			vnode->cb_expires = vnode->cb_expiry +
-				ktime_get_real_seconds();
+			vnode->cb_expires_at = cb->expiry;
+			set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 		}
+
+		vnode->cb_expires_at += ktime_get_real_seconds();
 	}
 
 	/* set up caching before mapping the status, as map-status reads the
@@ -320,25 +320,34 @@ void afs_zap_data(struct afs_vnode *vnode)
  */
 int afs_validate(struct afs_vnode *vnode, struct key *key)
 {
+	time64_t now = ktime_get_real_seconds();
+	bool valid = false;
 	int ret;
 
 	_enter("{v={%x:%u} fl=%lx},%x",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
 	       key_serial(key));
 
-	if (vnode->cb_promised &&
-	    !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
-	    !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-		if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
-			_debug("callback expired");
-			set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		} else {
-			goto valid;
+	/* Quickly check the callback state.  Ideally, we'd use read_seqbegin
+	 * here, but we have no way to pass the net namespace to the RCU
+	 * cleanup for the server record.
+	 */
+	read_seqlock_excl(&vnode->cb_lock);
+
+	if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+		if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+			vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+		} else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
+			   !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+			   vnode->cb_expires_at - 10 > now) {
+				valid = true;
 		}
+	} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		valid = true;
 	}
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+	read_sequnlock_excl(&vnode->cb_lock);
+	if (valid)
 		goto valid;
 
 	mutex_lock(&vnode->validate_lock);
@@ -347,12 +356,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * a new promise - note that if the (parent) directory's metadata was
 	 * changed then the security may be different and we may no longer have
 	 * access */
-	if (!vnode->cb_promised ||
-	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+	if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
-		if (ret < 0)
+		ret = afs_vnode_fetch_status(vnode, NULL, key, false);
+		if (ret < 0) {
+			if (ret == -ENOENT) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				ret = -ESTALE;
+			}
 			goto error_unlock;
+		}
 		_debug("new promise [fl=%lx]", vnode->flags);
 	}
 
@@ -367,7 +380,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
 		afs_zap_data(vnode);
 
-	clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+	clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
 	mutex_unlock(&vnode->validate_lock);
 valid:
 	_leave(" = 0");
@@ -386,10 +399,17 @@ int afs_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	int seq = 0;
 
 	_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
-	generic_fillattr(inode, stat);
+	do {
+		read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+		generic_fillattr(inode, stat);
+	} while (need_seqretry(&vnode->cb_lock, seq));
+
+	done_seqretry(&vnode->cb_lock, seq);
 	return 0;
 }
 
@@ -416,13 +436,10 @@ void afs_evict_inode(struct inode *inode)
 
 	vnode = AFS_FS_I(inode);
 
-	_enter("{%x:%u.%d} v=%u x=%u t=%u }",
+	_enter("{%x:%u.%d}",
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       vnode->cb_version,
-	       vnode->cb_expiry,
-	       vnode->cb_type);
+	       vnode->fid.unique);
 
 	_debug("CLEAR INODE %p", inode);
 
@@ -431,18 +448,12 @@ void afs_evict_inode(struct inode *inode)
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
 
-	afs_give_up_callback(vnode);
-
-	if (vnode->server) {
-		spin_lock(&vnode->server->fs_lock);
-		rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
-		spin_unlock(&vnode->server->fs_lock);
-		afs_put_server(afs_i2net(inode), vnode->server);
-		vnode->server = NULL;
+	if (vnode->cb_interest) {
+		afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
+		vnode->cb_interest = NULL;
 	}
 
 	ASSERT(list_empty(&vnode->writebacks));
-	ASSERT(!vnode->cb_promised);
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(vnode->cache, 0);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6fa81e04aff3..e3c99437f6e0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -55,6 +55,11 @@ struct afs_mount_params {
 	struct key		*key;		/* key to use for secure mounting */
 };
 
+struct afs_iget_data {
+	struct afs_fid		fid;
+	struct afs_volume	*volume;	/* volume on which resides */
+};
+
 enum afs_call_state {
 	AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
 	AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
@@ -77,6 +82,7 @@ struct afs_call {
 	struct key		*key;		/* security for this call */
 	struct afs_net		*net;		/* The network namespace */
 	struct afs_server	*cm_server;	/* Server affected by incoming CM call */
+	struct afs_server	*server;	/* Server used by client call */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* page set */
 	struct afs_writeback	*wb;		/* writeback being performed */
@@ -92,6 +98,7 @@ struct afs_call {
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
+	unsigned int		cb_break;	/* cb_break + cb_s_break before the call */
 	union {
 		unsigned	last_to;	/* amount of mapping[last] */
 		unsigned	count2;		/* count used in unmarshalling */
@@ -314,26 +321,31 @@ struct afs_server {
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
 	struct list_head	grave;		/* link in master graveyard list */
+
 	struct rb_node		master_rb;	/* link in master by-addr tree */
 	struct rw_semaphore	sem;		/* access lock */
+	unsigned long		flags;
+#define AFS_SERVER_NEW		0		/* New server, don't inc cb_s_break */
 
 	/* file service access */
-	struct rb_root		fs_vnodes;	/* vnodes backed by this server (ordered by FID) */
-	unsigned long		fs_act_jif;	/* time at which last activity occurred */
-	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
-	spinlock_t		fs_lock;	/* access lock */
 	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
+	spinlock_t		fs_lock;	/* access lock */
 
 	/* callback promise management */
-	struct rb_root		cb_promises;	/* vnode expiration list (ordered earliest first) */
-	struct delayed_work	cb_updater;	/* callback updater */
-	struct delayed_work	cb_break_work;	/* collected break dispatcher */
-	wait_queue_head_t	cb_break_waitq;	/* space available in cb_break waitqueue */
-	spinlock_t		cb_lock;	/* access lock */
-	struct afs_callback	cb_break[64];	/* ring of callbacks awaiting breaking */
-	atomic_t		cb_break_n;	/* number of pending breaks */
-	u8			cb_break_head;	/* head of callback breaking ring */
-	u8			cb_break_tail;	/* tail of callback breaking ring */
+	struct list_head	cb_interests;	/* List of superblocks using this server */
+	unsigned		cb_s_break;	/* Break-everything counter. */
+	rwlock_t		cb_break_lock;	/* Volume finding lock */
+};
+
+/*
+ * Interest by a superblock on a server.
+ */
+struct afs_cb_interest {
+	struct list_head	cb_link;	/* Link in server->cb_interests */
+	struct afs_server	*server;	/* Server on which this interest resides */
+	struct super_block	*sb;		/* Superblock on which inodes reside */
+	afs_volid_t		vid;		/* Volume ID to match */
+	refcount_t		usage;
 };
 
 /*
@@ -352,6 +364,7 @@ struct afs_volume {
 	unsigned short		nservers;	/* number of server slots filled */
 	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
 	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
+	struct afs_cb_interest	*cb_interests[8]; /* Interests on servers for callbacks */
 	struct rw_semaphore	server_sem;	/* lock for accessing current server */
 };
 
@@ -371,7 +384,6 @@ struct afs_vnode {
 	struct inode		vfs_inode;	/* the VFS's inode record */
 
 	struct afs_volume	*volume;	/* volume on which vnode resides */
-	struct afs_server	*server;	/* server currently supplying this file */
 	struct afs_fid		fid;		/* the file identifier for this inode */
 	struct afs_file_status	status;		/* AFS status info for this file */
 #ifdef CONFIG_AFS_FSCACHE
@@ -386,9 +398,9 @@ struct afs_vnode {
 	spinlock_t		writeback_lock;	/* lock for writebacks */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
-#define AFS_VNODE_CB_BROKEN	0		/* set if vnode's callback was broken */
+#define AFS_VNODE_CB_PROMISED	0		/* Set if vnode has a callback promise */
 #define AFS_VNODE_UNSET		1		/* set if vnode attributes not yet set */
-#define AFS_VNODE_MODIFIED	2		/* set if vnode's data modified */
+#define AFS_VNODE_DIR_MODIFIED	2		/* set if dir vnode's data modified */
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
@@ -408,15 +420,14 @@ struct afs_vnode {
 	struct key		*unlock_key;	/* key to be used in unlocking */
 
 	/* outstanding callback notification on this file */
-	struct rb_node		server_rb;	/* link in server->fs_vnodes */
-	struct rb_node		cb_promise;	/* link in server->cb_promises */
-	struct work_struct	cb_broken_work;	/* work to be done on callback break */
-	time64_t		cb_expires;	/* time at which callback expires */
-	time64_t		cb_expires_at;	/* time used to order cb_promise */
+	struct afs_cb_interest	*cb_interest;	/* Server on which this resides */
+	unsigned int		cb_s_break;	/* Mass break counter on ->server */
+	unsigned int		cb_break;	/* Break counter on vnode */
+	seqlock_t		cb_lock;	/* Lock for ->cb_interest, ->status, ->cb_*break */
+
+	time64_t		cb_expires_at;	/* time at which callback expires */
 	unsigned		cb_version;	/* callback version */
-	unsigned		cb_expiry;	/* callback expiry time */
 	afs_callback_type_t	cb_type;	/* type of callback */
-	bool			cb_promised;	/* true if promise still holds */
 };
 
 /*
@@ -463,16 +474,20 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
 /*
  * callback.c
  */
-extern struct workqueue_struct *afs_callback_update_worker;
-
 extern void afs_init_callback_state(struct afs_server *);
-extern void afs_broken_callback_work(struct work_struct *);
-extern void afs_break_callbacks(struct afs_server *, size_t,
-				struct afs_callback[]);
-extern void afs_discard_callback_on_delete(struct afs_vnode *);
-extern void afs_give_up_callback(struct afs_vnode *);
-extern void afs_dispatch_give_up_callbacks(struct work_struct *);
-extern void afs_flush_callback_breaks(struct afs_server *);
+extern void afs_break_callback(struct afs_vnode *);
+extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
+
+extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **,
+					   struct afs_server *);
+extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
+extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *);
+
+static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
+{
+	refcount_inc(&cbi->usage);
+	return cbi;
+}
 
 /*
  * cell.c
@@ -560,10 +575,12 @@ extern int afs_fs_extend_lock(struct afs_server *, struct key *,
 			      struct afs_vnode *, bool);
 extern int afs_fs_release_lock(struct afs_server *, struct key *,
 			       struct afs_vnode *, bool);
+extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool);
 
 /*
  * inode.c
  */
+extern int afs_iget5_test(struct inode *, void *);
 extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
 				       struct key *);
 extern struct inode *afs_iget(struct super_block *, struct key *,
@@ -676,11 +693,11 @@ extern int afs_permission(struct inode *, int);
  */
 extern spinlock_t afs_server_peer_lock;
 
-#define afs_get_server(S)					\
-do {								\
-	_debug("GET SERVER %d", atomic_read(&(S)->usage));	\
-	atomic_inc(&(S)->usage);				\
-} while(0)
+static inline struct afs_server *afs_get_server(struct afs_server *server)
+{
+	atomic_inc(&server->usage);
+	return server;
+}
 
 extern void afs_server_timer(struct timer_list *);
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
@@ -741,7 +758,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
 extern void afs_vnode_finalise_status_update(struct afs_vnode *,
 					     struct afs_server *);
 extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
-				  struct key *);
+				  struct key *, bool);
 extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
 				struct afs_read *);
 extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 38e15b1f0eec..331c08740861 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -123,10 +123,6 @@ static int __init afs_init(void)
 		alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0);
 	if (!afs_vlocation_update_worker)
 		goto error_vl_up;
-	afs_callback_update_worker =
-		alloc_ordered_workqueue("kafs_callbackd", WQ_MEM_RECLAIM);
-	if (!afs_callback_update_worker)
-		goto error_callback;
 	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
 	if (!afs_lock_manager)
 		goto error_lockmgr;
@@ -158,8 +154,6 @@ error_cache:
 #endif
 	destroy_workqueue(afs_lock_manager);
 error_lockmgr:
-	destroy_workqueue(afs_callback_update_worker);
-error_callback:
 	destroy_workqueue(afs_vlocation_update_worker);
 error_vl_up:
 	destroy_workqueue(afs_async_calls);
@@ -189,7 +183,6 @@ static void __exit afs_exit(void)
 	fscache_unregister_netfs(&afs_cache_netfs);
 #endif
 	destroy_workqueue(afs_lock_manager);
-	destroy_workqueue(afs_callback_update_worker);
 	destroy_workqueue(afs_vlocation_update_worker);
 	destroy_workqueue(afs_async_calls);
 	destroy_workqueue(afs_wq);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index faca66227ecf..7cc61c8b748b 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -115,6 +115,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
 	mutex_lock(&vnode->permits_lock);
 	permits = vnode->permits;
 	RCU_INIT_POINTER(vnode->permits, NULL);
+	vnode->cb_break++;
 	mutex_unlock(&vnode->permits_lock);
 
 	if (permits)
@@ -264,8 +265,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 		 * (the post-processing will cache the result on auth_vnode) */
 		_debug("no valid permit");
 
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
+		ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true);
 		if (ret < 0) {
 			iput(&auth_vnode->vfs_inode);
 			*_access = 0;
@@ -304,14 +304,9 @@ int afs_permission(struct inode *inode, int mask)
 		return PTR_ERR(key);
 	}
 
-	/* if the promise has expired, we need to check the server again */
-	if (!vnode->cb_promised) {
-		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
-		if (ret < 0)
-			goto error;
-		_debug("new promise [fl=%lx]", vnode->flags);
-	}
+	ret = afs_validate(vnode, key);
+	if (ret < 0)
+		goto error;
 
 	/* check the permits to see if we've got one yet */
 	ret = afs_check_permit(vnode, key, &access);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c63974f06385..4e66608fc805 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -94,12 +94,8 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 		INIT_LIST_HEAD(&server->grave);
 		init_rwsem(&server->sem);
 		spin_lock_init(&server->fs_lock);
-		server->fs_vnodes = RB_ROOT;
-		server->cb_promises = RB_ROOT;
-		spin_lock_init(&server->cb_lock);
-		init_waitqueue_head(&server->cb_break_waitq);
-		INIT_DELAYED_WORK(&server->cb_break_work,
-				  afs_dispatch_give_up_callbacks);
+		INIT_LIST_HEAD(&server->cb_interests);
+		rwlock_init(&server->cb_break_lock);
 
 		server->addr = *addr;
 		afs_inc_servers_outstanding(cell->net);
@@ -258,8 +254,6 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
 		return;
 	}
 
-	afs_flush_callback_breaks(server);
-
 	spin_lock(&net->server_graveyard_lock);
 	if (atomic_read(&server->usage) == 0) {
 		list_move_tail(&server->grave, &net->server_graveyard);
@@ -277,15 +271,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 {
 	_enter("%p", server);
 
-	ASSERTIF(server->cb_break_head != server->cb_break_tail,
-		 delayed_work_pending(&server->cb_break_work));
-
-	ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
-	ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
-	ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
-	ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
-
-	afs_put_cell(server->net, server->cell);
+	afs_fs_give_up_all_callbacks(server, NULL, false);
+	afs_put_cell(net, server->cell);
 	kfree(server);
 	afs_dec_servers_outstanding(net);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index dd218f370359..c8fb1a497a84 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -512,8 +512,12 @@ error:
 
 static void afs_kill_super(struct super_block *sb)
 {
-	struct afs_super_info *as = sb->s_fs_info;
+	struct afs_super_info *as = AFS_FS_S(sb);
 
+	/* Clear the callback interests (which will do ilookup5) before
+	 * deactivating the superblock.
+	 */
+	afs_clear_callback_interests(as->net, as->volume);
 	kill_anon_super(sb);
 	afs_destroy_sbi(as);
 }
@@ -536,7 +540,7 @@ static void afs_i_init_once(void *_vnode)
 	INIT_LIST_HEAD(&vnode->pending_locks);
 	INIT_LIST_HEAD(&vnode->granted_locks);
 	INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
-	INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
+	seqlock_init(&vnode->cb_lock);
 }
 
 /*
@@ -558,7 +562,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->volume		= NULL;
 	vnode->update_cnt	= 0;
 	vnode->flags		= 1 << AFS_VNODE_UNSET;
-	vnode->cb_promised	= false;
 
 	_leave(" = %p", &vnode->vfs_inode);
 	return &vnode->vfs_inode;
@@ -582,7 +585,7 @@ static void afs_destroy_inode(struct inode *inode)
 
 	_debug("DESTROY INODE %p", inode);
 
-	ASSERTCMP(vnode->server, ==, NULL);
+	ASSERTCMP(vnode->cb_interest, ==, NULL);
 
 	call_rcu(&inode->i_rcu, afs_i_callback);
 	atomic_dec(&afs_count_active_inodes);
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index b79d05374878..c1bf2124cef5 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -16,189 +16,20 @@
 #include <linux/sched.h>
 #include "internal.h"
 
-#if 0
-static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
-				   int depth, char lr)
-{
-	struct afs_vnode *vnode;
-	bool bad = false;
-
-	if (!node)
-		return false;
-
-	if (node->rb_left)
-		bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
-
-	vnode = rb_entry(node, struct afs_vnode, cb_promise);
-	_debug("%c %*.*s%c%p {%d}",
-	       rb_is_red(node) ? 'R' : 'B',
-	       depth, depth, "", lr,
-	       vnode, vnode->cb_expires_at);
-	if (rb_parent(node) != parent) {
-		printk("BAD: %p != %p\n", rb_parent(node), parent);
-		bad = true;
-	}
-
-	if (node->rb_right)
-		bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
-
-	return bad;
-}
-
-static noinline void dump_tree(const char *name, struct afs_server *server)
-{
-	_enter("%s", name);
-	if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
-		BUG();
-}
-#endif
-
 /*
- * insert a vnode into the backing server's vnode tree
- */
-static void afs_install_vnode(struct afs_vnode *vnode,
-			      struct afs_server *server)
-{
-	struct afs_server *old_server = vnode->server;
-	struct afs_vnode *xvnode;
-	struct rb_node *parent, **p;
-
-	_enter("%p,%p", vnode, server);
-
-	if (old_server) {
-		spin_lock(&old_server->fs_lock);
-		rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
-		spin_unlock(&old_server->fs_lock);
-	}
-
-	afs_get_server(server);
-	vnode->server = server;
-	afs_put_server(afs_v2net(vnode), old_server);
-
-	/* insert into the server's vnode tree in FID order */
-	spin_lock(&server->fs_lock);
-
-	parent = NULL;
-	p = &server->fs_vnodes.rb_node;
-	while (*p) {
-		parent = *p;
-		xvnode = rb_entry(parent, struct afs_vnode, server_rb);
-		if (vnode->fid.vid < xvnode->fid.vid)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.vid > xvnode->fid.vid)
-			p = &(*p)->rb_right;
-		else if (vnode->fid.vnode < xvnode->fid.vnode)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.vnode > xvnode->fid.vnode)
-			p = &(*p)->rb_right;
-		else if (vnode->fid.unique < xvnode->fid.unique)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.unique > xvnode->fid.unique)
-			p = &(*p)->rb_right;
-		else
-			BUG(); /* can't happen unless afs_iget() malfunctions */
-	}
-
-	rb_link_node(&vnode->server_rb, parent, p);
-	rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
-
-	spin_unlock(&server->fs_lock);
-	_leave("");
-}
-
-/*
- * insert a vnode into the promising server's update/expiration tree
- * - caller must hold vnode->lock
- */
-static void afs_vnode_note_promise(struct afs_vnode *vnode,
-				   struct afs_server *server)
-{
-	struct afs_server *old_server;
-	struct afs_vnode *xvnode;
-	struct rb_node *parent, **p;
-
-	_enter("%p,%p", vnode, server);
-
-	ASSERT(server != NULL);
-
-	old_server = vnode->server;
-	if (vnode->cb_promised) {
-		if (server == old_server &&
-		    vnode->cb_expires == vnode->cb_expires_at) {
-			_leave(" [no change]");
-			return;
-		}
-
-		spin_lock(&old_server->cb_lock);
-		if (vnode->cb_promised) {
-			_debug("delete");
-			rb_erase(&vnode->cb_promise, &old_server->cb_promises);
-			vnode->cb_promised = false;
-		}
-		spin_unlock(&old_server->cb_lock);
-	}
-
-	if (vnode->server != server)
-		afs_install_vnode(vnode, server);
-
-	vnode->cb_expires_at = vnode->cb_expires;
-	_debug("PROMISE on %p {%lu}",
-	       vnode, (unsigned long) vnode->cb_expires_at);
-
-	/* abuse an RB-tree to hold the expiration order (we may have multiple
-	 * items with the same expiration time) */
-	spin_lock(&server->cb_lock);
-
-	parent = NULL;
-	p = &server->cb_promises.rb_node;
-	while (*p) {
-		parent = *p;
-		xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
-		if (vnode->cb_expires_at < xvnode->cb_expires_at)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&vnode->cb_promise, parent, p);
-	rb_insert_color(&vnode->cb_promise, &server->cb_promises);
-	vnode->cb_promised = true;
-
-	spin_unlock(&server->cb_lock);
-	_leave("");
-}
-
-/*
- * handle remote file deletion by discarding the callback promise
+ * Handle remote file deletion.
  */
 static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
 {
-	struct afs_server *server;
+	struct afs_cb_interest *cbi = vnode->cb_interest;
 
-	_enter("{%p}", vnode->server);
+	_enter("{%p}", cbi);
 
 	set_bit(AFS_VNODE_DELETED, &vnode->flags);
 
-	server = vnode->server;
-	if (server) {
-		if (vnode->cb_promised) {
-			spin_lock(&server->cb_lock);
-			if (vnode->cb_promised) {
-				rb_erase(&vnode->cb_promise,
-					 &server->cb_promises);
-				vnode->cb_promised = false;
-			}
-			spin_unlock(&server->cb_lock);
-		}
-
-		spin_lock(&server->fs_lock);
-		rb_erase(&vnode->server_rb, &server->fs_vnodes);
-		spin_unlock(&server->fs_lock);
-
-		vnode->server = NULL;
-		afs_put_server(afs_v2net(vnode), server);
-	} else {
-		ASSERT(!vnode->cb_promised);
+	if (cbi) {
+		vnode->cb_interest = NULL;
+		afs_put_cb_interest(afs_v2net(vnode), cbi);
 	}
 
 	_leave("");
@@ -218,8 +49,6 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 	_enter("%p,%p", vnode, server);
 
 	spin_lock(&vnode->lock);
-	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-	afs_vnode_note_promise(vnode, server);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
@@ -238,8 +67,6 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
 
 	spin_lock(&vnode->lock);
 
-	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-
 	if (ret == -ENOENT) {
 		/* the file was deleted on the server */
 		_debug("got NOENT from server - marking file deleted");
@@ -261,8 +88,8 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
  *   - there are any outstanding ops that will fetch the status
  * - TODO implement local caching
  */
-int afs_vnode_fetch_status(struct afs_vnode *vnode,
-			   struct afs_vnode *auth_vnode, struct key *key)
+int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode,
+			   struct key *key, bool force)
 {
 	struct afs_server *server;
 	unsigned long acl_order;
@@ -270,12 +97,13 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
 
 	DECLARE_WAITQUEUE(myself, current);
 
-	_enter("%s,{%x:%u.%u}",
+	_enter("%s,{%x:%u.%u,S=%lx},%u",
 	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+	       vnode->flags,
+	       force);
 
-	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    vnode->cb_promised) {
+	if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		_leave(" [unchanged]");
 		return 0;
 	}
@@ -291,8 +119,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
 
 	spin_lock(&vnode->lock);
 
-	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    vnode->cb_promised) {
+	if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		spin_unlock(&vnode->lock);
 		_leave(" [unchanged]");
 		return 0;
@@ -310,7 +137,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
 
 		/* wait for the status to be updated */
 		for (;;) {
-			if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
+			if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 				break;
 			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
 				break;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 52f0dc40732b..4f6fd10094c6 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -153,8 +153,10 @@ error:
 error_discard:
 	up_write(&params->cell->vl_sem);
 
-	for (loop = volume->nservers - 1; loop >= 0; loop--)
+	for (loop = volume->nservers - 1; loop >= 0; loop--) {
+		afs_put_cb_interest(params->net, volume->cb_interests[loop]);
 		afs_put_server(params->net, volume->servers[loop]);
+	}
 
 	kfree(volume);
 	goto error;
@@ -197,8 +199,10 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
 #endif
 	afs_put_vlocation(cell->net, vlocation);
 
-	for (loop = volume->nservers - 1; loop >= 0; loop--)
+	for (loop = volume->nservers - 1; loop >= 0; loop--) {
+		afs_put_cb_interest(cell->net, volume->cb_interests[loop]);
 		afs_put_server(cell->net, volume->servers[loop]);
+	}
 
 	kfree(volume);
 
@@ -218,10 +222,10 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 	_enter("%s", volume->vlocation->vldb.name);
 
 	/* stick with the server we're already using if we can */
-	if (vnode->server && vnode->server->fs_state == 0) {
-		afs_get_server(vnode->server);
-		_leave(" = %p [current]", vnode->server);
-		return vnode->server;
+	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
+		afs_get_server(vnode->cb_interest->server);
+		_leave(" = %p [current]", vnode->cb_interest->server);
+		return vnode->cb_interest->server;
 	}
 
 	down_read(&volume->server_sem);
@@ -244,13 +248,8 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 		_debug("consider %d [%d]", loop, state);
 
 		switch (state) {
-			/* found an apparently healthy server */
 		case 0:
-			afs_get_server(server);
-			up_read(&volume->server_sem);
-			_leave(" = %p (picked %pIS)",
-			       server, &server->addr.transport);
-			return server;
+			goto picked_server;
 
 		case -ENETUNREACH:
 			if (ret == 0)
@@ -284,9 +283,25 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 	/* no available servers
 	 * - TODO: handle the no active servers case better
 	 */
+error:
 	up_read(&volume->server_sem);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
+
+picked_server:
+	/* Found an apparently healthy server.  We need to register an interest
+	 * in receiving callbacks before we talk to it.
+	 */
+	ret = afs_register_server_cb_interest(vnode,
+					      &volume->cb_interests[loop], server);
+	if (ret < 0)
+		goto error;
+	
+	afs_get_server(server);
+	up_read(&volume->server_sem);
+	_leave(" = %p (picked %pIS)",
+	       server, &server->addr.transport);
+	return server;
 }
 
 /*
@@ -309,14 +324,12 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 	switch (result) {
 		/* success */
 	case 0:
-		server->fs_act_jif = jiffies;
 		server->fs_state = 0;
 		_leave("");
 		return 1;
 
 		/* the fileserver denied all knowledge of the volume */
 	case -ENOMEDIUM:
-		server->fs_act_jif = jiffies;
 		down_write(&volume->server_sem);
 
 		/* firstly, find where the server is in the active list (if it
@@ -365,7 +378,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 		 */
 		spin_lock(&server->fs_lock);
 		if (!server->fs_state) {
-			server->fs_dead_jif = jiffies + HZ * 10;
 			server->fs_state = result;
 			printk("kAFS: SERVER DEAD state=%d\n", result);
 		}
@@ -374,7 +386,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 
 		/* miscellaneous error */
 	default:
-		server->fs_act_jif = jiffies;
 	case -ENOMEM:
 	case -ENONET:
 		/* tell the caller to accept the result */
-- 
cgit v1.2.3


From be080a6f43c40976afc950ee55e9b7f8e2b53525 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:49 +0000
Subject: afs: Overhaul permit caching

Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.

When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation.  This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file).  This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.

With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer.  This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.

Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.

Note that that table is global rather than being per-net_ns.  If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.

Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer.  In such an event, memory barriers will need adding.

Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h      |   1 -
 fs/afs/flock.c    |   4 +-
 fs/afs/fsclient.c |   5 +-
 fs/afs/inode.c    |  13 +-
 fs/afs/internal.h |  27 ++--
 fs/afs/main.c     |   1 +
 fs/afs/security.c | 367 ++++++++++++++++++++++++++++++++----------------------
 fs/afs/super.c    |   1 -
 fs/afs/vnode.c    |  12 +-
 9 files changed, 244 insertions(+), 187 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 2e2887a7d331..b94d0edc2b78 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -136,7 +136,6 @@ struct afs_file_status {
 	afs_access_t		caller_access;	/* access rights for authenticated caller */
 	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
-	struct afs_fid		parent;		/* parent dir ID for non-dirs only */
 	time_t			mtime_client;	/* last time client changed data */
 	time_t			mtime_server;	/* last time server changed data */
 	s32			lock_count;	/* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index aba36e0b1460..2b31ea58c50c 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -383,7 +383,7 @@ given_lock:
 	/* again, make sure we've got a callback on this file and, again, make
 	 * sure that our view of the data version is up to date (we ignore
 	 * errors incurred here and deal with the consequences elsewhere) */
-	afs_vnode_fetch_status(vnode, NULL, key, false);
+	afs_vnode_fetch_status(vnode, key, false);
 
 error:
 	spin_unlock(&inode->i_lock);
@@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 	posix_test_lock(file, fl);
 	if (fl->fl_type == F_UNLCK) {
 		/* no local locks; consult the server */
-		ret = afs_vnode_fetch_status(vnode, NULL, key, true);
+		ret = afs_vnode_fetch_status(vnode, key, true);
 		if (ret < 0)
 			goto error;
 		lock_count = vnode->status.lock_count;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c6658405fe91..680c02d510f7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -78,8 +78,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 	EXTRACT(status->caller_access); /* call ticket dependent */
 	EXTRACT(status->anon_access);
 	EXTRACT(status->mode);
-	EXTRACT(status->parent.vnode);
-	EXTRACT(status->parent.unique);
+	bp++; /* parent.vnode */
+	bp++; /* parent.unique */
 	bp++; /* seg size */
 	status->mtime_client = ntohl(*bp++);
 	status->mtime_server = ntohl(*bp++);
@@ -103,7 +103,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 	       status->mtime_client, status->mtime_server);
 
 	if (vnode) {
-		status->parent.vid = vnode->fid.vid;
 		if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
 			_debug("vnode changed");
 			i_size_write(&vnode->vfs_inode, size);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4822a2a50a61..ee86d5ad22d1 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -238,7 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	if (!status) {
 		/* it's a remotely extant inode */
-		ret = afs_vnode_fetch_status(vnode, NULL, key, true);
+		ret = afs_vnode_fetch_status(vnode, key, true);
 		if (ret < 0)
 			goto bad_inode;
 	} else {
@@ -358,7 +358,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * access */
 	if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, NULL, key, false);
+		ret = afs_vnode_fetch_status(vnode, key, false);
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -431,7 +431,6 @@ int afs_drop_inode(struct inode *inode)
  */
 void afs_evict_inode(struct inode *inode)
 {
-	struct afs_permits *permits;
 	struct afs_vnode *vnode;
 
 	vnode = AFS_FS_I(inode);
@@ -460,13 +459,7 @@ void afs_evict_inode(struct inode *inode)
 	vnode->cache = NULL;
 #endif
 
-	mutex_lock(&vnode->permits_lock);
-	permits = vnode->permits;
-	RCU_INIT_POINTER(vnode->permits, NULL);
-	mutex_unlock(&vnode->permits_lock);
-	if (permits)
-		call_rcu(&permits->rcu, afs_zap_permits);
-
+	afs_put_permits(vnode->permit_cache);
 	_leave("");
 }
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e3c99437f6e0..7c318666e436 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -389,8 +389,7 @@ struct afs_vnode {
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
-	struct afs_permits	*permits;	/* cache of permits so far obtained */
-	struct mutex		permits_lock;	/* lock for altering permits list */
+	struct afs_permits	*permit_cache;	/* cache of permits so far obtained */
 	struct mutex		validate_lock;	/* lock for validating this vnode */
 	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
 	int			update_cnt;	/* number of outstanding ops that will update the
@@ -411,8 +410,6 @@ struct afs_vnode {
 #define AFS_VNODE_AUTOCELL	10		/* set if Vnode is an auto mount point */
 #define AFS_VNODE_PSEUDODIR	11		/* set if Vnode is a pseudo directory */
 
-	long			acl_order;	/* ACL check count (callback break count) */
-
 	struct list_head	writebacks;	/* alterations in pagecache that need writing */
 	struct list_head	pending_locks;	/* locks waiting to be granted */
 	struct list_head	granted_locks;	/* locks granted on this file */
@@ -435,16 +432,21 @@ struct afs_vnode {
  */
 struct afs_permit {
 	struct key		*key;		/* RxRPC ticket holding a security context */
-	afs_access_t		access_mask;	/* access mask for this key */
+	afs_access_t		access;		/* CallerAccess value for this key */
 };
 
 /*
- * cache of security records from attempts to access a vnode
+ * Immutable cache of CallerAccess records from attempts to access vnodes.
+ * These may be shared between multiple vnodes.
  */
 struct afs_permits {
-	struct rcu_head		rcu;		/* disposal procedure */
-	int			count;		/* number of records */
-	struct afs_permit	permits[0];	/* the permits so far examined */
+	struct rcu_head		rcu;
+	struct hlist_node	hash_node;	/* Link in hash */
+	unsigned long		h;		/* Hash value for this permit list */
+	refcount_t		usage;
+	unsigned short		nr_permits;	/* Number of records */
+	bool			invalidated;	/* Invalidated due to key change */
+	struct afs_permit	permits[];	/* List of permits sorted by key pointer */
 };
 
 /*
@@ -682,11 +684,13 @@ static inline int afs_transfer_reply(struct afs_call *call)
 /*
  * security.c
  */
+extern void afs_put_permits(struct afs_permits *);
 extern void afs_clear_permits(struct afs_vnode *);
-extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
 extern int afs_permission(struct inode *, int);
+extern void __exit afs_clean_up_permit_cache(void);
 
 /*
  * server.c
@@ -757,8 +761,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
 
 extern void afs_vnode_finalise_status_update(struct afs_vnode *,
 					     struct afs_server *);
-extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
-				  struct key *, bool);
+extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool);
 extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
 				struct afs_read *);
 extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 331c08740861..010e2e1a40f4 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -186,6 +186,7 @@ static void __exit afs_exit(void)
 	destroy_workqueue(afs_vlocation_update_worker);
 	destroy_workqueue(afs_async_calls);
 	destroy_workqueue(afs_wq);
+	afs_clean_up_permit_cache();
 	rcu_barrier();
 }
 
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 7cc61c8b748b..1b5198fc1657 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -1,6 +1,6 @@
 /* AFS security handling
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -14,9 +14,13 @@
 #include <linux/fs.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
+#include <linux/hashtable.h>
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
+static DEFINE_HASHTABLE(afs_permits_cache, 10);
+static DEFINE_SPINLOCK(afs_permits_lock);
+
 /*
  * get a key
  */
@@ -46,168 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell)
 }
 
 /*
- * dispose of a permits list
+ * Dispose of a list of permits.
  */
-void afs_zap_permits(struct rcu_head *rcu)
+static void afs_permits_rcu(struct rcu_head *rcu)
 {
 	struct afs_permits *permits =
 		container_of(rcu, struct afs_permits, rcu);
-	int loop;
-
-	_enter("{%d}", permits->count);
+	int i;
 
-	for (loop = permits->count - 1; loop >= 0; loop--)
-		key_put(permits->permits[loop].key);
+	for (i = 0; i < permits->nr_permits; i++)
+		key_put(permits->permits[i].key);
 	kfree(permits);
 }
 
 /*
- * dispose of a permits list in which all the key pointers have been copied
+ * Discard a permission cache.
  */
-static void afs_dispose_of_permits(struct rcu_head *rcu)
+void afs_put_permits(struct afs_permits *permits)
 {
-	struct afs_permits *permits =
-		container_of(rcu, struct afs_permits, rcu);
-
-	_enter("{%d}", permits->count);
-
-	kfree(permits);
+	if (permits && refcount_dec_and_test(&permits->usage)) {
+		spin_lock(&afs_permits_lock);
+		hash_del_rcu(&permits->hash_node);
+		spin_unlock(&afs_permits_lock);
+		call_rcu(&permits->rcu, afs_permits_rcu);
+	}
 }
 
 /*
- * get the authorising vnode - this is the specified inode itself if it's a
- * directory or it's the parent directory if the specified inode is a file or
- * symlink
- * - the caller must release the ref on the inode
+ * Clear a permit cache on callback break.
  */
-static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
-					    struct key *key)
+void afs_clear_permits(struct afs_vnode *vnode)
 {
-	struct afs_vnode *auth_vnode;
-	struct inode *auth_inode;
-
-	_enter("");
+	struct afs_permits *permits;
 
-	if (S_ISDIR(vnode->vfs_inode.i_mode)) {
-		auth_inode = igrab(&vnode->vfs_inode);
-		ASSERT(auth_inode != NULL);
-	} else {
-		auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
-				      &vnode->status.parent, NULL, NULL);
-		if (IS_ERR(auth_inode))
-			return ERR_CAST(auth_inode);
-	}
+	spin_lock(&vnode->lock);
+	permits = rcu_dereference_protected(vnode->permit_cache,
+					    lockdep_is_held(&vnode->lock));
+	RCU_INIT_POINTER(vnode->permit_cache, NULL);
+	vnode->cb_break++;
+	spin_unlock(&vnode->lock);
 
-	auth_vnode = AFS_FS_I(auth_inode);
-	_leave(" = {%x}", auth_vnode->fid.vnode);
-	return auth_vnode;
+	if (permits)
+		afs_put_permits(permits);
 }
 
 /*
- * clear the permit cache on a directory vnode
+ * Hash a list of permits.  Use simple addition to make it easy to add an extra
+ * one at an as-yet indeterminate position in the list.
  */
-void afs_clear_permits(struct afs_vnode *vnode)
+static void afs_hash_permits(struct afs_permits *permits)
 {
-	struct afs_permits *permits;
+	unsigned long h = permits->nr_permits;
+	int i;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
-
-	mutex_lock(&vnode->permits_lock);
-	permits = vnode->permits;
-	RCU_INIT_POINTER(vnode->permits, NULL);
-	vnode->cb_break++;
-	mutex_unlock(&vnode->permits_lock);
+	for (i = 0; i < permits->nr_permits; i++) {
+		h += (unsigned long)permits->permits[i].key / sizeof(void *);
+		h += permits->permits[i].access;
+	}
 
-	if (permits)
-		call_rcu(&permits->rcu, afs_zap_permits);
-	_leave("");
+	permits->h = h;
 }
 
 /*
- * add the result obtained for a vnode to its or its parent directory's cache
- * for the key used to access it
+ * Cache the CallerAccess result obtained from doing a fileserver operation
+ * that returned a vnode status for a particular key.  If a callback break
+ * occurs whilst the operation was in progress then we have to ditch the cache
+ * as the ACL *may* have changed.
  */
-void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
+		      unsigned int cb_break)
 {
-	struct afs_permits *permits, *xpermits;
-	struct afs_permit *permit;
-	struct afs_vnode *auth_vnode;
-	int count, loop;
+	struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
+	afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
+	size_t size = 0;
+	bool changed = false;
+	int i, j;
+
+	_enter("{%x:%u},%x,%x",
+	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
+
+	rcu_read_lock();
+
+	/* Check for the common case first: We got back the same access as last
+	 * time we tried and already have it recorded.
+	 */
+	permits = rcu_dereference(vnode->permit_cache);
+	if (permits) {
+		if (!permits->invalidated) {
+			for (i = 0; i < permits->nr_permits; i++) {
+				if (permits->permits[i].key < key)
+					continue;
+				if (permits->permits[i].key > key)
+					break;
+				if (permits->permits[i].access != caller_access) {
+					changed = true;
+					break;
+				}
 
-	_enter("{%x:%u},%x,%lx",
-	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
+				if (cb_break != (vnode->cb_break +
+						 vnode->cb_interest->server->cb_s_break)) {
+					changed = true;
+					break;
+				}
 
-	auth_vnode = afs_get_auth_inode(vnode, key);
-	if (IS_ERR(auth_vnode)) {
-		_leave(" [get error %ld]", PTR_ERR(auth_vnode));
-		return;
-	}
+				/* The cache is still good. */
+				rcu_read_unlock();
+				return;
+			}
+		}
 
-	mutex_lock(&auth_vnode->permits_lock);
+		changed |= permits->invalidated;
+		size = permits->nr_permits;
 
-	/* guard against a rename being detected whilst we waited for the
-	 * lock */
-	if (memcmp(&auth_vnode->fid, &vnode->status.parent,
-		   sizeof(struct afs_fid)) != 0) {
-		_debug("renamed");
-		goto out_unlock;
+		/* If this set of permits is now wrong, clear the permits
+		 * pointer so that no one tries to use the stale information.
+		 */
+		if (changed) {
+			spin_lock(&vnode->lock);
+			if (permits != rcu_access_pointer(vnode->permit_cache))
+				goto someone_else_changed_it_unlock;
+			RCU_INIT_POINTER(vnode->permit_cache, NULL);
+			spin_unlock(&vnode->lock);
+
+			afs_put_permits(permits);
+			permits = NULL;
+			size = 0;
+		}
 	}
 
-	/* have to be careful as the directory's callback may be broken between
-	 * us receiving the status we're trying to cache and us getting the
-	 * lock to update the cache for the status */
-	if (auth_vnode->acl_order - acl_order > 0) {
-		_debug("ACL changed?");
-		goto out_unlock;
+	if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) {
+		rcu_read_unlock();
+		goto someone_else_changed_it;
 	}
 
-	/* always update the anonymous mask */
-	_debug("anon access %x", vnode->status.anon_access);
-	auth_vnode->status.anon_access = vnode->status.anon_access;
-	if (key == vnode->volume->cell->anonymous_key)
-		goto out_unlock;
-
-	xpermits = auth_vnode->permits;
-	count = 0;
-	if (xpermits) {
-		/* see if the permit is already in the list
-		 * - if it is then we just amend the list
-		 */
-		count = xpermits->count;
-		permit = xpermits->permits;
-		for (loop = count; loop > 0; loop--) {
-			if (permit->key == key) {
-				permit->access_mask =
-					vnode->status.caller_access;
-				goto out_unlock;
+	/* We need a ref on any permits list we want to copy as we'll have to
+	 * drop the lock to do memory allocation.
+	 */
+	if (permits && !refcount_inc_not_zero(&permits->usage)) {
+		rcu_read_unlock();
+		goto someone_else_changed_it;
+	}
+
+	rcu_read_unlock();
+
+	/* Speculatively create a new list with the revised permission set.  We
+	 * discard this if we find an extant match already in the hash, but
+	 * it's easier to compare with memcmp this way.
+	 *
+	 * We fill in the key pointers at this time, but we don't get the refs
+	 * yet.
+	 */
+	size++;
+	new = kzalloc(sizeof(struct afs_permits) +
+		      sizeof(struct afs_permit) * size, GFP_NOFS);
+	if (!new)
+		return;
+
+	refcount_set(&new->usage, 1);
+	new->nr_permits = size;
+	i = j = 0;
+	if (permits) {
+		for (i = 0; i < permits->nr_permits; i++) {
+			if (j == i && permits->permits[i].key > key) {
+				new->permits[j].key = key;
+				new->permits[j].access = caller_access;
+				j++;
 			}
-			permit++;
+			new->permits[j].key = permits->permits[i].key;
+			new->permits[j].access = permits->permits[i].access;
+			j++;
 		}
 	}
 
-	permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
-			  GFP_NOFS);
-	if (!permits)
-		goto out_unlock;
-
-	if (xpermits)
-		memcpy(permits->permits, xpermits->permits,
-			count * sizeof(struct afs_permit));
-
-	_debug("key %x access %x",
-	       key_serial(key), vnode->status.caller_access);
-	permits->permits[count].access_mask = vnode->status.caller_access;
-	permits->permits[count].key = key_get(key);
-	permits->count = count + 1;
-
-	rcu_assign_pointer(auth_vnode->permits, permits);
-	if (xpermits)
-		call_rcu(&xpermits->rcu, afs_dispose_of_permits);
-
-out_unlock:
-	mutex_unlock(&auth_vnode->permits_lock);
-	iput(&auth_vnode->vfs_inode);
-	_leave("");
+	if (j == i) {
+		new->permits[j].key = key;
+		new->permits[j].access = caller_access;
+	}
+
+	afs_hash_permits(new);
+
+	afs_put_permits(permits);
+
+	/* Now see if the permit list we want is actually already available */
+	spin_lock(&afs_permits_lock);
+
+	hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
+		if (xpermits->h != new->h ||
+		    xpermits->invalidated ||
+		    xpermits->nr_permits != new->nr_permits ||
+		    memcmp(xpermits->permits, new->permits,
+			   new->nr_permits * sizeof(struct afs_permit)) != 0)
+			continue;
+
+		if (refcount_inc_not_zero(&xpermits->usage)) {
+			replacement = xpermits;
+			goto found;
+		}
+
+		break;
+	}
+
+	for (i = 0; i < new->nr_permits; i++)
+		key_get(new->permits[i].key);
+	hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
+	replacement = new;
+	new = NULL;
+
+found:
+	spin_unlock(&afs_permits_lock);
+
+	kfree(new);
+
+	spin_lock(&vnode->lock);
+	if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
+	    permits != rcu_access_pointer(vnode->permit_cache))
+		goto someone_else_changed_it_unlock;
+	rcu_assign_pointer(vnode->permit_cache, replacement);
+	spin_unlock(&vnode->lock);
+	afs_put_permits(permits);
+	return;
+
+someone_else_changed_it_unlock:
+	spin_unlock(&vnode->lock);
+someone_else_changed_it:
+	/* Someone else changed the cache under us - don't recheck at this
+	 * time.
+	 */
+	return;
 }
 
 /*
@@ -219,55 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 			    afs_access_t *_access)
 {
 	struct afs_permits *permits;
-	struct afs_permit *permit;
-	struct afs_vnode *auth_vnode;
-	bool valid;
-	int loop, ret;
+	bool valid = false;
+	int i, ret;
 
 	_enter("{%x:%u},%x",
 	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
-	auth_vnode = afs_get_auth_inode(vnode, key);
-	if (IS_ERR(auth_vnode)) {
-		*_access = 0;
-		_leave(" = %ld", PTR_ERR(auth_vnode));
-		return PTR_ERR(auth_vnode);
-	}
-
-	ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
+	permits = vnode->permit_cache;
 
 	/* check the permits to see if we've got one yet */
-	if (key == auth_vnode->volume->cell->anonymous_key) {
+	if (key == vnode->volume->cell->anonymous_key) {
 		_debug("anon");
-		*_access = auth_vnode->status.anon_access;
+		*_access = vnode->status.anon_access;
 		valid = true;
 	} else {
-		valid = false;
 		rcu_read_lock();
-		permits = rcu_dereference(auth_vnode->permits);
+		permits = rcu_dereference(vnode->permit_cache);
 		if (permits) {
-			permit = permits->permits;
-			for (loop = permits->count; loop > 0; loop--) {
-				if (permit->key == key) {
-					_debug("found in cache");
-					*_access = permit->access_mask;
-					valid = true;
+			for (i = 0; i < permits->nr_permits; i++) {
+				if (permits->permits[i].key < key)
+					continue;
+				if (permits->permits[i].key > key)
 					break;
-				}
-				permit++;
+
+				*_access = permits->permits[i].access;
+				valid = !permits->invalidated;
+				break;
 			}
 		}
 		rcu_read_unlock();
 	}
 
 	if (!valid) {
-		/* check the status on the file we're actually interested in
-		 * (the post-processing will cache the result on auth_vnode) */
+		/* Check the status on the file we're actually interested in
+		 * (the post-processing will cache the result).
+		 */
 		_debug("no valid permit");
 
-		ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true);
+		ret = afs_vnode_fetch_status(vnode, key, true);
 		if (ret < 0) {
-			iput(&auth_vnode->vfs_inode);
 			*_access = 0;
 			_leave(" = %d", ret);
 			return ret;
@@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 		*_access = vnode->status.caller_access;
 	}
 
-	iput(&auth_vnode->vfs_inode);
 	_leave(" = 0 [access %x]", *_access);
 	return 0;
 }
@@ -360,3 +418,12 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 }
+
+void __exit afs_clean_up_permit_cache(void)
+{
+	int i;
+
+	for (i = 0; i < HASH_SIZE(afs_permits_cache); i++)
+		WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i]));
+
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c8fb1a497a84..e62fb1bdadc6 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -532,7 +532,6 @@ static void afs_i_init_once(void *_vnode)
 	memset(vnode, 0, sizeof(*vnode));
 	inode_init_once(&vnode->vfs_inode);
 	init_waitqueue_head(&vnode->update_waitq);
-	mutex_init(&vnode->permits_lock);
 	mutex_init(&vnode->validate_lock);
 	spin_lock_init(&vnode->writeback_lock);
 	spin_lock_init(&vnode->lock);
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index c1bf2124cef5..622e1100099b 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -88,11 +88,10 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
  *   - there are any outstanding ops that will fetch the status
  * - TODO implement local caching
  */
-int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode,
-			   struct key *key, bool force)
+int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force)
 {
 	struct afs_server *server;
-	unsigned long acl_order;
+	unsigned int cb_break = 0;
 	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
@@ -113,9 +112,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode
 		return -ENOENT;
 	}
 
-	acl_order = 0;
-	if (auth_vnode)
-		acl_order = auth_vnode->acl_order;
+	cb_break = vnode->cb_break + vnode->cb_s_break;
 
 	spin_lock(&vnode->lock);
 
@@ -192,8 +189,7 @@ get_anyway:
 	/* adjust the flags */
 	if (ret == 0) {
 		_debug("adjust");
-		if (auth_vnode)
-			afs_cache_permit(vnode, key, acl_order);
+		afs_cache_permit(vnode, key, cb_break);
 		afs_vnode_finalise_status_update(vnode, server);
 		afs_put_server(afs_v2net(vnode), server);
 	} else {
-- 
cgit v1.2.3


From 989782dcdc91a5e6d5999c7a52a84a60a0811e56 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:50 +0000
Subject: afs: Overhaul cell database management

Overhaul the way that the in-kernel AFS client keeps track of cells in the
following manner:

 (1) Cells are now held in an rbtree to make walking them quicker and RCU
     managed (though this is probably overkill).

 (2) Cells now have a manager work item that:

     (A) Looks after fetching and refreshing the VL server list.

     (B) Manages cell record lifetime, including initialising and
     	 destruction.

     (B) Manages cell record caching whereby threads are kept around for a
     	 certain time after last use and then destroyed.

     (C) Manages the FS-Cache index cookie for a cell.  It is not permitted
     	 for a cookie to be in use twice, so we have to be careful to not
     	 allow a new cell record to exist at the same time as an old record
     	 of the same name.

 (3) Each AFS network namespace is given a manager work item that manages
     the cells within it, maintaining a single timer to prod cells into
     updating their DNS records.

     This uses the reduce_timer() facility to make the timer expire at the
     soonest timed event that needs happening.

 (4) When a module is being unloaded, cells and cell managers are now
     counted out using dec_after_work() to make sure the module text is
     pinned until after the data structures have been cleaned up.

 (5) Each cell's VL server list is now protected by a seqlock rather than a
     semaphore.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cell.c     | 916 +++++++++++++++++++++++++++++++++++++-----------------
 fs/afs/internal.h |  60 +++-
 fs/afs/main.c     |  16 +-
 fs/afs/proc.c     |  15 +-
 fs/afs/super.c    |  12 +-
 fs/afs/xattr.c    |   2 +-
 6 files changed, 704 insertions(+), 317 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 216821fd1a61..e83103e8a6fb 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,6 +1,6 @@
 /* AFS cell and server record management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -19,128 +19,194 @@
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
+unsigned __read_mostly afs_cell_gc_delay = 10;
+
+static void afs_manage_cell(struct work_struct *);
+
+static void afs_dec_cells_outstanding(struct afs_net *net)
+{
+	if (atomic_dec_and_test(&net->cells_outstanding))
+		wake_up_atomic_t(&net->cells_outstanding);
+}
+
 /*
- * allocate a cell record and fill in its name, VL server address list and
- * allocate an anonymous key
+ * Set the cell timer to fire after a given delay, assuming it's not already
+ * set for an earlier time.
  */
-static struct afs_cell *afs_cell_alloc(struct afs_net *net,
-				       const char *name, unsigned namelen,
-				       char *vllist)
+static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
 {
-	struct afs_cell *cell;
-	struct key *key;
-	char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
-	char  *dvllist = NULL, *_vllist = NULL;
-	char  delimiter = ':';
-	int ret, i;
+	if (net->live) {
+		atomic_inc(&net->cells_outstanding);
+		if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
+			afs_dec_cells_outstanding(net);
+	}
+}
+
+/*
+ * Look up and get an activation reference on a cell record under RCU
+ * conditions.  The caller must hold the RCU read lock.
+ */
+struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
+				     const char *name, unsigned int namesz)
+{
+	struct afs_cell *cell = NULL;
+	struct rb_node *p;
+	int n, seq = 0, ret = 0;
+
+	_enter("%*.*s", namesz, namesz, name);
+
+	if (name && namesz == 0)
+		return ERR_PTR(-EINVAL);
+	if (namesz > AFS_MAXCELLNAME)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	do {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		if (cell)
+			afs_put_cell(net, cell);
+		cell = NULL;
+		ret = -ENOENT;
+
+		read_seqbegin_or_lock(&net->cells_lock, &seq);
+
+		if (!name) {
+			cell = rcu_dereference_raw(net->ws_cell);
+			if (cell) {
+				afs_get_cell(cell);
+				continue;
+			}
+			ret = -EDESTADDRREQ;
+			continue;
+		}
+
+		p = rcu_dereference_raw(net->cells.rb_node);
+		while (p) {
+			cell = rb_entry(p, struct afs_cell, net_node);
+
+			n = strncasecmp(cell->name, name,
+					min_t(size_t, cell->name_len, namesz));
+			if (n == 0)
+				n = cell->name_len - namesz;
+			if (n < 0) {
+				p = rcu_dereference_raw(p->rb_left);
+			} else if (n > 0) {
+				p = rcu_dereference_raw(p->rb_right);
+			} else {
+				if (atomic_inc_not_zero(&cell->usage)) {
+					ret = 0;
+					break;
+				}
+				/* We want to repeat the search, this time with
+				 * the lock properly locked.
+				 */
+			}
+			cell = NULL;
+		}
 
-	_enter("%*.*s,%s", namelen, namelen, name ?: "", vllist);
+	} while (need_seqretry(&net->cells_lock, seq));
 
-	BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
+	done_seqretry(&net->cells_lock, seq);
 
+	return ret == 0 ? cell : ERR_PTR(ret);
+}
+
+/*
+ * Set up a cell record and fill in its name, VL server address list and
+ * allocate an anonymous key
+ */
+static struct afs_cell *afs_alloc_cell(struct afs_net *net,
+				       const char *name, unsigned int namelen,
+				       const char *vllist)
+{
+	struct afs_cell *cell;
+	int i, ret;
+
+	ASSERT(name);
+	if (namelen == 0)
+		return ERR_PTR(-EINVAL);
 	if (namelen > AFS_MAXCELLNAME) {
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 
-	/* allocate and initialise a cell record */
-	cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
+	_enter("%*.*s,%s", namelen, namelen, name, vllist);
+
+	cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
 	if (!cell) {
 		_leave(" = -ENOMEM");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memcpy(cell->name, name, namelen);
-	cell->name[namelen] = 0;
-
-	atomic_set(&cell->usage, 1);
-	INIT_LIST_HEAD(&cell->link);
 	cell->net = net;
+	cell->name_len = namelen;
+	for (i = 0; i < namelen; i++)
+		cell->name[i] = tolower(name[i]);
+
+	atomic_set(&cell->usage, 2);
+	INIT_WORK(&cell->manager, afs_manage_cell);
 	rwlock_init(&cell->servers_lock);
 	INIT_LIST_HEAD(&cell->servers);
 	init_rwsem(&cell->vl_sem);
 	INIT_LIST_HEAD(&cell->vl_list);
 	spin_lock_init(&cell->vl_lock);
+	seqlock_init(&cell->vl_addrs_lock);
+	cell->flags = (1 << AFS_CELL_FL_NOT_READY);
 
 	for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) {
 		struct sockaddr_rxrpc *srx = &cell->vl_addrs[i];
 		srx->srx_family			= AF_RXRPC;
 		srx->srx_service		= VL_SERVICE;
 		srx->transport_type		= SOCK_DGRAM;
-		srx->transport.sin.sin_port	= htons(AFS_VL_PORT);
+		srx->transport.sin6.sin6_family	= AF_INET6;
+		srx->transport.sin6.sin6_port	= htons(AFS_VL_PORT);
 	}
 
-	/* if the ip address is invalid, try dns query */
-	if (!vllist || strlen(vllist) < 7) {
-		ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
-		if (ret < 0) {
-			if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY)
-				/* translate these errors into something
-				 * userspace might understand */
-				ret = -EDESTADDRREQ;
-			_leave(" = %d", ret);
-			return ERR_PTR(ret);
-		}
-		_vllist = dvllist;
-
-		/* change the delimiter for user-space reply */
-		delimiter = ',';
+	/* Fill in the VL server list if we were given a list of addresses to
+	 * use.
+	 */
+	if (vllist) {
+		char delim = ':';
 
-	} else {
 		if (strchr(vllist, ',') || !strchr(vllist, '.'))
-			delimiter = ',';
-		_vllist = vllist;
-	}
-
-	/* fill in the VL server list from the rest of the string */
-	do {
-		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-		const char *end;
-
-		next = strchr(_vllist, delimiter);
-		if (next)
-			*next++ = 0;
-
-		if (in4_pton(_vllist, -1, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-			     -1, &end)) {
-			srx->transport_len		= sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_family	= AF_INET6;
-			srx->transport.sin6.sin6_flowinfo = 0;
-			srx->transport.sin6.sin6_scope_id = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-		} else if (in6_pton(_vllist, -1, srx->transport.sin6.sin6_addr.s6_addr,
-				    -1, &end)) {
-			srx->transport_len		= sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_family	= AF_INET6;
-			srx->transport.sin6.sin6_flowinfo = 0;
-			srx->transport.sin6.sin6_scope_id = 0;
-		} else {
-			goto bad_address;
-		}
+			delim = ',';
+
+		do {
+			struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
+
+			if (in4_pton(vllist, -1,
+				     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+				     delim, &vllist)) {
+				srx->transport_len = sizeof(struct sockaddr_in6);
+				srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+				srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+				srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+			} else if (in6_pton(vllist, -1,
+					    srx->transport.sin6.sin6_addr.s6_addr,
+					    delim, &vllist)) {
+				srx->transport_len = sizeof(struct sockaddr_in6);
+				srx->transport.sin6.sin6_family	= AF_INET6;
+			} else {
+				goto bad_address;
+			}
 
-	} while (cell->vl_naddrs++,
-		 cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
+			cell->vl_naddrs++;
+			if (!*vllist)
+				break;
+			vllist++;
 
-	/* create a key to represent an anonymous user */
-	memcpy(keyname, "afs@", 4);
-	dp = keyname + 4;
-	cp = cell->name;
-	do {
-		*dp++ = toupper(*cp);
-	} while (*cp++);
+		} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist);
 
-	key = rxrpc_get_null_key(keyname);
-	if (IS_ERR(key)) {
-		_debug("no key");
-		ret = PTR_ERR(key);
-		goto error;
+		/* Disable DNS refresh for manually-specified cells */
+		cell->dns_expiry = TIME64_MAX;
+	} else {
+		/* We're going to need to 'refresh' this cell's VL server list
+		 * from the DNS before we can use it.
+		 */
+		cell->dns_expiry = S64_MIN;
 	}
-	cell->anonymous_key = key;
-
-	_debug("anon key %p{%x}",
-	       cell->anonymous_key, key_serial(cell->anonymous_key));
 
 	_leave(" = %p", cell);
 	return cell;
@@ -148,92 +214,129 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net,
 bad_address:
 	printk(KERN_ERR "kAFS: bad VL server IP address\n");
 	ret = -EINVAL;
-error:
-	key_put(cell->anonymous_key);
-	kfree(dvllist);
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
 
 /*
- * afs_cell_crate() - create a cell record
+ * afs_lookup_cell - Look up or create a cell record.
  * @net:	The network namespace
- * @name:	is the name of the cell.
- * @namsesz:	is the strlen of the cell name.
- * @vllist:	is a colon separated list of IP addresses in "a.b.c.d" format.
- * @retref:	is T to return the cell reference when the cell exists.
+ * @name:	The name of the cell.
+ * @namesz:	The strlen of the cell name.
+ * @vllist:	A colon/comma separated list of numeric IP addresses or NULL.
+ * @excl:	T if an error should be given if the cell name already exists.
+ *
+ * Look up a cell record by name and query the DNS for VL server addresses if
+ * needed.  Note that that actual DNS query is punted off to the manager thread
+ * so that this function can return immediately if interrupted whilst allowing
+ * cell records to be shared even if not yet fully constructed.
  */
-struct afs_cell *afs_cell_create(struct afs_net *net,
-				 const char *name, unsigned namesz,
-				 char *vllist, bool retref)
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+				 const char *name, unsigned int namesz,
+				 const char *vllist, bool excl)
 {
-	struct afs_cell *cell;
-	int ret;
-
-	_enter("%*.*s,%s", namesz, namesz, name ?: "", vllist);
+	struct afs_cell *cell, *candidate, *cursor;
+	struct rb_node *parent, **pp;
+	int ret, n;
+
+	_enter("%s,%s", name, vllist);
+
+	if (!excl) {
+		rcu_read_lock();
+		cell = afs_lookup_cell_rcu(net, name, namesz);
+		rcu_read_unlock();
+		if (!IS_ERR(cell)) {
+			if (excl) {
+				afs_put_cell(net, cell);
+				return ERR_PTR(-EEXIST);
+			}
+			goto wait_for_cell;
+		}
+	}
 
-	down_write(&net->cells_sem);
-	read_lock(&net->cells_lock);
-	list_for_each_entry(cell, &net->cells, link) {
-		if (strncasecmp(cell->name, name, namesz) == 0)
-			goto duplicate_name;
+	/* Assume we're probably going to create a cell and preallocate and
+	 * mostly set up a candidate record.  We can then use this to stash the
+	 * name, the net namespace and VL server addresses.
+	 *
+	 * We also want to do this before we hold any locks as it may involve
+	 * upcalling to userspace to make DNS queries.
+	 */
+	candidate = afs_alloc_cell(net, name, namesz, vllist);
+	if (IS_ERR(candidate)) {
+		_leave(" = %ld", PTR_ERR(candidate));
+		return candidate;
 	}
-	read_unlock(&net->cells_lock);
 
-	cell = afs_cell_alloc(net, name, namesz, vllist);
-	if (IS_ERR(cell)) {
-		_leave(" = %ld", PTR_ERR(cell));
-		up_write(&net->cells_sem);
-		return cell;
+	/* Find the insertion point and check to see if someone else added a
+	 * cell whilst we were allocating.
+	 */
+	write_seqlock(&net->cells_lock);
+
+	pp = &net->cells.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		cursor = rb_entry(parent, struct afs_cell, net_node);
+
+		n = strncasecmp(cursor->name, name,
+				min_t(size_t, cursor->name_len, namesz));
+		if (n == 0)
+			n = cursor->name_len - namesz;
+		if (n < 0)
+			pp = &(*pp)->rb_left;
+		else if (n > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto cell_already_exists;
 	}
 
-	/* add a proc directory for this cell */
-	ret = afs_proc_cell_setup(net, cell);
-	if (ret < 0)
-		goto error;
+	cell = candidate;
+	candidate = NULL;
+	rb_link_node_rcu(&cell->net_node, parent, pp);
+	rb_insert_color(&cell->net_node, &net->cells);
+	atomic_inc(&net->cells_outstanding);
+	write_sequnlock(&net->cells_lock);
 
-#ifdef CONFIG_AFS_FSCACHE
-	/* put it up for caching (this never returns an error) */
-	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
-					     &afs_cell_cache_index_def,
-					     cell, true);
-#endif
+	queue_work(afs_wq, &cell->manager);
 
-	/* add to the cell lists */
-	write_lock(&net->cells_lock);
-	list_add_tail(&cell->link, &net->cells);
-	write_unlock(&net->cells_lock);
+wait_for_cell:
+	_debug("wait_for_cell");
+	ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
+	smp_rmb();
 
-	down_write(&net->proc_cells_sem);
-	list_add_tail(&cell->proc_link, &net->proc_cells);
-	up_write(&net->proc_cells_sem);
-	up_write(&net->cells_sem);
+	switch (READ_ONCE(cell->state)) {
+	case AFS_CELL_FAILED:
+		ret = cell->error;
+		goto error;
+	default:
+		_debug("weird %u %d", cell->state, cell->error);
+		goto error;
+	case AFS_CELL_ACTIVE:
+		break;
+	}
 
-	_leave(" = %p", cell);
+	_leave(" = %p [cell]", cell);
 	return cell;
 
+cell_already_exists:
+	_debug("cell exists");
+	cell = cursor;
+	if (excl) {
+		ret = -EEXIST;
+	} else {
+		ASSERTCMP(atomic_read(&cursor->usage), >=, 1);
+		afs_get_cell(cursor);
+		ret = 0;
+	}
+	write_sequnlock(&net->cells_lock);
+	kfree(candidate);
+	if (ret == 0)
+		goto wait_for_cell;
 error:
-	up_write(&net->cells_sem);
-	key_put(cell->anonymous_key);
-	kfree(cell);
-	_leave(" = %d", ret);
+	afs_put_cell(net, cell);
+	_leave(" = %d [error]", ret);
 	return ERR_PTR(ret);
-
-duplicate_name:
-	if (retref && !IS_ERR(cell))
-		afs_get_cell(cell);
-
-	read_unlock(&net->cells_lock);
-	up_write(&net->cells_sem);
-
-	if (retref) {
-		_leave(" = %p", cell);
-		return cell;
-	}
-
-	_leave(" = -EEXIST");
-	return ERR_PTR(-EEXIST);
 }
 
 /*
@@ -241,10 +344,11 @@ duplicate_name:
  * - can be called with a module parameter string
  * - can be called from a write to /proc/fs/afs/rootcell
  */
-int afs_cell_init(struct afs_net *net, char *rootcell)
+int afs_cell_init(struct afs_net *net, const char *rootcell)
 {
 	struct afs_cell *old_root, *new_root;
-	char *cp;
+	const char *cp, *vllist;
+	size_t len;
 
 	_enter("");
 
@@ -257,223 +361,471 @@ int afs_cell_init(struct afs_net *net, char *rootcell)
 	}
 
 	cp = strchr(rootcell, ':');
-	if (!cp)
+	if (!cp) {
 		_debug("kAFS: no VL server IP addresses specified");
-	else
-		*cp++ = 0;
+		vllist = NULL;
+		len = strlen(rootcell);
+	} else {
+		vllist = cp + 1;
+		len = cp - rootcell;
+	}
 
 	/* allocate a cell record for the root cell */
-	new_root = afs_cell_create(net, rootcell, strlen(rootcell), cp, false);
+	new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
 	if (IS_ERR(new_root)) {
 		_leave(" = %ld", PTR_ERR(new_root));
 		return PTR_ERR(new_root);
 	}
 
+	set_bit(AFS_CELL_FL_NO_GC, &new_root->flags);
+	afs_get_cell(new_root);
+
 	/* install the new cell */
-	write_lock(&net->cells_lock);
+	write_seqlock(&net->cells_lock);
 	old_root = net->ws_cell;
 	net->ws_cell = new_root;
-	write_unlock(&net->cells_lock);
-	afs_put_cell(net, old_root);
+	write_sequnlock(&net->cells_lock);
 
+	afs_put_cell(net, old_root);
 	_leave(" = 0");
 	return 0;
 }
 
 /*
- * lookup a cell record
+ * Update a cell's VL server address list from the DNS.
  */
-struct afs_cell *afs_cell_lookup(struct afs_net *net,
-				 const char *name, unsigned namesz,
-				 bool dns_cell)
+static void afs_update_cell(struct afs_cell *cell)
 {
-	struct afs_cell *cell;
+	time64_t now, expiry;
+	char *vllist = NULL;
+	int ret;
 
-	_enter("\"%*.*s\",", namesz, namesz, name ?: "");
+	_enter("%s", cell->name);
+
+	ret = dns_query("afsdb", cell->name, cell->name_len,
+			"ipv4", &vllist, &expiry);
+	_debug("query %d", ret);
+	switch (ret) {
+	case 0 ... INT_MAX:
+		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+		goto parse_dns_data;
+
+	case -ENODATA:
+		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+		set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+		cell->dns_expiry = ktime_get_real_seconds() + 61;
+		cell->error = -EDESTADDRREQ;
+		goto out;
+
+	case -EAGAIN:
+	case -ECONNREFUSED:
+	default:
+		/* Unable to query DNS. */
+		set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+		cell->dns_expiry = ktime_get_real_seconds() + 10;
+		cell->error = -EDESTADDRREQ;
+		goto out;
+	}
 
-	down_read(&net->cells_sem);
-	read_lock(&net->cells_lock);
+parse_dns_data:
+	write_seqlock(&cell->vl_addrs_lock);
 
-	if (name) {
-		/* if the cell was named, look for it in the cell record list */
-		list_for_each_entry(cell, &net->cells, link) {
-			if (strncmp(cell->name, name, namesz) == 0) {
-				afs_get_cell(cell);
-				goto found;
-			}
-		}
-		cell = ERR_PTR(-ENOENT);
-		if (dns_cell)
-			goto create_cell;
-	found:
-		;
-	} else {
-		cell = net->ws_cell;
-		if (!cell) {
-			/* this should not happen unless user tries to mount
-			 * when root cell is not set. Return an impossibly
-			 * bizarre errno to alert the user. Things like
-			 * ENOENT might be "more appropriate" but they happen
-			 * for other reasons.
-			 */
-			cell = ERR_PTR(-EDESTADDRREQ);
+	ret = -EINVAL;
+	do {
+		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
+
+		if (in4_pton(vllist, -1,
+			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+			     ',', (const char **)&vllist)) {
+			srx->transport_len = sizeof(struct sockaddr_in6);
+			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+		} else if (in6_pton(vllist, -1,
+				    srx->transport.sin6.sin6_addr.s6_addr,
+				    ',', (const char **)&vllist)) {
+			srx->transport_len = sizeof(struct sockaddr_in6);
+			srx->transport.sin6.sin6_family	= AF_INET6;
 		} else {
-			afs_get_cell(cell);
+			goto bad_address;
 		}
 
-	}
+		cell->vl_naddrs++;
+		if (!*vllist)
+			break;
+		vllist++;
 
-	read_unlock(&net->cells_lock);
-	up_read(&net->cells_sem);
-	_leave(" = %p", cell);
-	return cell;
-
-create_cell:
-	read_unlock(&net->cells_lock);
-	up_read(&net->cells_sem);
+	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS);
 
-	cell = afs_cell_create(net, name, namesz, NULL, true);
+	if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS)
+		memset(cell->vl_addrs + cell->vl_naddrs, 0,
+		       (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0]));
 
-	_leave(" = %p", cell);
-	return cell;
+	now = ktime_get_real_seconds();
+	cell->dns_expiry = expiry;
+	afs_set_cell_timer(cell->net, expiry - now);
+bad_address:
+	write_sequnlock(&cell->vl_addrs_lock);
+out:
+	_leave("");
 }
 
-#if 0
 /*
- * try and get a cell record
+ * Destroy a cell record
  */
-struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
+static void afs_cell_destroy(struct rcu_head *rcu)
 {
-	write_lock(&net->cells_lock);
+	struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
 
-	if (cell && !list_empty(&cell->link))
-		afs_get_cell(cell);
-	else
-		cell = NULL;
+	_enter("%p{%s}", cell, cell->name);
 
-	write_unlock(&net->cells_lock);
-	return cell;
+	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
+
+	key_put(cell->anonymous_key);
+	kfree(cell);
+
+	_leave(" [destroyed]");
 }
-#endif  /*  0  */
 
 /*
- * destroy a cell record
+ * Queue the cell manager.
  */
-void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
+static void afs_queue_cell_manager(struct afs_net *net)
 {
-	if (!cell)
-		return;
+	int outstanding = atomic_inc_return(&net->cells_outstanding);
 
-	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
+	_enter("%d", outstanding);
 
-	ASSERTCMP(atomic_read(&cell->usage), >, 0);
+	if (!queue_work(afs_wq, &net->cells_manager))
+		afs_dec_cells_outstanding(net);
+}
+
+/*
+ * Cell management timer.  We have an increment on cells_outstanding that we
+ * need to pass along to the work item.
+ */
+void afs_cells_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
+
+	_enter("");
+	if (!queue_work(afs_wq, &net->cells_manager))
+		afs_dec_cells_outstanding(net);
+}
 
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	write_lock(&net->cells_lock);
+/*
+ * Drop a reference on a cell record.
+ */
+void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	time64_t now, expire_delay;
 
-	if (likely(!atomic_dec_and_test(&cell->usage))) {
-		write_unlock(&net->cells_lock);
-		_leave("");
+	if (!cell)
 		return;
-	}
 
-	ASSERT(list_empty(&cell->servers));
-	ASSERT(list_empty(&cell->vl_list));
+	_enter("%s", cell->name);
 
-	wake_up(&net->cells_freeable_wq);
+	now = ktime_get_real_seconds();
+	cell->last_inactive = now;
+	expire_delay = 0;
+	if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
+	    !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+		expire_delay = afs_cell_gc_delay;
 
-	write_unlock(&net->cells_lock);
+	if (atomic_dec_return(&cell->usage) > 1)
+		return;
 
-	_leave(" [unused]");
+	/* 'cell' may now be garbage collected. */
+	afs_set_cell_timer(net, expire_delay);
 }
 
 /*
- * destroy a cell record
- * - must be called with the net->cells_sem write-locked
- * - cell->link should have been broken by the caller
+ * Allocate a key to use as a placeholder for anonymous user security.
  */
-static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell)
+static int afs_alloc_anon_key(struct afs_cell *cell)
 {
-	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
+	struct key *key;
+	char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp;
 
-	ASSERTCMP(atomic_read(&cell->usage), >=, 0);
-	ASSERT(list_empty(&cell->link));
+	/* Create a key to represent an anonymous user. */
+	memcpy(keyname, "afs@", 4);
+	dp = keyname + 4;
+	cp = cell->name;
+	do {
+		*dp++ = tolower(*cp);
+	} while (*cp++);
 
-	/* wait for everyone to stop using the cell */
-	if (atomic_read(&cell->usage) > 0) {
-		DECLARE_WAITQUEUE(myself, current);
+	key = rxrpc_get_null_key(keyname);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
 
-		_debug("wait for cell %s", cell->name);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		add_wait_queue(&net->cells_freeable_wq, &myself);
+	cell->anonymous_key = key;
 
-		while (atomic_read(&cell->usage) > 0) {
-			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
-		}
+	_debug("anon key %p{%x}",
+	       cell->anonymous_key, key_serial(cell->anonymous_key));
+	return 0;
+}
 
-		remove_wait_queue(&net->cells_freeable_wq, &myself);
-		set_current_state(TASK_RUNNING);
+/*
+ * Activate a cell.
+ */
+static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	int ret;
+
+	if (!cell->anonymous_key) {
+		ret = afs_alloc_anon_key(cell);
+		if (ret < 0)
+			return ret;
 	}
 
-	_debug("cell dead");
-	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
-	ASSERT(list_empty(&cell->servers));
-	ASSERT(list_empty(&cell->vl_list));
+#ifdef CONFIG_AFS_FSCACHE
+	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
+					     &afs_cell_cache_index_def,
+					     cell, true);
+#endif
+	ret = afs_proc_cell_setup(net, cell);
+	if (ret < 0)
+		return ret;
+	spin_lock(&net->proc_cells_lock);
+	list_add_tail(&cell->proc_link, &net->proc_cells);
+	spin_unlock(&net->proc_cells_lock);
+	return 0;
+}
+
+/*
+ * Deactivate a cell.
+ */
+static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	_enter("%s", cell->name);
 
 	afs_proc_cell_remove(net, cell);
 
-	down_write(&net->proc_cells_sem);
+	spin_lock(&net->proc_cells_lock);
 	list_del_init(&cell->proc_link);
-	up_write(&net->proc_cells_sem);
+	spin_unlock(&net->proc_cells_lock);
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(cell->cache, 0);
+	cell->cache = NULL;
 #endif
-	key_put(cell->anonymous_key);
-	kfree(cell);
 
-	_leave(" [destroyed]");
+	_leave("");
 }
 
 /*
- * purge in-memory cell database on module unload or afs_init() failure
- * - the timeout daemon is stopped before calling this
+ * Manage a cell record, initialising and destroying it, maintaining its DNS
+ * records.
  */
-void afs_cell_purge(struct afs_net *net)
+static void afs_manage_cell(struct work_struct *work)
 {
-	struct afs_cell *cell;
+	struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+	struct afs_net *net = cell->net;
+	bool deleted;
+	int ret, usage;
+
+	_enter("%s", cell->name);
+
+again:
+	_debug("state %u", cell->state);
+	switch (cell->state) {
+	case AFS_CELL_INACTIVE:
+	case AFS_CELL_FAILED:
+		write_seqlock(&net->cells_lock);
+		usage = 1;
+		deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0);
+		if (deleted)
+			rb_erase(&cell->net_node, &net->cells);
+		write_sequnlock(&net->cells_lock);
+		if (deleted)
+			goto final_destruction;
+		if (cell->state == AFS_CELL_FAILED)
+			goto done;
+		cell->state = AFS_CELL_UNSET;
+		goto again;
+
+	case AFS_CELL_UNSET:
+		cell->state = AFS_CELL_ACTIVATING;
+		goto again;
+
+	case AFS_CELL_ACTIVATING:
+		ret = afs_activate_cell(net, cell);
+		if (ret < 0)
+			goto activation_failed;
+
+		cell->state = AFS_CELL_ACTIVE;
+		smp_wmb();
+		clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+		wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+		goto again;
+
+	case AFS_CELL_ACTIVE:
+		if (atomic_read(&cell->usage) > 1) {
+			time64_t now = ktime_get_real_seconds();
+			if (cell->dns_expiry <= now && net->live)
+				afs_update_cell(cell);
+			goto done;
+		}
+		cell->state = AFS_CELL_DEACTIVATING;
+		goto again;
+
+	case AFS_CELL_DEACTIVATING:
+		set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+		if (atomic_read(&cell->usage) > 1)
+			goto reverse_deactivation;
+		afs_deactivate_cell(net, cell);
+		cell->state = AFS_CELL_INACTIVE;
+		goto again;
+
+	default:
+		break;
+	}
+	_debug("bad state %u", cell->state);
+	BUG(); /* Unhandled state */
+
+activation_failed:
+	cell->error = ret;
+	afs_deactivate_cell(net, cell);
+
+	cell->state = AFS_CELL_FAILED;
+	smp_wmb();
+	if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
+		wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+	goto again;
+
+reverse_deactivation:
+	cell->state = AFS_CELL_ACTIVE;
+	smp_wmb();
+	clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+	wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+	_leave(" [deact->act]");
+	return;
+
+done:
+	_leave(" [done %u]", cell->state);
+	return;
+
+final_destruction:
+	call_rcu(&cell->rcu, afs_cell_destroy);
+	afs_dec_cells_outstanding(net);
+	_leave(" [destruct %d]", atomic_read(&net->cells_outstanding));
+}
+
+/*
+ * Manage the records of cells known to a network namespace.  This includes
+ * updating the DNS records and garbage collecting unused cells that were
+ * automatically added.
+ *
+ * Note that constructed cell records may only be removed from net->cells by
+ * this work item, so it is safe for this work item to stash a cursor pointing
+ * into the tree and then return to caller (provided it skips cells that are
+ * still under construction).
+ *
+ * Note also that we were given an increment on net->cells_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ */
+void afs_manage_cells(struct work_struct *work)
+{
+	struct afs_net *net = container_of(work, struct afs_net, cells_manager);
+	struct rb_node *cursor;
+	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+	bool purging = !net->live;
 
 	_enter("");
 
-	afs_put_cell(net, net->ws_cell);
+	/* Trawl the cell database looking for cells that have expired from
+	 * lack of use and cells whose DNS results have expired and dispatch
+	 * their managers.
+	 */
+	read_seqlock_excl(&net->cells_lock);
 
-	down_write(&net->cells_sem);
+	for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
+		struct afs_cell *cell =
+			rb_entry(cursor, struct afs_cell, net_node);
+		unsigned usage;
+		bool sched_cell = false;
 
-	while (!list_empty(&net->cells)) {
-		cell = NULL;
+		usage = atomic_read(&cell->usage);
+		_debug("manage %s %u", cell->name, usage);
+
+		ASSERTCMP(usage, >=, 1);
+
+		if (purging) {
+			if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+				usage = atomic_dec_return(&cell->usage);
+			ASSERTCMP(usage, ==, 1);
+		}
 
-		/* remove the next cell from the front of the list */
-		write_lock(&net->cells_lock);
+		if (usage == 1) {
+			time64_t expire_at = cell->last_inactive;
 
-		if (!list_empty(&net->cells)) {
-			cell = list_entry(net->cells.next,
-					  struct afs_cell, link);
-			list_del_init(&cell->link);
+			if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
+			    !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+				expire_at += afs_cell_gc_delay;
+			if (purging || expire_at <= now)
+				sched_cell = true;
+			else if (expire_at < next_manage)
+				next_manage = expire_at;
 		}
 
-		write_unlock(&net->cells_lock);
+		if (!purging) {
+			if (cell->dns_expiry <= now)
+				sched_cell = true;
+			else if (cell->dns_expiry <= next_manage)
+				next_manage = cell->dns_expiry;
+		}
+
+		if (sched_cell)
+			queue_work(afs_wq, &cell->manager);
+	}
+
+	read_sequnlock_excl(&net->cells_lock);
 
-		if (cell) {
-			_debug("PURGING CELL %s (%d)",
-			       cell->name, atomic_read(&cell->usage));
+	/* Update the timer on the way out.  We have to pass an increment on
+	 * cells_outstanding in the namespace that we are in to the timer or
+	 * the work scheduler.
+	 */
+	if (!purging && next_manage < TIME64_MAX) {
+		now = ktime_get_real_seconds();
 
-			/* now the cell should be left with no references */
-			afs_cell_destroy(net, cell);
+		if (next_manage - now <= 0) {
+			if (queue_work(afs_wq, &net->cells_manager))
+				atomic_inc(&net->cells_outstanding);
+		} else {
+			afs_set_cell_timer(net, next_manage - now);
 		}
 	}
 
-	up_write(&net->cells_sem);
+	afs_dec_cells_outstanding(net);
+	_leave(" [%d]", atomic_read(&net->cells_outstanding));
+}
+
+/*
+ * Purge in-memory cell database.
+ */
+void afs_cell_purge(struct afs_net *net)
+{
+	struct afs_cell *ws;
+
+	_enter("");
+
+	write_seqlock(&net->cells_lock);
+	ws = net->ws_cell;
+	net->ws_cell = NULL;
+	write_sequnlock(&net->cells_lock);
+	afs_put_cell(net, ws);
+
+	_debug("del timer");
+	if (del_timer_sync(&net->cells_timer))
+		atomic_dec(&net->cells_outstanding);
+
+	_debug("kick mgr");
+	afs_queue_cell_manager(net);
+
+	_debug("wait");
+	wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait,
+			 TASK_UNINTERRUPTIBLE);
 	_leave("");
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7c318666e436..51e3825b5ffb 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -207,13 +207,14 @@ struct afs_net {
 	atomic_t		nr_superblocks;
 
 	/* Cell database */
-	struct list_head	cells;
+	struct rb_root		cells;
 	struct afs_cell		*ws_cell;
-	rwlock_t		cells_lock;
-	struct rw_semaphore	cells_sem;
-	wait_queue_head_t	cells_freeable_wq;
+	struct work_struct	cells_manager;
+	struct timer_list	cells_timer;
+	atomic_t		cells_outstanding;
+	seqlock_t		cells_lock;
 
-	struct rw_semaphore	proc_cells_sem;
+	spinlock_t		proc_cells_lock;
 	struct list_head	proc_cells;
 
 	/* Volume location database */
@@ -242,14 +243,26 @@ struct afs_net {
 
 extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns
 
+enum afs_cell_state {
+	AFS_CELL_UNSET,
+	AFS_CELL_ACTIVATING,
+	AFS_CELL_ACTIVE,
+	AFS_CELL_DEACTIVATING,
+	AFS_CELL_INACTIVE,
+	AFS_CELL_FAILED,
+};
+
 /*
  * AFS cell record
  */
 struct afs_cell {
-	atomic_t		usage;
-	struct list_head	link;		/* main cell list link */
-	struct afs_net		*net;		/* The network namespace */
+	union {
+		struct rcu_head	rcu;
+		struct rb_node	net_node;	/* Node in net->cells */
+	};
+	struct afs_net		*net;
 	struct key		*anonymous_key;	/* anonymous user key for this cell */
+	struct work_struct	manager;	/* Manager for init/deinit/dns */
 	struct list_head	proc_link;	/* /proc cell list link */
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_cookie	*cache;		/* caching cookie */
@@ -262,12 +275,26 @@ struct afs_cell {
 	/* volume location record management */
 	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
 	struct list_head	vl_list;	/* cell's active VL record list */
+	time64_t		dns_expiry;	/* Time AFSDB/SRV record expires */
+	time64_t		last_inactive;	/* Time of last drop of usage count */
+	atomic_t		usage;
+	unsigned long		flags;
+#define AFS_CELL_FL_NOT_READY	0		/* The cell record is not ready for use */
+#define AFS_CELL_FL_NO_GC	1		/* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_NOT_FOUND	2		/* Permanent DNS error */
+#define AFS_CELL_FL_DNS_FAIL	3		/* Failed to access DNS */
+	enum afs_cell_state	state;
+	short			error;
+
 	spinlock_t		vl_lock;	/* vl_list lock */
+
+	/* VLDB server list. */
+	seqlock_t		vl_addrs_lock;
 	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
 	unsigned short		vl_curr_svix;	/* current server index */
 	struct sockaddr_rxrpc	vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
-
-	char			name[0];	/* cell name - must go last */
+	u8			name_len;	/* Length of name */
+	char			name[64 + 1];	/* Cell name, case-flattened and NUL-padded */
 };
 
 /*
@@ -494,17 +521,20 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
 /*
  * cell.c
  */
-static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
+ static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
 {
 	if (cell)
 		atomic_inc(&cell->usage);
 	return cell;
 }
-extern int afs_cell_init(struct afs_net *, char *);
-extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool);
-extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool);
-extern struct afs_cell *afs_grab_cell(struct afs_cell *);
+
+extern int afs_cell_init(struct afs_net *, const char *);
+extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
+extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
+					const char *, bool);
 extern void afs_put_cell(struct afs_net *, struct afs_cell *);
+extern void afs_manage_cells(struct work_struct *);
+extern void afs_cells_timer(struct timer_list *);
 extern void __net_exit afs_cell_purge(struct afs_net *);
 
 /*
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 010e2e1a40f4..e7f87d723761 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -46,12 +46,15 @@ static int __net_init afs_net_init(struct afs_net *net)
 
 	INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
 	mutex_init(&net->socket_mutex);
-	INIT_LIST_HEAD(&net->cells);
-	rwlock_init(&net->cells_lock);
-	init_rwsem(&net->cells_sem);
-	init_waitqueue_head(&net->cells_freeable_wq);
-	init_rwsem(&net->proc_cells_sem);
+
+	net->cells = RB_ROOT;
+	seqlock_init(&net->cells_lock);
+	INIT_WORK(&net->cells_manager, afs_manage_cells);
+	timer_setup(&net->cells_timer, afs_cells_timer, 0);
+
+	spin_lock_init(&net->proc_cells_lock);
 	INIT_LIST_HEAD(&net->proc_cells);
+
 	INIT_LIST_HEAD(&net->vl_updates);
 	INIT_LIST_HEAD(&net->vl_graveyard);
 	INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper);
@@ -83,11 +86,14 @@ static int __net_init afs_net_init(struct afs_net *net)
 	return 0;
 
 error_open_socket:
+	net->live = false;
 	afs_vlocation_purge(net);
 	afs_cell_purge(net);
 error_cell_init:
+	net->live = false;
 	afs_proc_cleanup(net);
 error_proc:
+	net->live = false;
 	return ret;
 }
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index d00d550ff2ef..08565429615d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -186,7 +186,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
 {
 	struct afs_net *net = afs_seq2net(m);
 
-	down_read(&net->proc_cells_sem);
+	rcu_read_lock();
 	return seq_list_start_head(&net->proc_cells, *_pos);
 }
 
@@ -205,9 +205,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
  */
 static void afs_proc_cells_stop(struct seq_file *m, void *v)
 {
-	struct afs_net *net = afs_seq2net(m);
-
-	up_read(&net->proc_cells_sem);
+	rcu_read_unlock();
 }
 
 /*
@@ -225,8 +223,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
 	}
 
 	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %s\n",
-		   atomic_read(&cell->usage), cell->name);
+	seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
 	return 0;
 }
 
@@ -279,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 	if (strcmp(kbuf, "add") == 0) {
 		struct afs_cell *cell;
 
-		cell = afs_cell_create(net, name, strlen(name), args, false);
+		cell = afs_lookup_cell(net, name, strlen(name), args, true);
 		if (IS_ERR(cell)) {
 			ret = PTR_ERR(cell);
 			goto done;
 		}
 
-		afs_put_cell(net, cell);
+		set_bit(AFS_CELL_FL_NO_GC, &cell->flags);
 		printk("kAFS: Added new cell '%s'\n", name);
 	} else {
 		goto inval;
@@ -354,7 +351,7 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
 {
 	struct proc_dir_entry *dir;
 
-	_enter("%p{%s}", cell, cell->name);
+	_enter("%p{%s},%p", cell, cell->name, net->proc_afs);
 
 	dir = proc_mkdir(cell->name, net->proc_afs);
 	if (!dir)
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e62fb1bdadc6..3d53b78b350d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -200,10 +200,11 @@ static int afs_parse_options(struct afs_mount_params *params,
 		token = match_token(p, afs_options_list, args);
 		switch (token) {
 		case afs_opt_cell:
-			cell = afs_cell_lookup(params->net,
-					       args[0].from,
-					       args[0].to - args[0].from,
-					       false);
+			rcu_read_lock();
+			cell = afs_lookup_cell_rcu(params->net,
+						   args[0].from,
+						   args[0].to - args[0].from);
+			rcu_read_unlock();
 			if (IS_ERR(cell))
 				return PTR_ERR(cell);
 			afs_put_cell(params->net, params->cell);
@@ -308,7 +309,8 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 
 	/* lookup the cell record */
 	if (cellname || !params->cell) {
-		cell = afs_cell_lookup(params->net, cellname, cellnamesz, true);
+		cell = afs_lookup_cell(params->net, cellname, cellnamesz,
+				       NULL, false);
 		if (IS_ERR(cell)) {
 			printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
 			       cellnamesz, cellnamesz, cellname ?: "");
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 2830e4f48d85..e58e00ee9747 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler,
 	struct afs_cell *cell = vnode->volume->cell;
 	size_t namelen;
 
-	namelen = strlen(cell->name);
+	namelen = cell->name_len;
 	if (size == 0)
 		return namelen;
 	if (namelen > size)
-- 
cgit v1.2.3


From 8b2a464ced77fe35be72ab7d38152a9439daf8d3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:50 +0000
Subject: afs: Add an address list concept

Add an RCU replaceable address list structure to hold a list of server
addresses.  The list also holds the

To this end:

 (1) A cell's VL server address list can be loaded directly via insmod or
     echo to /proc/fs/afs/cells or dynamically from a DNS query for AFSDB
     or SRV records.

 (2) Anyone wanting to use a cell's VL server address must wait until the
     cell record comes online and has tried to obtain some addresses.

 (3) An FS server's address list, for the moment, has a single entry that
     is the key to the server list.  This will change in the future when a
     server is instead keyed on its UUID and the VL.GetAddrsU operation is
     used.

 (4) An 'address cursor' concept is introduced to handle iteration through
     the address list.  This is passed to the afs_make_call() as, in the
     future, stuff (such as abort code) that doesn't outlast the call will
     be returned in it.

In the future, we might want to annotate the list with information about
how each address fares.  We might then want to propagate such annotations
over address list replacement.

Whilst we're at it, we allow IPv6 addresses to be specified in
colon-delimited lists by enclosing them in square brackets.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile    |   1 +
 fs/afs/addr_list.c | 308 +++++++++++++++++++++++++++++++++++++++++
 fs/afs/cell.c      | 169 ++++++++---------------
 fs/afs/fsclient.c  | 121 ++++++++---------
 fs/afs/internal.h  | 120 +++++++++++-----
 fs/afs/proc.c      |  23 ++--
 fs/afs/rxrpc.c     |   3 +-
 fs/afs/server.c    |  70 ++++++----
 fs/afs/vlclient.c  |   8 +-
 fs/afs/vlocation.c |  75 +++++-----
 fs/afs/vnode.c     | 392 ++++++++++++++++++++---------------------------------
 fs/afs/volume.c    | 115 +++++++++++-----
 12 files changed, 844 insertions(+), 561 deletions(-)
 create mode 100644 fs/afs/addr_list.c

(limited to 'fs/afs')

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 641148208e90..849383986d3b 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
 
 kafs-objs := \
 	$(afs-cache-y) \
+	addr_list.o \
 	callback.o \
 	cell.o \
 	cmservice.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000000..ecb9c72aebd2
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,308 @@
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+#define AFS_MAX_ADDRESSES						\
+	((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /	\
+			sizeof(struct sockaddr_rxrpc)))
+
+/*
+ * Release an address list.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist)
+{
+	if (alist && refcount_dec_and_test(&alist->usage))
+		call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+}
+
+/*
+ * Allocate an address list.
+ */
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
+					 unsigned short service,
+					 unsigned short port)
+{
+	struct afs_addr_list *alist;
+	unsigned int i;
+
+	_enter("%u,%u,%u", nr, service, port);
+
+	alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
+			GFP_KERNEL);
+	if (!alist)
+		return NULL;
+
+	refcount_set(&alist->usage, 1);
+
+	for (i = 0; i < nr; i++) {
+		struct sockaddr_rxrpc *srx = &alist->addrs[i];
+		srx->srx_family			= AF_RXRPC;
+		srx->srx_service		= service;
+		srx->transport_type		= SOCK_DGRAM;
+		srx->transport_len		= sizeof(srx->transport.sin6);
+		srx->transport.sin6.sin6_family	= AF_INET6;
+		srx->transport.sin6.sin6_port	= htons(port);
+	}
+
+	return alist;
+}
+
+/*
+ * Parse a text string consisting of delimited addresses.
+ */
+struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
+					   char delim,
+					   unsigned short service,
+					   unsigned short port)
+{
+	struct afs_addr_list *alist;
+	const char *p, *end = text + len;
+	unsigned int nr = 0;
+
+	_enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+	if (!len)
+		return ERR_PTR(-EDESTADDRREQ);
+
+	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+		delim = ',';
+
+	/* Count the addresses */
+	p = text;
+	do {
+		if (!*p)
+			return ERR_PTR(-EINVAL);
+		if (*p == delim)
+			continue;
+		nr++;
+		if (*p == '[') {
+			p++;
+			if (p == end)
+				return ERR_PTR(-EINVAL);
+			p = memchr(p, ']', end - p);
+			if (!p)
+				return ERR_PTR(-EINVAL);
+			p++;
+			if (p >= end)
+				break;
+		}
+
+		p = memchr(p, delim, end - p);
+		if (!p)
+			break;
+		p++;
+	} while (p < end);
+
+	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
+	if (nr > AFS_MAX_ADDRESSES)
+		nr = AFS_MAX_ADDRESSES;
+
+	alist = afs_alloc_addrlist(nr, service, port);
+	if (!alist)
+		return ERR_PTR(-ENOMEM);
+
+	/* Extract the addresses */
+	p = text;
+	do {
+		struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
+		char tdelim = delim;
+
+		if (*p == delim) {
+			p++;
+			continue;
+		}
+
+		if (*p == '[') {
+			p++;
+			tdelim = ']';
+		}
+
+		if (in4_pton(p, end - p,
+			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+			     tdelim, &p)) {
+			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+		} else if (in6_pton(p, end - p,
+				    srx->transport.sin6.sin6_addr.s6_addr,
+				    tdelim, &p)) {
+			/* Nothing to do */
+		} else {
+			goto bad_address;
+		}
+
+		if (tdelim == ']') {
+			if (p == end || *p != ']')
+				goto bad_address;
+			p++;
+		}
+
+		if (p < end) {
+			if (*p == '+') {
+				/* Port number specification "+1234" */
+				unsigned int xport = 0;
+				p++;
+				if (p >= end || !isdigit(*p))
+					goto bad_address;
+				do {
+					xport *= 10;
+					xport += *p - '0';
+					if (xport > 65535)
+						goto bad_address;
+					p++;
+				} while (p < end && isdigit(*p));
+				srx->transport.sin6.sin6_port = htons(xport);
+			} else if (*p == delim) {
+				p++;
+			} else {
+				goto bad_address;
+			}
+		}
+
+		alist->nr_addrs++;
+	} while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
+
+	_leave(" = [nr %u]", alist->nr_addrs);
+	return alist;
+
+bad_address:
+	kfree(alist);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Compare old and new address lists to see if there's been any change.
+ * - How to do this in better than O(Nlog(N)) time?
+ *   - We don't really want to sort the address list, but would rather take the
+ *     list as we got it so as not to undo record rotation by the DNS server.
+ */
+#if 0
+static int afs_cmp_addr_list(const struct afs_addr_list *a1,
+			     const struct afs_addr_list *a2)
+{
+}
+#endif
+
+/*
+ * Perform a DNS query for VL servers and build a up an address list.
+ */
+struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+	struct afs_addr_list *alist;
+	char *vllist = NULL;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	ret = dns_query("afsdb", cell->name, cell->name_len,
+			"ipv4", &vllist, _expiry);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
+				     VL_SERVICE, AFS_VL_PORT);
+	if (IS_ERR(alist)) {
+		kfree(vllist);
+		if (alist != ERR_PTR(-ENOMEM))
+			pr_err("Failed to parse DNS data\n");
+		return alist;
+	}
+
+	kfree(vllist);
+	return alist;
+}
+
+/*
+ * Get an address to try.
+ */
+bool afs_iterate_addresses(struct afs_addr_cursor *ac)
+{
+	_enter("%hu+%hd", ac->start, (short)ac->index);
+
+	if (!ac->alist)
+		return false;
+
+	if (ac->begun) {
+		ac->index++;
+		if (ac->index == ac->alist->nr_addrs)
+			ac->index = 0;
+
+		if (ac->index == ac->start) {
+			ac->error = -EDESTADDRREQ;
+			return false;
+		}
+	}
+
+	ac->begun = true;
+	ac->responded = false;
+	ac->addr = &ac->alist->addrs[ac->index];
+	return true;
+}
+
+/*
+ * Release an address list cursor.
+ */
+int afs_end_cursor(struct afs_addr_cursor *ac)
+{
+	if (ac->responded && ac->index != ac->start)
+		WRITE_ONCE(ac->alist->index, ac->index);
+
+	afs_put_addrlist(ac->alist);
+	ac->alist = NULL;
+	return ac->error;
+}
+
+/*
+ * Set the address cursor for iterating over VL servers.
+ */
+int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
+{
+	struct afs_addr_list *alist;
+	int ret;
+
+	if (!rcu_access_pointer(cell->vl_addrs)) {
+		ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+				  TASK_INTERRUPTIBLE);
+		if (ret < 0)
+			return ret;
+
+		if (!rcu_access_pointer(cell->vl_addrs) &&
+		    ktime_get_real_seconds() < cell->dns_expiry)
+			return cell->error;
+	}
+
+	read_lock(&cell->vl_addrs_lock);
+	alist = rcu_dereference_protected(cell->vl_addrs,
+					  lockdep_is_held(&cell->vl_addrs_lock));
+	if (alist->nr_addrs > 0)
+		afs_get_addrlist(alist);
+	else
+		alist = NULL;
+	read_unlock(&cell->vl_addrs_lock);
+
+	if (!alist)
+		return -EDESTADDRREQ;
+
+	ac->alist = alist;
+	ac->addr = NULL;
+	ac->start = READ_ONCE(alist->index);
+	ac->index = ac->start;
+	ac->error = 0;
+	ac->begun = false;
+	return 0;
+}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index e83103e8a6fb..a0e08d3a108c 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -9,7 +9,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/key.h>
 #include <linux/ctype.h>
@@ -152,68 +151,33 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 	init_rwsem(&cell->vl_sem);
 	INIT_LIST_HEAD(&cell->vl_list);
 	spin_lock_init(&cell->vl_lock);
-	seqlock_init(&cell->vl_addrs_lock);
-	cell->flags = (1 << AFS_CELL_FL_NOT_READY);
-
-	for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) {
-		struct sockaddr_rxrpc *srx = &cell->vl_addrs[i];
-		srx->srx_family			= AF_RXRPC;
-		srx->srx_service		= VL_SERVICE;
-		srx->transport_type		= SOCK_DGRAM;
-		srx->transport.sin6.sin6_family	= AF_INET6;
-		srx->transport.sin6.sin6_port	= htons(AFS_VL_PORT);
-	}
+	cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
+		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
+	rwlock_init(&cell->vl_addrs_lock);
 
 	/* Fill in the VL server list if we were given a list of addresses to
 	 * use.
 	 */
 	if (vllist) {
-		char delim = ':';
-
-		if (strchr(vllist, ',') || !strchr(vllist, '.'))
-			delim = ',';
-
-		do {
-			struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-
-			if (in4_pton(vllist, -1,
-				     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-				     delim, &vllist)) {
-				srx->transport_len = sizeof(struct sockaddr_in6);
-				srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-				srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-				srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-			} else if (in6_pton(vllist, -1,
-					    srx->transport.sin6.sin6_addr.s6_addr,
-					    delim, &vllist)) {
-				srx->transport_len = sizeof(struct sockaddr_in6);
-				srx->transport.sin6.sin6_family	= AF_INET6;
-			} else {
-				goto bad_address;
-			}
+		struct afs_addr_list *alist;
 
-			cell->vl_naddrs++;
-			if (!*vllist)
-				break;
-			vllist++;
-
-		} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist);
+		alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
+					     VL_SERVICE, AFS_VL_PORT);
+		if (IS_ERR(alist)) {
+			ret = PTR_ERR(alist);
+			goto parse_failed;
+		}
 
-		/* Disable DNS refresh for manually-specified cells */
+		rcu_assign_pointer(cell->vl_addrs, alist);
 		cell->dns_expiry = TIME64_MAX;
-	} else {
-		/* We're going to need to 'refresh' this cell's VL server list
-		 * from the DNS before we can use it.
-		 */
-		cell->dns_expiry = S64_MIN;
 	}
 
 	_leave(" = %p", cell);
 	return cell;
 
-bad_address:
-	printk(KERN_ERR "kAFS: bad VL server IP address\n");
-	ret = -EINVAL;
+parse_failed:
+	if (ret == -EINVAL)
+		printk(KERN_ERR "kAFS: bad VL server IP address\n");
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
@@ -325,7 +289,6 @@ cell_already_exists:
 	if (excl) {
 		ret = -EEXIST;
 	} else {
-		ASSERTCMP(atomic_read(&cursor->usage), >=, 1);
 		afs_get_cell(cursor);
 		ret = 0;
 	}
@@ -333,8 +296,10 @@ cell_already_exists:
 	kfree(candidate);
 	if (ret == 0)
 		goto wait_for_cell;
+	goto error_noput;
 error:
 	afs_put_cell(net, cell);
+error_noput:
 	_leave(" = %d [error]", ret);
 	return ERR_PTR(ret);
 }
@@ -396,78 +361,50 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
  */
 static void afs_update_cell(struct afs_cell *cell)
 {
+	struct afs_addr_list *alist, *old;
 	time64_t now, expiry;
-	char *vllist = NULL;
-	int ret;
 
 	_enter("%s", cell->name);
 
-	ret = dns_query("afsdb", cell->name, cell->name_len,
-			"ipv4", &vllist, &expiry);
-	_debug("query %d", ret);
-	switch (ret) {
-	case 0 ... INT_MAX:
-		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-		goto parse_dns_data;
+	alist = afs_dns_query(cell, &expiry);
+	if (IS_ERR(alist)) {
+		switch (PTR_ERR(alist)) {
+		case -ENODATA:
+			/* The DNS said that the cell does not exist */
+			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+			clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+			cell->dns_expiry = ktime_get_real_seconds() + 61;
+			break;
 
-	case -ENODATA:
-		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-		cell->dns_expiry = ktime_get_real_seconds() + 61;
-		cell->error = -EDESTADDRREQ;
-		goto out;
+		case -EAGAIN:
+		case -ECONNREFUSED:
+		default:
+			set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+			cell->dns_expiry = ktime_get_real_seconds() + 10;
+			break;
+		}
 
-	case -EAGAIN:
-	case -ECONNREFUSED:
-	default:
-		/* Unable to query DNS. */
-		set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		cell->dns_expiry = ktime_get_real_seconds() + 10;
 		cell->error = -EDESTADDRREQ;
-		goto out;
-	}
-
-parse_dns_data:
-	write_seqlock(&cell->vl_addrs_lock);
-
-	ret = -EINVAL;
-	do {
-		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-
-		if (in4_pton(vllist, -1,
-			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-			     ',', (const char **)&vllist)) {
-			srx->transport_len = sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-		} else if (in6_pton(vllist, -1,
-				    srx->transport.sin6.sin6_addr.s6_addr,
-				    ',', (const char **)&vllist)) {
-			srx->transport_len = sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_family	= AF_INET6;
-		} else {
-			goto bad_address;
-		}
+	} else {
+		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
 
-		cell->vl_naddrs++;
-		if (!*vllist)
-			break;
-		vllist++;
+		/* Exclusion on changing vl_addrs is achieved by a
+		 * non-reentrant work item.
+		 */
+		old = rcu_dereference_protected(cell->vl_addrs, true);
+		rcu_assign_pointer(cell->vl_addrs, alist);
+		cell->dns_expiry = expiry;
 
-	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS);
+		if (old)
+			afs_put_addrlist(old);
+	}
 
-	if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS)
-		memset(cell->vl_addrs + cell->vl_naddrs, 0,
-		       (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0]));
+	if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
+		wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
 
 	now = ktime_get_real_seconds();
-	cell->dns_expiry = expiry;
-	afs_set_cell_timer(cell->net, expiry - now);
-bad_address:
-	write_sequnlock(&cell->vl_addrs_lock);
-out:
+	afs_set_cell_timer(cell->net, cell->dns_expiry - now);
 	_leave("");
 }
 
@@ -482,6 +419,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
 	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
+	afs_put_addrlist(cell->vl_addrs);
 	key_put(cell->anonymous_key);
 	kfree(cell);
 
@@ -514,6 +452,15 @@ void afs_cells_timer(struct timer_list *timer)
 		afs_dec_cells_outstanding(net);
 }
 
+/*
+ * Get a reference on a cell record.
+ */
+struct afs_cell *afs_get_cell(struct afs_cell *cell)
+{
+	atomic_inc(&cell->usage);
+	return cell;
+}
+
 /*
  * Drop a reference on a cell record.
  */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 680c02d510f7..6614d0a78daa 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -297,7 +297,7 @@ static const struct afs_call_type afs_RXFSFetchStatus = {
 /*
  * fetch the status information for a file
  */
-int afs_fs_fetch_file_status(struct afs_server *server,
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc,
 			     struct key *key,
 			     struct afs_vnode *vnode,
 			     struct afs_volsync *volsync,
@@ -325,9 +325,9 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 	bp[2] = htonl(vnode->fid.vnode);
 	bp[3] = htonl(vnode->fid.unique);
 
-	call->cb_break = vnode->cb_break + server->cb_s_break;
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -502,7 +502,7 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
 /*
  * fetch data from a very large file
  */
-static int afs_fs_fetch_data64(struct afs_server *server,
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
 			       struct key *key,
 			       struct afs_vnode *vnode,
 			       struct afs_read *req,
@@ -536,15 +536,15 @@ static int afs_fs_fetch_data64(struct afs_server *server,
 	bp[7] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	call->cb_break = vnode->cb_break + server->cb_s_break;
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * fetch data from a file
  */
-int afs_fs_fetch_data(struct afs_server *server,
+int afs_fs_fetch_data(struct afs_fs_cursor *fc,
 		      struct key *key,
 		      struct afs_vnode *vnode,
 		      struct afs_read *req,
@@ -557,7 +557,7 @@ int afs_fs_fetch_data(struct afs_server *server,
 	if (upper_32_bits(req->pos) ||
 	    upper_32_bits(req->len) ||
 	    upper_32_bits(req->pos + req->len))
-		return afs_fs_fetch_data64(server, key, vnode, req, async);
+		return afs_fs_fetch_data64(fc, key, vnode, req, async);
 
 	_enter("");
 
@@ -581,9 +581,9 @@ int afs_fs_fetch_data(struct afs_server *server,
 	bp[5] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	call->cb_break = vnode->cb_break + server->cb_s_break;
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -625,7 +625,7 @@ static const struct afs_call_type afs_RXFSCreateXXXX = {
 /*
  * create a file or make a directory
  */
-int afs_fs_create(struct afs_server *server,
+int afs_fs_create(struct afs_fs_cursor *fc,
 		  struct key *key,
 		  struct afs_vnode *vnode,
 		  const char *name,
@@ -677,8 +677,8 @@ int afs_fs_create(struct afs_server *server,
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -717,7 +717,7 @@ static const struct afs_call_type afs_RXFSRemoveXXXX = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_server *server,
+int afs_fs_remove(struct afs_fs_cursor *fc,
 		  struct key *key,
 		  struct afs_vnode *vnode,
 		  const char *name,
@@ -756,8 +756,8 @@ int afs_fs_remove(struct afs_server *server,
 		bp = (void *) bp + padsz;
 	}
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -797,7 +797,7 @@ static const struct afs_call_type afs_RXFSLink = {
 /*
  * make a hard link
  */
-int afs_fs_link(struct afs_server *server,
+int afs_fs_link(struct afs_fs_cursor *fc,
 		struct key *key,
 		struct afs_vnode *dvnode,
 		struct afs_vnode *vnode,
@@ -840,8 +840,8 @@ int afs_fs_link(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -882,7 +882,7 @@ static const struct afs_call_type afs_RXFSSymlink = {
 /*
  * create a symbolic link
  */
-int afs_fs_symlink(struct afs_server *server,
+int afs_fs_symlink(struct afs_fs_cursor *fc,
 		   struct key *key,
 		   struct afs_vnode *vnode,
 		   const char *name,
@@ -943,8 +943,8 @@ int afs_fs_symlink(struct afs_server *server,
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -986,7 +986,7 @@ static const struct afs_call_type afs_RXFSRename = {
 /*
  * create a symbolic link
  */
-int afs_fs_rename(struct afs_server *server,
+int afs_fs_rename(struct afs_fs_cursor *fc,
 		  struct key *key,
 		  struct afs_vnode *orig_dvnode,
 		  const char *orig_name,
@@ -1045,8 +1045,8 @@ int afs_fs_rename(struct afs_server *server,
 		bp = (void *) bp + n_padsz;
 	}
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -1094,7 +1094,7 @@ static const struct afs_call_type afs_RXFSStoreData64 = {
 /*
  * store a set of pages to a very large file
  */
-static int afs_fs_store_data64(struct afs_server *server,
+static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 			       struct afs_writeback *wb,
 			       pgoff_t first, pgoff_t last,
 			       unsigned offset, unsigned to,
@@ -1147,14 +1147,14 @@ static int afs_fs_store_data64(struct afs_server *server,
 	*bp++ = htonl(i_size >> 32);
 	*bp++ = htonl((u32) i_size);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * store a set of pages
  */
-int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
+int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 		      pgoff_t first, pgoff_t last,
 		      unsigned offset, unsigned to,
 		      bool async)
@@ -1183,7 +1183,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	       (unsigned long long) i_size);
 
 	if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
-		return afs_fs_store_data64(server, wb, first, last, offset, to,
+		return afs_fs_store_data64(fc, wb, first, last, offset, to,
 					   size, pos, i_size, async);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
@@ -1221,8 +1221,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	*bp++ = htonl(size);
 	*bp++ = htonl(i_size);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -1279,7 +1279,7 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
  * set the attributes on a very large file, using FS.StoreData rather than
  * FS.StoreStatus so as to alter the file size also
  */
-static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
+static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key,
 				 struct afs_vnode *vnode, struct iattr *attr,
 				 bool async)
 {
@@ -1319,15 +1319,15 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
 	*bp++ = htonl(attr->ia_size >> 32);	/* new file length */
 	*bp++ = htonl((u32) attr->ia_size);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
  * so as to alter the file size also
  */
-static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
+static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key,
 			       struct afs_vnode *vnode, struct iattr *attr,
 			       bool async)
 {
@@ -1340,8 +1340,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
 	if (attr->ia_size >> 32)
-		return afs_fs_setattr_size64(server, key, vnode, attr,
-					     async);
+		return afs_fs_setattr_size64(fc, key, vnode, attr, async);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
 				   (4 + 6 + 3) * 4,
@@ -1367,15 +1366,15 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
 	*bp++ = 0;				/* size of write */
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData if there's a change in file
  * size, and FS.StoreStatus otherwise
  */
-int afs_fs_setattr(struct afs_server *server, struct key *key,
+int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key,
 		   struct afs_vnode *vnode, struct iattr *attr,
 		   bool async)
 {
@@ -1384,8 +1383,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 	__be32 *bp;
 
 	if (attr->ia_valid & ATTR_SIZE)
-		return afs_fs_setattr_size(server, key, vnode, attr,
-					   async);
+		return afs_fs_setattr_size(fc, key, vnode, attr, async);
 
 	_enter(",%x,{%x:%u},,",
 	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
@@ -1409,8 +1407,8 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -1607,7 +1605,7 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = {
 /*
  * fetch the status of a volume
  */
-int afs_fs_get_volume_status(struct afs_server *server,
+int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 			     struct key *key,
 			     struct afs_vnode *vnode,
 			     struct afs_volume_status *vs,
@@ -1640,8 +1638,8 @@ int afs_fs_get_volume_status(struct afs_server *server,
 	bp[0] = htonl(FSGETVOLUMESTATUS);
 	bp[1] = htonl(vnode->fid.vid);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -1696,7 +1694,7 @@ static const struct afs_call_type afs_RXFSReleaseLock = {
 /*
  * get a lock on a file
  */
-int afs_fs_set_lock(struct afs_server *server,
+int afs_fs_set_lock(struct afs_fs_cursor *fc,
 		    struct key *key,
 		    struct afs_vnode *vnode,
 		    afs_lock_type_t type,
@@ -1723,14 +1721,14 @@ int afs_fs_set_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.unique);
 	*bp++ = htonl(type);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * extend a lock on a file
  */
-int afs_fs_extend_lock(struct afs_server *server,
+int afs_fs_extend_lock(struct afs_fs_cursor *fc,
 		       struct key *key,
 		       struct afs_vnode *vnode,
 		       bool async)
@@ -1755,14 +1753,14 @@ int afs_fs_extend_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
  * release a lock on a file
  */
-int afs_fs_release_lock(struct afs_server *server,
+int afs_fs_release_lock(struct afs_fs_cursor *fc,
 			struct key *key,
 			struct afs_vnode *vnode,
 			bool async)
@@ -1787,8 +1785,8 @@ int afs_fs_release_lock(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, server);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->server);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -1812,6 +1810,7 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
  * Flush all the callbacks we have on a server.
  */
 int afs_fs_give_up_all_callbacks(struct afs_server *server,
+				 struct afs_addr_cursor *ac,
 				 struct key *key,
 				 bool async)
 {
@@ -1831,5 +1830,5 @@ int afs_fs_give_up_all_callbacks(struct afs_server *server,
 	*bp++ = htonl(FSGIVEUPALLCALLBACKS);
 
 	/* Can't take a ref on server */
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	return afs_make_call(ac, call, GFP_NOFS, async);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 51e3825b5ffb..df52bf18a263 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -70,6 +70,17 @@ enum afs_call_state {
 	AFS_CALL_COMPLETE,	/* Completed or failed */
 };
 
+/*
+ * List of server addresses.
+ */
+struct afs_addr_list {
+	struct rcu_head		rcu;		/* Must be first */
+	refcount_t		usage;
+	unsigned short		nr_addrs;
+	unsigned short		index;		/* Address currently in use */
+	struct sockaddr_rxrpc	addrs[];
+};
+
 /*
  * a record of an in-progress RxRPC call
  */
@@ -283,16 +294,15 @@ struct afs_cell {
 #define AFS_CELL_FL_NO_GC	1		/* The cell was added manually, don't auto-gc */
 #define AFS_CELL_FL_NOT_FOUND	2		/* Permanent DNS error */
 #define AFS_CELL_FL_DNS_FAIL	3		/* Failed to access DNS */
+#define AFS_CELL_FL_NO_LOOKUP_YET 4		/* Not completed first DNS lookup yet */
 	enum afs_cell_state	state;
 	short			error;
 
 	spinlock_t		vl_lock;	/* vl_list lock */
 
 	/* VLDB server list. */
-	seqlock_t		vl_addrs_lock;
-	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
-	unsigned short		vl_curr_svix;	/* current server index */
-	struct sockaddr_rxrpc	vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
+	rwlock_t		vl_addrs_lock;	/* Lock on vl_addrs */
+	struct afs_addr_list	__rcu *vl_addrs; /* List of VL servers */
 	u8			name_len;	/* Length of name */
 	char			name[64 + 1];	/* Cell name, case-flattened and NUL-padded */
 };
@@ -343,7 +353,7 @@ struct afs_vlocation {
 struct afs_server {
 	atomic_t		usage;
 	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
-	struct sockaddr_rxrpc	addr;		/* server address */
+	struct afs_addr_list	__rcu *addrs;	/* List of addresses for this server */
 	struct afs_net		*net;		/* Network namespace in which the server resides */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
@@ -485,7 +495,48 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
+/*
+ * Cursor for iterating over a server's address list.
+ */
+struct afs_addr_cursor {
+	struct afs_addr_list	*alist;		/* Current address list (pins ref) */
+	struct sockaddr_rxrpc	*addr;
+	unsigned short		start;		/* Starting point in alist->addrs[] */
+	unsigned short		index;		/* Wrapping offset from start to current addr */
+	short			error;
+	bool			begun;		/* T if we've begun iteration */
+	bool			responded;	/* T if the current address responded */
+};
+
+/*
+ * Cursor for iterating over a set of fileservers.
+ */
+struct afs_fs_cursor {
+	struct afs_addr_cursor	ac;
+	struct afs_server	*server;	/* Current server (pins ref) */
+};
+
 /*****************************************************************************/
+/*
+ * addr_list.c
+ */
+static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
+{
+	if (alist)
+		refcount_inc(&alist->usage);
+	return alist;
+}
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
+						unsigned short,
+						unsigned short);
+extern void afs_put_addrlist(struct afs_addr_list *);
+extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
+						  unsigned short, unsigned short);
+extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern bool afs_iterate_addresses(struct afs_addr_cursor *);
+extern int afs_end_cursor(struct afs_addr_cursor *);
+extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
+
 /*
  * cache.c
  */
@@ -521,17 +572,11 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
 /*
  * cell.c
  */
- static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
-{
-	if (cell)
-		atomic_inc(&cell->usage);
-	return cell;
-}
-
 extern int afs_cell_init(struct afs_net *, const char *);
 extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
 extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
 					const char *, bool);
+extern struct afs_cell *afs_get_cell(struct afs_cell *);
 extern void afs_put_cell(struct afs_net *, struct afs_cell *);
 extern void afs_manage_cells(struct work_struct *);
 extern void afs_cells_timer(struct timer_list *);
@@ -574,40 +619,41 @@ extern int afs_flock(struct file *, int, struct file_lock *);
 /*
  * fsclient.c
  */
-extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *,
 				    struct afs_vnode *, struct afs_volsync *,
 				    bool);
 extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool);
-extern int afs_fs_fetch_data(struct afs_server *, struct key *,
+extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *,
 			     struct afs_vnode *, struct afs_read *, bool);
-extern int afs_fs_create(struct afs_server *, struct key *,
+extern int afs_fs_create(struct afs_fs_cursor *, struct key *,
 			 struct afs_vnode *, const char *, umode_t,
 			 struct afs_fid *, struct afs_file_status *,
 			 struct afs_callback *, bool);
-extern int afs_fs_remove(struct afs_server *, struct key *,
+extern int afs_fs_remove(struct afs_fs_cursor *, struct key *,
 			 struct afs_vnode *, const char *, bool, bool);
-extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
+extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *,
 		       struct afs_vnode *, const char *, bool);
-extern int afs_fs_symlink(struct afs_server *, struct key *,
+extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *,
 			  struct afs_vnode *, const char *, const char *,
 			  struct afs_fid *, struct afs_file_status *, bool);
-extern int afs_fs_rename(struct afs_server *, struct key *,
+extern int afs_fs_rename(struct afs_fs_cursor *, struct key *,
 			 struct afs_vnode *, const char *,
 			 struct afs_vnode *, const char *, bool);
-extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
+extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *,
 			     pgoff_t, pgoff_t, unsigned, unsigned, bool);
-extern int afs_fs_setattr(struct afs_server *, struct key *,
+extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *,
 			  struct afs_vnode *, struct iattr *, bool);
-extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
+extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *,
 				    struct afs_vnode *,
 				    struct afs_volume_status *, bool);
-extern int afs_fs_set_lock(struct afs_server *, struct key *,
+extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *,
 			   struct afs_vnode *, afs_lock_type_t, bool);
-extern int afs_fs_extend_lock(struct afs_server *, struct key *,
+extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *,
 			      struct afs_vnode *, bool);
-extern int afs_fs_release_lock(struct afs_server *, struct key *,
+extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *,
 			       struct afs_vnode *, bool);
-extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool);
+extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *,
+					struct key *, bool);
 
 /*
  * inode.c
@@ -697,7 +743,7 @@ extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
 extern int afs_queue_call_work(struct afs_call *);
-extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
+extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
 					    size_t, size_t);
@@ -751,13 +797,11 @@ extern void __exit afs_fs_exit(void);
 /*
  * vlclient.c
  */
-extern int afs_vl_get_entry_by_name(struct afs_net *,
-				    struct sockaddr_rxrpc *, struct key *,
-				    const char *, struct afs_cache_vlocation *,
-				    bool);
-extern int afs_vl_get_entry_by_id(struct afs_net *,
-				  struct sockaddr_rxrpc *, struct key *,
-				  afs_volid_t, afs_voltype_t,
+extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *,
+				    struct key *, const char *,
+				    struct afs_cache_vlocation *, bool);
+extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *,
+				  struct key *, afs_volid_t, afs_voltype_t,
 				  struct afs_cache_vlocation *, bool);
 
 /*
@@ -828,9 +872,11 @@ static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
 
 extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
 extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
-extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
-extern int afs_volume_release_fileserver(struct afs_vnode *,
-					 struct afs_server *, int);
+extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
+extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
+extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *);
+extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
+extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *);
 
 /*
  * write.c
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 08565429615d..9cf9ce88a8dd 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -514,23 +514,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
  */
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 {
+	struct afs_addr_list *alist;
 	struct afs_cell *cell = m->private;
 	loff_t pos = *_pos;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	rcu_read_lock();
 
-	/* lock the list against modification */
-	down_read(&cell->vl_sem);
+	alist = rcu_dereference(cell->vl_addrs);
 
 	/* allow for the header line */
 	if (!pos)
 		return (void *) 1;
 	pos--;
 
-	if (pos >= cell->vl_naddrs)
+	if (!alist || pos >= alist->nr_addrs)
 		return NULL;
 
-	return &cell->vl_addrs[pos];
+	return alist->addrs + pos;
 }
 
 /*
@@ -539,17 +539,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
 					  loff_t *_pos)
 {
+	struct afs_addr_list *alist;
 	struct afs_cell *cell = p->private;
 	loff_t pos;
 
-	_enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos);
+	alist = rcu_dereference(cell->vl_addrs);
 
 	pos = *_pos;
 	(*_pos)++;
-	if (pos >= cell->vl_naddrs)
+	if (!alist || pos >= alist->nr_addrs)
 		return NULL;
 
-	return &cell->vl_addrs[pos];
+	return alist->addrs + pos;
 }
 
 /*
@@ -557,9 +558,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
  */
 static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
 {
-	struct afs_cell *cell = p->private;
-
-	up_read(&cell->vl_sem);
+	rcu_read_unlock();
 }
 
 /*
@@ -658,7 +657,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 	}
 
 	/* display one cell per line on subsequent lines */
-	sprintf(ipaddr, "%pISp", &server->addr.transport);
+	sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport);
 	seq_printf(m, "%3d %-15s %5d\n",
 		   atomic_read(&server->usage), ipaddr, server->fs_state);
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ac1e25f957b1..5ddfb7c4cf78 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -321,9 +321,10 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 /*
  * initiate a call
  */
-long afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
+long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		   gfp_t gfp, bool async)
 {
+	struct sockaddr_rxrpc *srx = ac->addr;
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4e66608fc805..9ca174b24f5b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -56,7 +56,9 @@ static int afs_install_server(struct afs_server *server)
 		p = *pp;
 		_debug("- consider %p", p);
 		xserver = rb_entry(p, struct afs_server, master_rb);
-		diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr));
+		diff = memcmp(&server->addrs->addrs[0],
+			      &xserver->addrs->addrs[0],
+			      sizeof(sizeof(server->addrs->addrs[0])));
 		if (diff < 0)
 			pp = &(*pp)->rb_left;
 		else if (diff > 0)
@@ -85,25 +87,38 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 	_enter("");
 
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
-	if (server) {
-		atomic_set(&server->usage, 1);
-		server->net = cell->net;
-		server->cell = cell;
-
-		INIT_LIST_HEAD(&server->link);
-		INIT_LIST_HEAD(&server->grave);
-		init_rwsem(&server->sem);
-		spin_lock_init(&server->fs_lock);
-		INIT_LIST_HEAD(&server->cb_interests);
-		rwlock_init(&server->cb_break_lock);
-
-		server->addr = *addr;
-		afs_inc_servers_outstanding(cell->net);
-		_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	} else {
-		_leave(" = NULL [nomem]");
-	}
+	if (!server)
+		goto enomem;
+	server->addrs = kzalloc(sizeof(struct afs_addr_list) +
+				sizeof(struct sockaddr_rxrpc),
+				GFP_KERNEL);
+	if (!server->addrs)
+		goto enomem_server;
+
+	atomic_set(&server->usage, 1);
+	server->net = cell->net;
+	server->cell = cell;
+
+	INIT_LIST_HEAD(&server->link);
+	INIT_LIST_HEAD(&server->grave);
+	init_rwsem(&server->sem);
+	spin_lock_init(&server->fs_lock);
+	INIT_LIST_HEAD(&server->cb_interests);
+	rwlock_init(&server->cb_break_lock);
+
+	refcount_set(&server->addrs->usage, 1);
+	server->addrs->nr_addrs = 1;
+	server->addrs->addrs[0] = *addr;
+	afs_inc_servers_outstanding(cell->net);
+
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	return server;
+
+enomem_server:
+	kfree(server);
+enomem:
+	_leave(" = NULL [nomem]");
+	return NULL;
 }
 
 /*
@@ -120,7 +135,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
 	read_lock(&cell->servers_lock);
 
 	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
+		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
 			goto found_server_quickly;
 	}
 	read_unlock(&cell->servers_lock);
@@ -135,7 +150,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
 
 	/* check the cell's server list again */
 	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
+		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
 			goto found_server;
 	}
 
@@ -204,7 +219,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
 
 		_debug("- consider %p", p);
 
-		diff = memcmp(srx, &server->addr, sizeof(*srx));
+		diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx));
 		if (diff < 0) {
 			p = p->rb_left;
 		} else if (diff > 0) {
@@ -269,10 +284,19 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
  */
 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 {
+	struct afs_addr_list *alist = server->addrs;
+	struct afs_addr_cursor ac = {
+		.alist	= alist,
+		.addr	= &alist->addrs[0],
+		.start	= alist->index,
+		.index	= alist->index,
+		.error	= 0,
+	};
 	_enter("%p", server);
 
-	afs_fs_give_up_all_callbacks(server, NULL, false);
+	afs_fs_give_up_all_callbacks(server, &ac, NULL, false);
 	afs_put_cell(net, server->cell);
+	afs_put_addrlist(server->addrs);
 	kfree(server);
 	afs_dec_servers_outstanding(net);
 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index aa79fe3f168b..1d1e7df77dd5 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -114,7 +114,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = {
  * dispatch a get volume entry by name operation
  */
 int afs_vl_get_entry_by_name(struct afs_net *net,
-			     struct sockaddr_rxrpc *addr,
+			     struct afs_addr_cursor *ac,
 			     struct key *key,
 			     const char *volname,
 			     struct afs_cache_vlocation *entry,
@@ -146,14 +146,14 @@ int afs_vl_get_entry_by_name(struct afs_net *net,
 		memset((void *) bp + volnamesz, 0, padsz);
 
 	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, async);
+	return afs_make_call(ac, call, GFP_KERNEL, async);
 }
 
 /*
  * dispatch a get volume entry by ID operation
  */
 int afs_vl_get_entry_by_id(struct afs_net *net,
-			   struct sockaddr_rxrpc *addr,
+			   struct afs_addr_cursor *ac,
 			   struct key *key,
 			   afs_volid_t volid,
 			   afs_voltype_t voltype,
@@ -179,5 +179,5 @@ int afs_vl_get_entry_by_id(struct afs_net *net,
 	*bp   = htonl(voltype);
 
 	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, async);
+	return afs_make_call(ac, call, GFP_KERNEL, async);
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index ec5ab8dc9bc8..52c31ad0ef60 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -29,22 +29,25 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 					   struct key *key,
 					   struct afs_cache_vlocation *vldb)
 {
-	struct afs_cell *cell = vl->cell;
-	int count, ret;
+	struct afs_addr_cursor ac;
+	int ret;
 
-	_enter("%s,%s", cell->name, vl->vldb.name);
+	_enter("%s,%s", vl->cell->name, vl->vldb.name);
+
+	ret = afs_set_vl_cursor(&ac, vl->cell);
+	if (ret < 0)
+		return ret;
 
 	down_write(&vl->cell->vl_sem);
+	
 	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
+	while (afs_iterate_addresses(&ac)) {
+		_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(cell->net, addr, key,
-					       vl->vldb.name, vldb, false);
-		switch (ret) {
+		ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key,
+						    vl->vldb.name, vldb, false);
+		switch (ac.error) {
 		case 0:
 			goto out;
 		case -ENOMEM:
@@ -52,26 +55,24 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 		case -ENETUNREACH:
 		case -EHOSTUNREACH:
 		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
+			if (ac.error == -ENOMEM || ac.error == -ENONET)
 				goto out;
-			goto rotate;
+			break;
 		case -ENOMEDIUM:
 		case -EKEYREJECTED:
 		case -EKEYEXPIRED:
+			ac.responded = true;
 			goto out;
 		default:
-			ret = -EIO;
-			goto rotate;
+			ac.responded = true;
+			ac.error = -EIO;
+			break;
 		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
 	}
 
 out:
 	up_write(&vl->cell->vl_sem);
+	ret = afs_end_cursor(&ac);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -86,22 +87,24 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 					 afs_voltype_t voltype,
 					 struct afs_cache_vlocation *vldb)
 {
-	struct afs_cell *cell = vl->cell;
-	int count, ret;
+	struct afs_addr_cursor ac;
+	int ret;
 
-	_enter("%s,%x,%d,", cell->name, volid, voltype);
+	_enter("%s,%x,%d,", vl->cell->name, volid, voltype);
+
+	ret = afs_set_vl_cursor(&ac, vl->cell);
+	if (ret < 0)
+		return ret;
 
 	down_write(&vl->cell->vl_sem);
 	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
+	while (afs_iterate_addresses(&ac)) {
+		_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid,
-					     voltype, vldb, false);
-		switch (ret) {
+		ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid,
+						  voltype, vldb, false);
+		switch (ac.error) {
 		case 0:
 			goto out;
 		case -ENOMEM:
@@ -109,10 +112,11 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 		case -ENETUNREACH:
 		case -EHOSTUNREACH:
 		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
+			if (ac.error == -ENOMEM || ac.error == -ENONET)
 				goto out;
 			goto rotate;
 		case -EBUSY:
+			ac.responded = true;
 			vl->upd_busy_cnt++;
 			if (vl->upd_busy_cnt <= 3) {
 				if (vl->upd_busy_cnt > 1) {
@@ -124,30 +128,31 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 			}
 			break;
 		case -ENOMEDIUM:
+			ac.responded = true;
 			vl->upd_rej_cnt++;
 			goto rotate;
 		default:
-			ret = -EIO;
+			ac.responded = true;
+			ac.error = -EIO;
 			goto rotate;
 		}
 
 		/* rotate the server records upon lookup failure */
 	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
 		vl->upd_busy_cnt = 0;
 	}
 
 out:
-	if (ret < 0 && vl->upd_rej_cnt > 0) {
+	if (ac.error < 0 && vl->upd_rej_cnt > 0) {
 		printk(KERN_NOTICE "kAFS:"
 		       " Active volume no longer valid '%s'\n",
 		       vl->vldb.name);
 		vl->valid = 0;
-		ret = -ENOMEDIUM;
+		ac.error = -ENOMEDIUM;
 	}
 
 	up_write(&vl->cell->vl_sem);
+	ret = afs_end_cursor(&ac);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 622e1100099b..9c7333eb01c2 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -44,30 +44,26 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
 void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 				      struct afs_server *server)
 {
-	struct afs_server *oldserver = NULL;
-
-	_enter("%p,%p", vnode, server);
-
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
 
 	wake_up_all(&vnode->update_waitq);
-	afs_put_server(afs_v2net(vnode), oldserver);
 	_leave("");
 }
 
 /*
  * finish off updating the recorded status of a file after an operation failed
  */
-static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
+static void afs_vnode_status_update_failed(struct afs_fs_cursor *fc,
+					   struct afs_vnode *vnode)
 {
-	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret);
+	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, fc->ac.error);
 
 	spin_lock(&vnode->lock);
 
-	if (ret == -ENOENT) {
+	if (fc->ac.error == -ENOENT) {
 		/* the file was deleted on the server */
 		_debug("got NOENT from server - marking file deleted");
 		afs_vnode_deleted_remotely(vnode);
@@ -90,9 +86,8 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
  */
 int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force)
 {
-	struct afs_server *server;
+	struct afs_fs_cursor fc;
 	unsigned int cb_break = 0;
-	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
 
@@ -172,43 +167,37 @@ get_anyway:
 
 	/* merge AFS status fetches and clear outstanding callback on this
 	 * vnode */
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %p{%pIS}",
-		       server, &server->addr.transport);
-
-		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
-					       false);
+		fc.ac.error = afs_fs_fetch_file_status(&fc, key, vnode, NULL, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
+	if (fc.ac.error == 0) {
 		_debug("adjust");
 		afs_cache_permit(vnode, key, cb_break);
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(afs_v2net(vnode), server);
+		afs_vnode_finalise_status_update(vnode, fc.server);
 	} else {
-		_debug("failed [%d]", ret);
-		afs_vnode_status_update_failed(vnode, ret);
+		_debug("failed [%d]", fc.ac.error);
+		afs_vnode_status_update_failed(&fc, vnode);
 	}
 
+out:
+	afs_end_fs_cursor(&fc, afs_v2net(vnode));
 	ASSERTCMP(vnode->update_cnt, >=, 0);
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
+	_leave(" = %d [cnt %d]", fc.ac.error, vnode->update_cnt);
+	return fc.ac.error;
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -218,8 +207,7 @@ no_server:
 int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 			 struct afs_read *desc)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,,,",
 	       vnode->volume->vlocation->vldb.name,
@@ -235,36 +223,31 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 
 	/* merge in AFS status fetches and clear outstanding callback on this
 	 * vnode */
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_fetch_data(server, key, vnode, desc,
-					false);
+		fc.ac.error = afs_fs_fetch_data(&fc, key, vnode, desc, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(afs_v2net(vnode), server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
+	if (fc.ac.error == 0)
+		afs_vnode_finalise_status_update(vnode, fc.server);
+	else
+		afs_vnode_status_update_failed(&fc, vnode);
 
-	_leave(" = %d", ret);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -275,8 +258,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
 		     struct afs_file_status *newstatus,
 		     struct afs_callback *newcb, struct afs_server **_server)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,%s,,",
 	       vnode->volume->vlocation->vldb.name,
@@ -291,38 +273,36 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
-				    newstatus, newcb, false);
+		fc.ac.error = afs_fs_create(&fc, key, vnode, name, mode, newfid,
+					    newstatus, newcb, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		*_server = server;
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(vnode, fc.server);
+		*_server = fc.server;
+		fc.server = NULL;
 	} else {
-		afs_vnode_status_update_failed(vnode, ret);
+		afs_vnode_status_update_failed(&fc, vnode);
 		*_server = NULL;
 	}
 
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -331,8 +311,7 @@ no_server:
 int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
 		     bool isdir)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,%s",
 	       vnode->volume->vlocation->vldb.name,
@@ -347,37 +326,31 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_remove(server, key, vnode, name, isdir,
-				    false);
+		fc.ac.error = afs_fs_remove(&fc, key, vnode, name, isdir, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(afs_v2net(vnode), server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
+	if (fc.ac.error == 0)
+		afs_vnode_finalise_status_update(vnode, fc.server);
+	else
+		afs_vnode_status_update_failed(&fc, vnode);
 
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -386,8 +359,7 @@ no_server:
 int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
 			  struct key *key, const char *name)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
 	       dvnode->volume->vlocation->vldb.name,
@@ -409,31 +381,27 @@ int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
 	dvnode->update_cnt++;
 	spin_unlock(&dvnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(dvnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, dvnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		fc.ac.error = afs_fs_link(&fc, key, dvnode, vnode, name, false);
 
-		ret = afs_fs_link(server, key, dvnode, vnode, name,
-				  false);
-
-	} while (!afs_volume_release_fileserver(dvnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, dvnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_vnode_finalise_status_update(dvnode, server);
-		afs_put_server(afs_v2net(dvnode), server);
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(vnode, fc.server);
+		afs_vnode_finalise_status_update(dvnode, fc.server);
 	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-		afs_vnode_status_update_failed(dvnode, ret);
+		afs_vnode_status_update_failed(&fc, vnode);
+		afs_vnode_status_update_failed(&fc, dvnode);
 	}
 
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
@@ -444,8 +412,7 @@ no_server:
 	dvnode->update_cnt--;
 	ASSERTCMP(dvnode->update_cnt, >=, 0);
 	spin_unlock(&dvnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -457,8 +424,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
 		      struct afs_file_status *newstatus,
 		      struct afs_server **_server)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,%s,%s,,,",
 	       vnode->volume->vlocation->vldb.name,
@@ -473,38 +439,37 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		fc.ac.error = afs_fs_symlink(&fc, key, vnode, name, content,
+					     newfid, newstatus, false);
 
-		ret = afs_fs_symlink(server, key, vnode, name, content,
-				     newfid, newstatus, false);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		*_server = server;
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(vnode, fc.server);
+		*_server = fc.server;
+		fc.server = NULL;
 	} else {
-		afs_vnode_status_update_failed(vnode, ret);
+		afs_vnode_status_update_failed(&fc, vnode);
 		*_server = NULL;
 	}
 
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
+	*_server = NULL;
+	goto out;
 }
 
 /*
@@ -516,8 +481,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
 		     const char *orig_name,
 		     const char *new_name)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
 	       orig_dvnode->volume->vlocation->vldb.name,
@@ -543,33 +507,30 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
 		spin_unlock(&new_dvnode->lock);
 	}
 
+	afs_init_fs_cursor(&fc, orig_dvnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(orig_dvnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, orig_dvnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
-				    new_dvnode, new_name, false);
+		fc.ac.error = afs_fs_rename(&fc, key, orig_dvnode, orig_name,
+					    new_dvnode, new_name, false);
 
-	} while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, orig_dvnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(orig_dvnode, server);
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(orig_dvnode, fc.server);
 		if (new_dvnode != orig_dvnode)
-			afs_vnode_finalise_status_update(new_dvnode, server);
-		afs_put_server(afs_v2net(orig_dvnode), server);
+			afs_vnode_finalise_status_update(new_dvnode, fc.server);
 	} else {
-		afs_vnode_status_update_failed(orig_dvnode, ret);
+		afs_vnode_status_update_failed(&fc, orig_dvnode);
 		if (new_dvnode != orig_dvnode)
-			afs_vnode_status_update_failed(new_dvnode, ret);
+			afs_vnode_status_update_failed(&fc, new_dvnode);
 	}
 
-	_leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(orig_dvnode));
 
 no_server:
 	spin_lock(&orig_dvnode->lock);
@@ -582,8 +543,7 @@ no_server:
 		ASSERTCMP(new_dvnode->update_cnt, >=, 0);
 		spin_unlock(&new_dvnode->lock);
 	}
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -592,9 +552,8 @@ no_server:
 int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
 			 unsigned offset, unsigned to)
 {
-	struct afs_server *server;
+	struct afs_fs_cursor fc;
 	struct afs_vnode *vnode = wb->vnode;
-	int ret;
 
 	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
 	       vnode->volume->vlocation->vldb.name,
@@ -609,36 +568,33 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_store_data(server, wb, first, last, offset, to,
-					false);
+		fc.ac.error = afs_fs_store_data(&fc, wb, first, last, offset, to,
+						false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(afs_v2net(vnode), server);
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(vnode, fc.server);
 	} else {
-		afs_vnode_status_update_failed(vnode, ret);
+		afs_vnode_status_update_failed(&fc, vnode);
 	}
 
-	_leave(" = %d", ret);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -647,8 +603,7 @@ no_server:
 int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
 		      struct iattr *attr)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x",
 	       vnode->volume->vlocation->vldb.name,
@@ -662,35 +617,32 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
 	vnode->update_cnt++;
 	spin_unlock(&vnode->lock);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
+		if (!afs_volume_pick_fileserver(&fc, vnode))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
-
-		ret = afs_fs_setattr(server, key, vnode, attr, false);
+		fc.ac.error = afs_fs_setattr(&fc, key, vnode, attr, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(afs_v2net(vnode), server);
+	if (fc.ac.error == 0) {
+		afs_vnode_finalise_status_update(vnode, fc.server);
 	} else {
-		afs_vnode_status_update_failed(vnode, ret);
+		afs_vnode_status_update_failed(&fc, vnode);
 	}
 
-	_leave(" = %d", ret);
-	return ret;
+out:
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 
 no_server:
 	spin_lock(&vnode->lock);
 	vnode->update_cnt--;
 	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
+	goto out;
 }
 
 /*
@@ -699,8 +651,7 @@ no_server:
 int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 				struct afs_volume_status *vs)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,",
 	       vnode->volume->vlocation->vldb.name,
@@ -709,27 +660,17 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 	       vnode->fid.unique,
 	       key_serial(key));
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		if (!afs_volume_pick_fileserver(&fc, vnode))
+			break;
 
-		ret = afs_fs_get_volume_status(server, key, vnode, vs, false);
+		fc.ac.error = afs_fs_get_volume_status(&fc, key, vnode, vs, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(afs_v2net(vnode), server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 }
 
 /*
@@ -738,8 +679,7 @@ no_server:
 int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
 		       afs_lock_type_t type)
 {
-	struct afs_server *server;
-	int ret;
+	struct afs_fs_cursor fc;
 
 	_enter("%s{%x:%u.%u},%x,%u",
 	       vnode->volume->vlocation->vldb.name,
@@ -748,27 +688,17 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
 	       vnode->fid.unique,
 	       key_serial(key), type);
 
+	afs_init_fs_cursor(&fc, vnode);
 	do {
 		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
+		if (!afs_volume_pick_fileserver(&fc, vnode))
+			break;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		fc.ac.error = afs_fs_set_lock(&fc, key, vnode, type, false);
 
-		ret = afs_fs_set_lock(server, key, vnode, type, false);
+	} while (afs_iterate_fs_cursor(&fc, vnode));
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(afs_v2net(vnode), server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 }
 
 /*
@@ -776,7 +706,7 @@ no_server:
  */
 int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
 {
-	struct afs_server *server;
+	struct afs_fs_cursor fc;
 	int ret;
 
 	_enter("%s{%x:%u.%u},%x",
@@ -786,27 +716,13 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
 	       vnode->fid.unique,
 	       key_serial(key));
 
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+	ret = afs_set_fs_cursor(&fc, vnode);
+	if (ret < 0)
+		return ret;
 
-		ret = afs_fs_extend_lock(server, key, vnode, false);
+	fc.ac.error = afs_fs_extend_lock(&fc, key, vnode, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(afs_v2net(vnode), server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 }
 
 /*
@@ -814,7 +730,7 @@ no_server:
  */
 int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
 {
-	struct afs_server *server;
+	struct afs_fs_cursor fc;
 	int ret;
 
 	_enter("%s{%x:%u.%u},%x",
@@ -824,25 +740,11 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
 	       vnode->fid.unique,
 	       key_serial(key));
 
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+	ret = afs_set_fs_cursor(&fc, vnode);
+	if (ret < 0)
+		return ret;
 
-		ret = afs_fs_release_lock(server, key, vnode, false);
+	fc.ac.error = afs_fs_release_lock(&fc, key, vnode, false);
 
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(afs_v2net(vnode), server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
+	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 4f6fd10094c6..d282cd0ff268 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -209,11 +209,45 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
 	_leave(" [destroyed]");
 }
 
+/*
+ * Initialise a filesystem server cursor for iterating over FS servers.
+ */
+void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+	fc->ac.alist = NULL;
+	fc->ac.addr = NULL;
+	fc->ac.start = 0;
+	fc->ac.index = 0;
+	fc->ac.error = 0;
+	fc->server = NULL;
+}
+
+/*
+ * Set a filesystem server cursor for using a specific FS server.
+ */
+int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+	afs_init_fs_cursor(fc, vnode);
+
+	read_seqlock_excl(&vnode->cb_lock);
+	if (vnode->cb_interest) {
+		if (vnode->cb_interest->server->fs_state == 0)
+			fc->server = afs_get_server(vnode->cb_interest->server);
+		else
+			fc->ac.error = vnode->cb_interest->server->fs_state;
+	} else {
+		fc->ac.error = -ESTALE;
+	}
+	read_sequnlock_excl(&vnode->cb_lock);
+
+	return fc->ac.error;
+}
+
 /*
  * pick a server to use to try accessing this volume
  * - returns with an elevated usage count on the server chosen
  */
-struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
+bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
 {
 	struct afs_volume *volume = vnode->volume;
 	struct afs_server *server;
@@ -223,19 +257,18 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 
 	/* stick with the server we're already using if we can */
 	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
-		afs_get_server(vnode->cb_interest->server);
-		_leave(" = %p [current]", vnode->cb_interest->server);
-		return vnode->cb_interest->server;
+		fc->server = afs_get_server(vnode->cb_interest->server);
+		goto set_server;
 	}
 
 	down_read(&volume->server_sem);
 
 	/* handle the no-server case */
 	if (volume->nservers == 0) {
-		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
+		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
 		up_read(&volume->server_sem);
-		_leave(" = %d [no servers]", ret);
-		return ERR_PTR(ret);
+		_leave(" = f [no servers %d]", fc->ac.error);
+		return false;
 	}
 
 	/* basically, just search the list for the first live server and use
@@ -280,13 +313,15 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 		}
 	}
 
+error:
+	fc->ac.error = ret;
+
 	/* no available servers
 	 * - TODO: handle the no active servers case better
 	 */
-error:
 	up_read(&volume->server_sem);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	_leave(" = f [%d]", fc->ac.error);
+	return false;
 
 picked_server:
 	/* Found an apparently healthy server.  We need to register an interest
@@ -296,37 +331,41 @@ picked_server:
 					      &volume->cb_interests[loop], server);
 	if (ret < 0)
 		goto error;
-	
-	afs_get_server(server);
+
+	fc->server = afs_get_server(server);
 	up_read(&volume->server_sem);
-	_leave(" = %p (picked %pIS)",
-	       server, &server->addr.transport);
-	return server;
+set_server:
+	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
+	fc->ac.addr = &fc->ac.alist->addrs[0];
+	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
+	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
+	return true;
 }
 
 /*
  * release a server after use
  * - releases the ref on the server struct that was acquired by picking
  * - records result of using a particular server to access a volume
- * - return 0 to try again, 1 if okay or to issue error
- * - the caller must release the server struct if result was 0
+ * - return true to try again, false if okay or to issue error
+ * - the caller must release the server struct if result was false
  */
-int afs_volume_release_fileserver(struct afs_vnode *vnode,
-				  struct afs_server *server,
-				  int result)
+bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
+			   struct afs_vnode *vnode)
 {
 	struct afs_volume *volume = vnode->volume;
+	struct afs_server *server = fc->server;
 	unsigned loop;
 
 	_enter("%s,%pIS,%d",
-	       volume->vlocation->vldb.name, &server->addr.transport, result);
+	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
+	       fc->ac.error);
 
-	switch (result) {
+	switch (fc->ac.error) {
 		/* success */
 	case 0:
 		server->fs_state = 0;
-		_leave("");
-		return 1;
+		_leave(" = f");
+		return false;
 
 		/* the fileserver denied all knowledge of the volume */
 	case -ENOMEDIUM:
@@ -363,8 +402,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 		 */
 		up_write(&volume->server_sem);
 		afs_put_server(afs_v2net(vnode), server);
-		_leave(" [completely rejected]");
-		return 1;
+		fc->server = NULL;
+		_leave(" = f [completely rejected]");
+		return false;
 
 		/* problem reaching the server */
 	case -ENETUNREACH:
@@ -378,8 +418,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 		 */
 		spin_lock(&server->fs_lock);
 		if (!server->fs_state) {
-			server->fs_state = result;
-			printk("kAFS: SERVER DEAD state=%d\n", result);
+			server->fs_state = fc->ac.error;
+			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
 		}
 		spin_unlock(&server->fs_lock);
 		goto try_next_server;
@@ -390,8 +430,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 	case -ENONET:
 		/* tell the caller to accept the result */
 		afs_put_server(afs_v2net(vnode), server);
-		_leave(" [local failure]");
-		return 1;
+		fc->server = NULL;
+		_leave(" = f [local failure]");
+		return false;
 	}
 
 	/* tell the caller to loop around and try the next server */
@@ -399,6 +440,16 @@ try_next_server_upw:
 	up_write(&volume->server_sem);
 try_next_server:
 	afs_put_server(afs_v2net(vnode), server);
-	_leave(" [try next server]");
-	return 0;
+	_leave(" = t [try next server]");
+	return true;
+}
+
+/*
+ * Clean up a fileserver cursor.
+ */
+int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
+{
+	afs_end_cursor(&fc->ac);
+	afs_put_server(net, fc->server);
+	return fc->ac.error;
 }
-- 
cgit v1.2.3


From 9cc6fc50f7bc69ac28bee45eed13cbc65a86210f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:50 +0000
Subject: afs: Move server rotation code into its own file

Move server rotation code into its own file.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile |   1 +
 fs/afs/rotate.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/volume.c | 250 -------------------------------------------------------
 3 files changed, 255 insertions(+), 250 deletions(-)
 create mode 100644 fs/afs/rotate.c

(limited to 'fs/afs')

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 849383986d3b..192d476d7c76 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -20,6 +20,7 @@ kafs-objs := \
 	misc.o \
 	mntpt.o \
 	proc.o \
+	rotate.o \
 	rxrpc.o \
 	security.o \
 	server.o \
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
new file mode 100644
index 000000000000..c7975b3ba59a
--- /dev/null
+++ b/fs/afs/rotate.c
@@ -0,0 +1,254 @@
+/* Handle fileserver selection and rotation.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Initialise a filesystem server cursor for iterating over FS servers.
+ */
+void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+	memset(fc, 0, sizeof(*fc));
+}
+
+/*
+ * Set a filesystem server cursor for using a specific FS server.
+ */
+int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+	afs_init_fs_cursor(fc, vnode);
+
+	read_seqlock_excl(&vnode->cb_lock);
+	if (vnode->cb_interest) {
+		if (vnode->cb_interest->server->fs_state == 0)
+			fc->server = afs_get_server(vnode->cb_interest->server);
+		else
+			fc->ac.error = vnode->cb_interest->server->fs_state;
+	} else {
+		fc->ac.error = -ESTALE;
+	}
+	read_sequnlock_excl(&vnode->cb_lock);
+
+	return fc->ac.error;
+}
+
+/*
+ * pick a server to use to try accessing this volume
+ * - returns with an elevated usage count on the server chosen
+ */
+bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+	struct afs_volume *volume = vnode->volume;
+	struct afs_server *server;
+	int ret, state, loop;
+
+	_enter("%s", volume->vlocation->vldb.name);
+
+	/* stick with the server we're already using if we can */
+	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
+		fc->server = afs_get_server(vnode->cb_interest->server);
+		goto set_server;
+	}
+
+	down_read(&volume->server_sem);
+
+	/* handle the no-server case */
+	if (volume->nservers == 0) {
+		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
+		up_read(&volume->server_sem);
+		_leave(" = f [no servers %d]", fc->ac.error);
+		return false;
+	}
+
+	/* basically, just search the list for the first live server and use
+	 * that */
+	ret = 0;
+	for (loop = 0; loop < volume->nservers; loop++) {
+		server = volume->servers[loop];
+		state = server->fs_state;
+
+		_debug("consider %d [%d]", loop, state);
+
+		switch (state) {
+		case 0:
+			goto picked_server;
+
+		case -ENETUNREACH:
+			if (ret == 0)
+				ret = state;
+			break;
+
+		case -EHOSTUNREACH:
+			if (ret == 0 ||
+			    ret == -ENETUNREACH)
+				ret = state;
+			break;
+
+		case -ECONNREFUSED:
+			if (ret == 0 ||
+			    ret == -ENETUNREACH ||
+			    ret == -EHOSTUNREACH)
+				ret = state;
+			break;
+
+		default:
+		case -EREMOTEIO:
+			if (ret == 0 ||
+			    ret == -ENETUNREACH ||
+			    ret == -EHOSTUNREACH ||
+			    ret == -ECONNREFUSED)
+				ret = state;
+			break;
+		}
+	}
+
+error:
+	fc->ac.error = ret;
+
+	/* no available servers
+	 * - TODO: handle the no active servers case better
+	 */
+	up_read(&volume->server_sem);
+	_leave(" = f [%d]", fc->ac.error);
+	return false;
+
+picked_server:
+	/* Found an apparently healthy server.  We need to register an interest
+	 * in receiving callbacks before we talk to it.
+	 */
+	ret = afs_register_server_cb_interest(vnode,
+					      &volume->cb_interests[loop], server);
+	if (ret < 0)
+		goto error;
+
+	fc->server = afs_get_server(server);
+	up_read(&volume->server_sem);
+set_server:
+	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
+	fc->ac.addr = &fc->ac.alist->addrs[0];
+	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
+	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
+	return true;
+}
+
+/*
+ * release a server after use
+ * - releases the ref on the server struct that was acquired by picking
+ * - records result of using a particular server to access a volume
+ * - return true to try again, false if okay or to issue error
+ * - the caller must release the server struct if result was false
+ */
+bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
+			   struct afs_vnode *vnode)
+{
+	struct afs_volume *volume = vnode->volume;
+	struct afs_server *server = fc->server;
+	unsigned loop;
+
+	_enter("%s,%pIS,%d",
+	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
+	       fc->ac.error);
+
+	switch (fc->ac.error) {
+		/* success */
+	case 0:
+		server->fs_state = 0;
+		_leave(" = f");
+		return false;
+
+		/* the fileserver denied all knowledge of the volume */
+	case -ENOMEDIUM:
+		down_write(&volume->server_sem);
+
+		/* firstly, find where the server is in the active list (if it
+		 * is) */
+		for (loop = 0; loop < volume->nservers; loop++)
+			if (volume->servers[loop] == server)
+				goto present;
+
+		/* no longer there - may have been discarded by another op */
+		goto try_next_server_upw;
+
+	present:
+		volume->nservers--;
+		memmove(&volume->servers[loop],
+			&volume->servers[loop + 1],
+			sizeof(volume->servers[loop]) *
+			(volume->nservers - loop));
+		volume->servers[volume->nservers] = NULL;
+		afs_put_server(afs_v2net(vnode), server);
+		volume->rjservers++;
+
+		if (volume->nservers > 0)
+			/* another server might acknowledge its existence */
+			goto try_next_server_upw;
+
+		/* handle the case where all the fileservers have rejected the
+		 * volume
+		 * - TODO: try asking the fileservers for volume information
+		 * - TODO: contact the VL server again to see if the volume is
+		 *         no longer registered
+		 */
+		up_write(&volume->server_sem);
+		afs_put_server(afs_v2net(vnode), server);
+		fc->server = NULL;
+		_leave(" = f [completely rejected]");
+		return false;
+
+		/* problem reaching the server */
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIME:
+	case -ETIMEDOUT:
+	case -EREMOTEIO:
+		/* mark the server as dead
+		 * TODO: vary dead timeout depending on error
+		 */
+		spin_lock(&server->fs_lock);
+		if (!server->fs_state) {
+			server->fs_state = fc->ac.error;
+			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
+		}
+		spin_unlock(&server->fs_lock);
+		goto try_next_server;
+
+		/* miscellaneous error */
+	default:
+	case -ENOMEM:
+	case -ENONET:
+		/* tell the caller to accept the result */
+		afs_put_server(afs_v2net(vnode), server);
+		fc->server = NULL;
+		_leave(" = f [local failure]");
+		return false;
+	}
+
+	/* tell the caller to loop around and try the next server */
+try_next_server_upw:
+	up_write(&volume->server_sem);
+try_next_server:
+	afs_put_server(afs_v2net(vnode), server);
+	_leave(" = t [try next server]");
+	return true;
+}
+
+/*
+ * Clean up a fileserver cursor.
+ */
+int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
+{
+	afs_end_cursor(&fc->ac);
+	afs_put_server(net, fc->server);
+	return fc->ac.error;
+}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index d282cd0ff268..3c5ad1cc50f3 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -10,12 +10,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/sched.h>
 #include "internal.h"
 
 static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
@@ -208,248 +203,3 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
 
 	_leave(" [destroyed]");
 }
-
-/*
- * Initialise a filesystem server cursor for iterating over FS servers.
- */
-void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	fc->ac.alist = NULL;
-	fc->ac.addr = NULL;
-	fc->ac.start = 0;
-	fc->ac.index = 0;
-	fc->ac.error = 0;
-	fc->server = NULL;
-}
-
-/*
- * Set a filesystem server cursor for using a specific FS server.
- */
-int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	afs_init_fs_cursor(fc, vnode);
-
-	read_seqlock_excl(&vnode->cb_lock);
-	if (vnode->cb_interest) {
-		if (vnode->cb_interest->server->fs_state == 0)
-			fc->server = afs_get_server(vnode->cb_interest->server);
-		else
-			fc->ac.error = vnode->cb_interest->server->fs_state;
-	} else {
-		fc->ac.error = -ESTALE;
-	}
-	read_sequnlock_excl(&vnode->cb_lock);
-
-	return fc->ac.error;
-}
-
-/*
- * pick a server to use to try accessing this volume
- * - returns with an elevated usage count on the server chosen
- */
-bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server;
-	int ret, state, loop;
-
-	_enter("%s", volume->vlocation->vldb.name);
-
-	/* stick with the server we're already using if we can */
-	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
-		fc->server = afs_get_server(vnode->cb_interest->server);
-		goto set_server;
-	}
-
-	down_read(&volume->server_sem);
-
-	/* handle the no-server case */
-	if (volume->nservers == 0) {
-		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
-		up_read(&volume->server_sem);
-		_leave(" = f [no servers %d]", fc->ac.error);
-		return false;
-	}
-
-	/* basically, just search the list for the first live server and use
-	 * that */
-	ret = 0;
-	for (loop = 0; loop < volume->nservers; loop++) {
-		server = volume->servers[loop];
-		state = server->fs_state;
-
-		_debug("consider %d [%d]", loop, state);
-
-		switch (state) {
-		case 0:
-			goto picked_server;
-
-		case -ENETUNREACH:
-			if (ret == 0)
-				ret = state;
-			break;
-
-		case -EHOSTUNREACH:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH)
-				ret = state;
-			break;
-
-		case -ECONNREFUSED:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH)
-				ret = state;
-			break;
-
-		default:
-		case -EREMOTEIO:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH ||
-			    ret == -ECONNREFUSED)
-				ret = state;
-			break;
-		}
-	}
-
-error:
-	fc->ac.error = ret;
-
-	/* no available servers
-	 * - TODO: handle the no active servers case better
-	 */
-	up_read(&volume->server_sem);
-	_leave(" = f [%d]", fc->ac.error);
-	return false;
-
-picked_server:
-	/* Found an apparently healthy server.  We need to register an interest
-	 * in receiving callbacks before we talk to it.
-	 */
-	ret = afs_register_server_cb_interest(vnode,
-					      &volume->cb_interests[loop], server);
-	if (ret < 0)
-		goto error;
-
-	fc->server = afs_get_server(server);
-	up_read(&volume->server_sem);
-set_server:
-	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
-	fc->ac.addr = &fc->ac.alist->addrs[0];
-	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
-	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
-	return true;
-}
-
-/*
- * release a server after use
- * - releases the ref on the server struct that was acquired by picking
- * - records result of using a particular server to access a volume
- * - return true to try again, false if okay or to issue error
- * - the caller must release the server struct if result was false
- */
-bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
-			   struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server = fc->server;
-	unsigned loop;
-
-	_enter("%s,%pIS,%d",
-	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
-	       fc->ac.error);
-
-	switch (fc->ac.error) {
-		/* success */
-	case 0:
-		server->fs_state = 0;
-		_leave(" = f");
-		return false;
-
-		/* the fileserver denied all knowledge of the volume */
-	case -ENOMEDIUM:
-		down_write(&volume->server_sem);
-
-		/* firstly, find where the server is in the active list (if it
-		 * is) */
-		for (loop = 0; loop < volume->nservers; loop++)
-			if (volume->servers[loop] == server)
-				goto present;
-
-		/* no longer there - may have been discarded by another op */
-		goto try_next_server_upw;
-
-	present:
-		volume->nservers--;
-		memmove(&volume->servers[loop],
-			&volume->servers[loop + 1],
-			sizeof(volume->servers[loop]) *
-			(volume->nservers - loop));
-		volume->servers[volume->nservers] = NULL;
-		afs_put_server(afs_v2net(vnode), server);
-		volume->rjservers++;
-
-		if (volume->nservers > 0)
-			/* another server might acknowledge its existence */
-			goto try_next_server_upw;
-
-		/* handle the case where all the fileservers have rejected the
-		 * volume
-		 * - TODO: try asking the fileservers for volume information
-		 * - TODO: contact the VL server again to see if the volume is
-		 *         no longer registered
-		 */
-		up_write(&volume->server_sem);
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [completely rejected]");
-		return false;
-
-		/* problem reaching the server */
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -ECONNREFUSED:
-	case -ETIME:
-	case -ETIMEDOUT:
-	case -EREMOTEIO:
-		/* mark the server as dead
-		 * TODO: vary dead timeout depending on error
-		 */
-		spin_lock(&server->fs_lock);
-		if (!server->fs_state) {
-			server->fs_state = fc->ac.error;
-			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
-		}
-		spin_unlock(&server->fs_lock);
-		goto try_next_server;
-
-		/* miscellaneous error */
-	default:
-	case -ENOMEM:
-	case -ENONET:
-		/* tell the caller to accept the result */
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [local failure]");
-		return false;
-	}
-
-	/* tell the caller to loop around and try the next server */
-try_next_server_upw:
-	up_write(&volume->server_sem);
-try_next_server:
-	afs_put_server(afs_v2net(vnode), server);
-	_leave(" = t [try next server]");
-	return true;
-}
-
-/*
- * Clean up a fileserver cursor.
- */
-int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
-{
-	afs_end_cursor(&fc->ac);
-	afs_put_server(net, fc->server);
-	return fc->ac.error;
-}
-- 
cgit v1.2.3


From d2ddc776a4581d900fc3bdc7803b403daae64d88 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:50 +0000
Subject: afs: Overhaul volume and server record caching and fileserver
 rotation

The current code assumes that volumes and servers are per-cell and are
never shared, but this is not enforced, and, indeed, public cells do exist
that are aliases of each other.  Further, an organisation can, say, set up
a public cell and a private cell with overlapping, but not identical, sets
of servers.  The difference is purely in the database attached to the VL
servers.

The current code will malfunction if it sees a server in two cells as it
assumes global address -> server record mappings and that each server is in
just one cell.

Further, each server may have multiple addresses - and may have addresses
of different families (IPv4 and IPv6, say).

To this end, the following structural changes are made:

 (1) Server record management is overhauled:

     (a) Server records are made independent of cell.  The namespace keeps
     	 track of them, volume records have lists of them and each vnode
     	 has a server on which its callback interest currently resides.

     (b) The cell record no longer keeps a list of servers known to be in
     	 that cell.

     (c) The server records are now kept in a flat list because there's no
     	 single address to sort on.

     (d) Server records are now keyed by their UUID within the namespace.

     (e) The addresses for a server are obtained with the VL.GetAddrsU
     	 rather than with VL.GetEntryByName, using the server's UUID as a
     	 parameter.

     (f) Cached server records are garbage collected after a period of
     	 non-use and are counted out of existence before purging is allowed
     	 to complete.  This protects the work functions against rmmod.

     (g) The servers list is now in /proc/fs/afs/servers.

 (2) Volume record management is overhauled:

     (a) An RCU-replaceable server list is introduced.  This tracks both
     	 servers and their coresponding callback interests.

     (b) The superblock is now keyed on cell record and numeric volume ID.

     (c) The volume record is now tied to the superblock which mounts it,
     	 and is activated when mounted and deactivated when unmounted.
     	 This makes it easier to handle the cache cookie without causing a
     	 double-use in fscache.

     (d) The volume record is loaded from the VLDB using VL.GetEntryByNameU
     	 to get the server UUID list.

     (e) The volume name is updated if it is seen to have changed when the
     	 volume is updated (the update is keyed on the volume ID).

 (3) The vlocation record is got rid of and VLDB records are no longer
     cached.  Sufficient information is stored in the volume record, though
     an update to a volume record is now no longer shared between related
     volumes (volumes come in bundles of three: R/W, R/O and backup).

and the following procedural changes are made:

 (1) The fileserver cursor introduced previously is now fleshed out and
     used to iterate over fileservers and their addresses.

 (2) Volume status is checked during iteration, and the server list is
     replaced if a change is detected.

 (3) Server status is checked during iteration, and the address list is
     replaced if a change is detected.

 (4) The abort code is saved into the address list cursor and -ECONNABORTED
     returned in afs_make_call() if a remote abort happened rather than
     translating the abort into an error message.  This allows actions to
     be taken depending on the abort code more easily.

     (a) If a VMOVED abort is seen then this is handled by rechecking the
     	 volume and restarting the iteration.

     (b) If a VBUSY, VRESTARTING or VSALVAGING abort is seen then this is
         handled by sleeping for a short period and retrying and/or trying
         other servers that might serve that volume.  A message is also
         displayed once until the condition has cleared.

     (c) If a VOFFLINE abort is seen, then this is handled as VBUSY for the
     	 moment.

     (d) If a VNOVOL abort is seen, the volume is rechecked in the VLDB to
     	 see if it has been deleted; if not, the fileserver is probably
     	 indicating that the volume couldn't be attached and needs
     	 salvaging.

     (e) If statfs() sees one of these aborts, it does not sleep, but
     	 rather returns an error, so as not to block the umount program.

 (5) The fileserver iteration functions in vnode.c are now merged into
     their callers and more heavily macroised around the cursor.  vnode.c
     is removed.

 (6) Operations on a particular vnode are serialised on that vnode because
     the server will lock that vnode whilst it operates on it, so a second
     op sent will just have to wait.

 (7) Fileservers are probed with FS.GetCapabilities before being used.
     This is where service upgrade will be done.

 (8) A callback interest on a fileserver is set up before an FS operation
     is performed and passed through to afs_make_call() so that it can be
     set on the vnode if the operation returns a callback.  The callback
     interest is passed through to afs_iget() also so that it can be set
     there too.

In general, record updating is done on an as-needed basis when we try to
access servers, volumes or vnodes rather than offloading it to work items
and special threads.

Notes:

 (1) Pre AFS-3.4 servers are no longer supported, though this can be added
     back if necessary (AFS-3.4 was released in 1998).

 (2) VBUSY is retried forever for the moment at intervals of 1s.

 (3) /proc/fs/afs/<cell>/servers no longer exists.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile      |   3 +-
 fs/afs/addr_list.c   |  31 +++
 fs/afs/afs_fs.h      |   1 +
 fs/afs/afs_vl.h      |  42 +++
 fs/afs/callback.c    |  20 +-
 fs/afs/cell.c        |   7 +-
 fs/afs/dir.c         | 388 +++++++++++++++-----------
 fs/afs/file.c        |  37 ++-
 fs/afs/flock.c       | 108 +++++++-
 fs/afs/fsclient.c    | 347 ++++++++++++++----------
 fs/afs/inode.c       |  50 +++-
 fs/afs/internal.h    | 453 ++++++++++++++++---------------
 fs/afs/main.c        |  35 +--
 fs/afs/proc.c        | 153 ++++-------
 fs/afs/rotate.c      | 461 +++++++++++++++++++++++++++++++
 fs/afs/rxrpc.c       |  45 ++--
 fs/afs/security.c    |   2 +-
 fs/afs/server.c      | 750 ++++++++++++++++++++++++++++++++++++---------------
 fs/afs/server_list.c | 153 +++++++++++
 fs/afs/super.c       |  92 ++++---
 fs/afs/vlclient.c    | 307 ++++++++++++++-------
 fs/afs/vlocation.c   | 669 ---------------------------------------------
 fs/afs/vnode.c       | 750 ---------------------------------------------------
 fs/afs/volume.c      | 426 ++++++++++++++++++++---------
 fs/afs/write.c       |  38 ++-
 fs/afs/xattr.c       |   2 +-
 26 files changed, 2795 insertions(+), 2575 deletions(-)
 create mode 100644 fs/afs/server_list.c
 delete mode 100644 fs/afs/vlocation.c
 delete mode 100644 fs/afs/vnode.c

(limited to 'fs/afs')

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 192d476d7c76..45b7fc405fa6 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -24,11 +24,10 @@ kafs-objs := \
 	rxrpc.o \
 	security.o \
 	server.o \
+	server_list.o \
 	super.o \
 	netdevices.o \
 	vlclient.o \
-	vlocation.o \
-	vnode.o \
 	volume.o \
 	write.o \
 	xattr.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index ecb9c72aebd2..b91e59a77f0e 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -227,6 +227,37 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 	return alist;
 }
 
+/*
+ * Merge an IPv4 entry into a fileserver address list.
+ */
+void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
+{
+	struct sockaddr_in6 *a;
+	int i;
+
+	for (i = 0; i < alist->nr_ipv4; i++) {
+		a = &alist->addrs[i].transport.sin6;
+		if (xdr == a->sin6_addr.s6_addr32[3])
+			return;
+		if (xdr < a->sin6_addr.s6_addr32[3])
+			break;
+	}
+
+	if (i < alist->nr_addrs)
+		memmove(alist->addrs + i + 1,
+			alist->addrs + i,
+			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+	a = &alist->addrs[i].transport.sin6;
+	a->sin6_port		  = htons(AFS_FS_PORT);
+	a->sin6_addr.s6_addr32[0] = 0;
+	a->sin6_addr.s6_addr32[1] = 0;
+	a->sin6_addr.s6_addr32[2] = htonl(0xffff);
+	a->sin6_addr.s6_addr32[3] = xdr;
+	alist->nr_ipv4++;
+	alist->nr_addrs++;
+}
+
 /*
  * Get an address to try.
  */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index 05395d0f1941..d47b6d01e4c0 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -38,6 +38,7 @@ enum AFS_FS_Operations {
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
 	FSSTOREDATA64		= 65538, /* AFS Store file data */
 	FSGIVEUPALLCALLBACKS	= 65539, /* AFS Give up all outstanding callbacks on a server */
+	FSGETCAPABILITIES	= 65540, /* Probe and get the capabilities of a fileserver */
 };
 
 enum AFS_FS_Errors {
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 4eaa620992c8..6350b417aee9 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -88,4 +88,46 @@ struct afs_vldbentry {
 
 #define AFS_VLDB_MAXNAMELEN 65
 
+
+struct afs_ListAddrByAttributes__xdr {
+	__be32			Mask;
+#define AFS_VLADDR_IPADDR	0x1	/* Match by ->ipaddr */
+#define AFS_VLADDR_INDEX	0x2	/* Match by ->index */
+#define AFS_VLADDR_UUID		0x4	/* Match by ->uuid */
+	__be32			ipaddr;
+	__be32			index;
+	__be32			spare;
+	struct afs_uuid__xdr	uuid;
+};
+
+struct afs_uvldbentry__xdr {
+	__be32			name[AFS_VLDB_MAXNAMELEN];
+	__be32			nServers;
+	struct afs_uuid__xdr	serverNumber[AFS_NMAXNSERVERS];
+	__be32			serverUnique[AFS_NMAXNSERVERS];
+	__be32			serverPartition[AFS_NMAXNSERVERS];
+	__be32			serverFlags[AFS_NMAXNSERVERS];
+	__be32			volumeId[AFS_MAXTYPES];
+	__be32			cloneId;
+	__be32			flags;
+	__be32			spares1;
+	__be32			spares2;
+	__be32			spares3;
+	__be32			spares4;
+	__be32			spares5;
+	__be32			spares6;
+	__be32			spares7;
+	__be32			spares8;
+	__be32			spares9;
+};
+
+struct afs_address_list {
+	refcount_t		usage;
+	unsigned int		version;
+	unsigned int		nr_addrs;
+	struct sockaddr_rxrpc	addrs[];
+};
+
+extern void afs_put_address_list(struct afs_address_list *alist);
+
 #endif /* AFS_VL_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 82f4c7a3b7b6..f4291b576054 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -26,10 +26,10 @@
  * - Called with volume->server_sem held.
  */
 int afs_register_server_cb_interest(struct afs_vnode *vnode,
-				    struct afs_cb_interest **ppcbi,
-				    struct afs_server *server)
+				    struct afs_server_entry *entry)
 {
-	struct afs_cb_interest *cbi = *ppcbi, *vcbi, *new, *x;
+	struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+	struct afs_server *server = entry->server;
 
 again:
 	vcbi = vnode->cb_interest;
@@ -47,7 +47,7 @@ again:
 
 		if (!cbi && vcbi->server == server) {
 			afs_get_cb_interest(vcbi);
-			x = cmpxchg(ppcbi, cbi, vcbi);
+			x = cmpxchg(&entry->cb_interest, cbi, vcbi);
 			if (x != cbi) {
 				cbi = x;
 				afs_put_cb_interest(afs_v2net(vnode), vcbi);
@@ -72,7 +72,7 @@ again:
 		list_add_tail(&new->cb_link, &server->cb_interests);
 		write_unlock(&server->cb_break_lock);
 
-		x = cmpxchg(ppcbi, cbi, new);
+		x = cmpxchg(&entry->cb_interest, cbi, new);
 		if (x == cbi) {
 			cbi = new;
 		} else {
@@ -137,7 +137,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
  */
 void afs_init_callback_state(struct afs_server *server)
 {
-	if (!test_and_clear_bit(AFS_SERVER_NEW, &server->flags))
+	if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
 		server->cb_s_break++;
 }
 
@@ -233,12 +233,12 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 /*
  * Clear the callback interests in a server list.
  */
-void afs_clear_callback_interests(struct afs_net *net, struct afs_volume *volume)
+void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(volume->cb_interests); i++) {
-		afs_put_cb_interest(net, volume->cb_interests[i]);
-		volume->cb_interests[i] = NULL;
+	for (i = 0; i < slist->nr_servers; i++) {
+		afs_put_cb_interest(net, slist->servers[i].cb_interest);
+		slist->servers[i].cb_interest = NULL;
 	}
 }
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index a0e08d3a108c..1858c91169e4 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -146,13 +146,10 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 
 	atomic_set(&cell->usage, 2);
 	INIT_WORK(&cell->manager, afs_manage_cell);
-	rwlock_init(&cell->servers_lock);
-	INIT_LIST_HEAD(&cell->servers);
-	init_rwsem(&cell->vl_sem);
-	INIT_LIST_HEAD(&cell->vl_list);
-	spin_lock_init(&cell->vl_lock);
 	cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
 		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
+	INIT_LIST_HEAD(&cell->proc_volumes);
+	rwlock_init(&cell->proc_lock);
 	rwlock_init(&cell->vl_addrs_lock);
 
 	/* Fill in the VL server list if we were given a list of addresses to
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 37083699a0df..53f3917440e7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -553,7 +553,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
 
 	/* instantiate the dentry */
-	inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
+	inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL);
 	key_put(key);
 	if (IS_ERR(inode)) {
 		_leave(" = %ld", PTR_ERR(inode));
@@ -740,21 +740,49 @@ static void afs_d_release(struct dentry *dentry)
 	_enter("%pd", dentry);
 }
 
+/*
+ * Create a new inode for create/mkdir/symlink
+ */
+static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
+				struct dentry *new_dentry,
+				struct afs_fid *newfid,
+				struct afs_file_status *newstatus,
+				struct afs_callback *newcb)
+{
+	struct inode *inode;
+
+	if (fc->ac.error < 0)
+		return;
+
+	inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
+			 newfid, newstatus, newcb, fc->cbi);
+	if (IS_ERR(inode)) {
+		/* ENOMEM or EINTR at a really inconvenient time - just abandon
+		 * the new directory on the server.
+		 */
+		fc->ac.error = PTR_ERR(inode);
+		return;
+	}
+
+	d_instantiate(new_dentry, inode);
+	if (d_unhashed(new_dentry))
+		d_rehash(new_dentry);
+}
+
 /*
  * create a directory on an AFS filesystem
  */
 static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	struct afs_file_status status;
-	struct afs_callback cb;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
+	struct afs_file_status newstatus;
+	struct afs_fs_cursor fc;
+	struct afs_callback newcb;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_fid newfid;
 	struct key *key;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
+	mode |= S_IFDIR;
 
 	_enter("{%x:%u},{%pd},%ho",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -765,40 +793,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto error;
 	}
 
-	mode |= S_IFDIR;
-	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
-			       mode, &fid, &status, &cb, &server);
-	if (ret < 0)
-		goto mkdir_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			afs_fs_create(&fc, dentry->d_name.name, mode,
+				      &newfid, &newstatus, &newcb);
+		}
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
-	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret < 0)
+			goto error_key;
 	}
 
-	/* apply the status report we've got for the new vnode */
-	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(afs_i2net(dir), server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
-	}
 	key_put(key);
 	_leave(" = 0");
 	return 0;
 
-iget_error:
-	afs_put_server(afs_i2net(dir), server);
-mkdir_error:
+error_key:
 	key_put(key);
 error:
 	d_drop(dentry);
@@ -806,17 +821,30 @@ error:
 	return ret;
 }
 
+/*
+ * Remove a subdir from a directory.
+ */
+static void afs_dir_remove_subdir(struct dentry *dentry)
+{
+	if (d_really_is_positive(dentry)) {
+		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+		clear_nlink(&vnode->vfs_inode);
+		set_bit(AFS_VNODE_DELETED, &vnode->flags);
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	}
+}
+
 /*
  * remove a directory from an AFS filesystem
  */
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	struct afs_vnode *dvnode, *vnode;
+	struct afs_fs_cursor fc;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct key *key;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
-
 	_enter("{%x:%u},{%pd}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
@@ -826,45 +854,69 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		goto error;
 	}
 
-	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
-	if (ret < 0)
-		goto rmdir_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			afs_fs_remove(&fc, dentry->d_name.name, true);
+		}
 
-	if (d_really_is_positive(dentry)) {
-		vnode = AFS_FS_I(d_inode(dentry));
-		clear_nlink(&vnode->vfs_inode);
-		set_bit(AFS_VNODE_DELETED, &vnode->flags);
-		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret == 0)
+			afs_dir_remove_subdir(dentry);
 	}
 
 	key_put(key);
-	_leave(" = 0");
-	return 0;
-
-rmdir_error:
-	key_put(key);
 error:
-	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
- * remove a file from an AFS filesystem
+ * Remove a link to a file or symlink from a directory.
+ *
+ * If the file was not deleted due to excess hard links, the fileserver will
+ * break the callback promise on the file - if it had one - before it returns
+ * to us, and if it was deleted, it won't
+ *
+ * However, if we didn't have a callback promise outstanding, or it was
+ * outstanding on a different server, then it won't break it either...
+ */
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+{
+	int ret = 0;
+
+	if (d_really_is_positive(dentry)) {
+		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+			kdebug("AFS_VNODE_DELETED");
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+		ret = afs_validate(vnode, key);
+		if (ret == -ESTALE)
+			ret = 0;
+		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+	}
+
+	return ret;
+}
+
+/*
+ * Remove a file or symlink from an AFS filesystem.
  */
 static int afs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	struct afs_vnode *dvnode, *vnode;
+	struct afs_fs_cursor fc;
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
 	struct key *key;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
-
 	_enter("{%x:%u},{%pd}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
-	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
+		return -ENAMETOOLONG;
 
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -872,42 +924,28 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 		goto error;
 	}
 
+	/* Try to make sure we have a callback promise on the victim. */
 	if (d_really_is_positive(dentry)) {
 		vnode = AFS_FS_I(d_inode(dentry));
-
-		/* make sure we have a callback promise on the victim */
 		ret = afs_validate(vnode, key);
 		if (ret < 0)
-			goto error;
+			goto error_key;
 	}
 
-	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
-	if (ret < 0)
-		goto remove_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			afs_fs_remove(&fc, dentry->d_name.name, false);
+		}
 
-	if (d_really_is_positive(dentry)) {
-		/* if the file wasn't deleted due to excess hard links, the
-		 * fileserver will break the callback promise on the file - if
-		 * it had one - before it returns to us, and if it was deleted,
-		 * it won't
-		 *
-		 * however, if we didn't have a callback promise outstanding,
-		 * or it was outstanding on a different server, then it won't
-		 * break it either...
-		 */
-		vnode = AFS_FS_I(d_inode(dentry));
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			_debug("AFS_VNODE_DELETED");
-		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-		ret = afs_validate(vnode, key);
-		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret == 0)
+			ret = afs_dir_remove_link(dentry, key);
 	}
 
-	key_put(key);
-	_leave(" = 0");
-	return 0;
-
-remove_error:
+error_key:
 	key_put(key);
 error:
 	_leave(" = %d", ret);
@@ -920,60 +958,50 @@ error:
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      bool excl)
 {
-	struct afs_file_status status;
-	struct afs_callback cb;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
+	struct afs_fs_cursor fc;
+	struct afs_file_status newstatus;
+	struct afs_callback newcb;
+	struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir);
+	struct afs_fid newfid;
 	struct key *key;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
+	mode |= S_IFREG;
 
 	_enter("{%x:%u},{%pd},%ho,",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len >= AFSNAMEMAX)
+		goto error;
+
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
 		goto error;
 	}
 
-	mode |= S_IFREG;
-	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
-			       mode, &fid, &status, &cb, &server);
-	if (ret < 0)
-		goto create_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			afs_fs_create(&fc, dentry->d_name.name, mode,
+				      &newfid, &newstatus, &newcb);
+		}
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
-	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret < 0)
+			goto error_key;
 	}
 
-	/* apply the status report we've got for the new vnode */
-	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(afs_i2net(dir), server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
-	}
 	key_put(key);
 	_leave(" = 0");
 	return 0;
 
-iget_error:
-	afs_put_server(afs_i2net(dir), server);
-create_error:
+error_key:
 	key_put(key);
 error:
 	d_drop(dentry);
@@ -987,6 +1015,7 @@ error:
 static int afs_link(struct dentry *from, struct inode *dir,
 		    struct dentry *dentry)
 {
+	struct afs_fs_cursor fc;
 	struct afs_vnode *dvnode, *vnode;
 	struct key *key;
 	int ret;
@@ -999,23 +1028,45 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	       dvnode->fid.vid, dvnode->fid.vnode,
 	       dentry);
 
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len >= AFSNAMEMAX)
+		goto error;
+
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
 		goto error;
 	}
 
-	ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
-	if (ret < 0)
-		goto link_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
+			afs_end_vnode_operation(&fc);
+			return -ERESTARTSYS;
+		}
+
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_link(&fc, vnode, dentry->d_name.name);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break_2);
+		ihold(&vnode->vfs_inode);
+		d_instantiate(dentry, &vnode->vfs_inode);
+
+		mutex_unlock(&vnode->io_lock);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret < 0)
+			goto error_key;
+	}
 
-	ihold(&vnode->vfs_inode);
-	d_instantiate(dentry, &vnode->vfs_inode);
 	key_put(key);
 	_leave(" = 0");
 	return 0;
 
-link_error:
+error_key:
 	key_put(key);
 error:
 	d_drop(dentry);
@@ -1029,20 +1080,21 @@ error:
 static int afs_symlink(struct inode *dir, struct dentry *dentry,
 		       const char *content)
 {
-	struct afs_file_status status;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
+	struct afs_fs_cursor fc;
+	struct afs_file_status newstatus;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_fid newfid;
 	struct key *key;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
-
 	_enter("{%x:%u},{%pd},%s",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry,
 	       content);
 
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len >= AFSNAMEMAX)
+		goto error;
+
 	ret = -EINVAL;
 	if (strlen(content) >= AFSPATHMAX)
 		goto error;
@@ -1053,39 +1105,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 		goto error;
 	}
 
-	ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
-				&fid, &status, &server);
-	if (ret < 0)
-		goto create_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+			afs_fs_symlink(&fc, dentry->d_name.name, content,
+				       &newfid, &newstatus);
+		}
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
-	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret < 0)
+			goto error_key;
 	}
 
-	/* apply the status report we've got for the new vnode */
-	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(afs_i2net(dir), server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
-	}
 	key_put(key);
 	_leave(" = 0");
 	return 0;
 
-iget_error:
-	afs_put_server(afs_i2net(dir), server);
-create_error:
+error_key:
 	key_put(key);
 error:
 	d_drop(dentry);
@@ -1100,6 +1140,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		      struct inode *new_dir, struct dentry *new_dentry,
 		      unsigned int flags)
 {
+	struct afs_fs_cursor fc;
 	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
 	struct key *key;
 	int ret;
@@ -1123,16 +1164,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto error;
 	}
 
-	ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
-			       old_dentry->d_name.name,
-			       new_dentry->d_name.name);
-	if (ret < 0)
-		goto rename_error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
+		if (orig_dvnode != new_dvnode) {
+			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
+				afs_end_vnode_operation(&fc);
+				return -ERESTARTSYS;
+			}
+		}
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
+			fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+			afs_fs_rename(&fc, old_dentry->d_name.name,
+				      new_dvnode, new_dentry->d_name.name);
+		}
+
+		afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
+		afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2);
+		if (orig_dvnode != new_dvnode)
+			mutex_unlock(&new_dvnode->io_lock);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret < 0)
+			goto error_key;
+	}
+
 	key_put(key);
 	_leave(" = 0");
 	return 0;
 
-rename_error:
+error_key:
 	key_put(key);
 error:
 	d_drop(new_dentry);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 08f9f0c5dfac..1f26ac9f816d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -137,6 +137,37 @@ static void afs_file_readpage_read_complete(struct page *page,
 }
 #endif
 
+/*
+ * Fetch file data from the volume.
+ */
+int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc)
+{
+	struct afs_fs_cursor fc;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x,,,",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_fetch_data(&fc, desc);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * read page from file, directory or symlink, given a key to use
  */
@@ -199,7 +230,7 @@ int afs_page_filler(void *data, struct page *page)
 
 		/* read the contents of the file from the server into the
 		 * page */
-		ret = afs_vnode_fetch_data(vnode, key, req);
+		ret = afs_fetch_data(vnode, key, req);
 		afs_put_read(req);
 		if (ret < 0) {
 			if (ret == -ENOENT) {
@@ -264,7 +295,7 @@ static int afs_readpage(struct file *file, struct page *page)
 		ret = afs_page_filler(key, page);
 	} else {
 		struct inode *inode = page->mapping->host;
-		key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
+		key = afs_request_key(AFS_FS_S(inode->i_sb)->cell);
 		if (IS_ERR(key)) {
 			ret = PTR_ERR(key);
 		} else {
@@ -369,7 +400,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 		return 0;
 	}
 
-	ret = afs_vnode_fetch_data(vnode, key, req);
+	ret = afs_fetch_data(vnode, key, req);
 	if (ret < 0)
 		goto error;
 
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 2b31ea58c50c..77b0a4606efd 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -67,6 +67,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
 	}
 }
 
+/*
+ * Get a lock on a file
+ */
+static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
+			afs_lock_type_t type)
+{
+	struct afs_fs_cursor fc;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x,%u",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key), type);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_set_lock(&fc, type);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Extend a lock on a file
+ */
+static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_current_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_extend_lock(&fc);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Release a lock on a file
+ */
+static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_current_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_release_lock(&fc);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * do work for a lock, including:
  * - probing for a lock we're waiting on but didn't get immediately
@@ -91,7 +185,7 @@ void afs_lock_work(struct work_struct *work)
 
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll time out anyway */
-		ret = afs_vnode_release_lock(vnode, vnode->unlock_key);
+		ret = afs_release_lock(vnode, vnode->unlock_key);
 		if (ret < 0)
 			printk(KERN_WARNING "AFS:"
 			       " Failed to release lock on {%x:%x} error %d\n",
@@ -115,7 +209,7 @@ void afs_lock_work(struct work_struct *work)
 		key = key_get(fl->fl_file->private_data);
 		spin_unlock(&vnode->lock);
 
-		ret = afs_vnode_extend_lock(vnode, key);
+		ret = afs_extend_lock(vnode, key);
 		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
 		key_put(key);
 		switch (ret) {
@@ -151,7 +245,7 @@ void afs_lock_work(struct work_struct *work)
 			AFS_LOCK_READ : AFS_LOCK_WRITE;
 		spin_unlock(&vnode->lock);
 
-		ret = afs_vnode_set_lock(vnode, key, type);
+		ret = afs_set_lock(vnode, key, type);
 		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
 		switch (ret) {
 		case -EWOULDBLOCK:
@@ -182,7 +276,7 @@ void afs_lock_work(struct work_struct *work)
 				clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
 				clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
 				spin_unlock(&vnode->lock);
-				afs_vnode_release_lock(vnode, key);
+				afs_release_lock(vnode, key);
 				if (!list_empty(&vnode->pending_locks))
 					afs_lock_may_be_available(vnode);
 			}
@@ -280,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
 		spin_unlock(&vnode->lock);
 
-		ret = afs_vnode_set_lock(vnode, key, type);
+		ret = afs_set_lock(vnode, key, type);
 		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
 		switch (ret) {
 		case 0:
@@ -383,7 +477,7 @@ given_lock:
 	/* again, make sure we've got a callback on this file and, again, make
 	 * sure that our view of the data version is up to date (we ignore
 	 * errors incurred here and deal with the consequences elsewhere) */
-	afs_vnode_fetch_status(vnode, key, false);
+	afs_validate(vnode, key);
 
 error:
 	spin_unlock(&inode->i_lock);
@@ -455,7 +549,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 	posix_test_lock(file, fl);
 	if (fl->fl_type == F_UNLCK) {
 		/* no local locks; consult the server */
-		ret = afs_vnode_fetch_status(vnode, key, true);
+		ret = afs_fetch_status(vnode, key);
 		if (ret < 0)
 			goto error;
 		lock_count = vnode->status.lock_count;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 6614d0a78daa..72ff3679fa2a 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -22,9 +22,9 @@
  */
 static u8 afs_discard_buffer[64];
 
-static inline void afs_use_fs_server(struct afs_call *call, struct afs_server *server)
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
-	call->server = afs_get_server(server);
+	call->cbi = afs_get_cb_interest(cbi);
 }
 
 /*
@@ -56,7 +56,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 	kuid_t owner;
 	kgid_t group;
 
-	write_seqlock(&vnode->cb_lock);
+	if (vnode)
+		write_seqlock(&vnode->cb_lock);
 
 #define EXTRACT(DST)				\
 	do {					\
@@ -141,7 +142,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 		status->data_version = data_version;
 	}
 
-	write_sequnlock(&vnode->cb_lock);
+	if (vnode)
+		write_sequnlock(&vnode->cb_lock);
 }
 
 /*
@@ -151,22 +153,29 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 				   struct afs_vnode *vnode,
 				   const __be32 **_bp)
 {
+	struct afs_cb_interest *old, *cbi = call->cbi;
 	const __be32 *bp = *_bp;
 	u32 cb_expiry;
 
 	write_seqlock(&vnode->cb_lock);
 
-	if (call->cb_break == (vnode->cb_break + call->server->cb_s_break)) {
+	if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
 		vnode->cb_version	= ntohl(*bp++);
 		cb_expiry		= ntohl(*bp++);
 		vnode->cb_type		= ntohl(*bp++);
 		vnode->cb_expires_at	= cb_expiry + ktime_get_real_seconds();
+		old = vnode->cb_interest;
+		if (old != call->cbi) {
+			vnode->cb_interest = cbi;
+			cbi = old;
+		}
 		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 	} else {
 		bp += 3;
 	}
 
 	write_sequnlock(&vnode->cb_lock);
+	call->cbi = cbi;
 	*_bp = bp;
 }
 
@@ -297,24 +306,23 @@ static const struct afs_call_type afs_RXFSFetchStatus = {
 /*
  * fetch the status information for a file
  */
-int afs_fs_fetch_file_status(struct afs_fs_cursor *fc,
-			     struct key *key,
-			     struct afs_vnode *vnode,
-			     struct afs_volsync *volsync,
-			     bool async)
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
-	if (!call)
+	if (!call) {
+		fc->ac.error = -ENOMEM;
 		return -ENOMEM;
+	}
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = volsync;
 
@@ -325,9 +333,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc,
 	bp[2] = htonl(vnode->fid.vnode);
 	bp[3] = htonl(vnode->fid.unique);
 
-	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -502,12 +510,9 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
 /*
  * fetch data from a very large file
  */
-static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
-			       struct key *key,
-			       struct afs_vnode *vnode,
-			       struct afs_read *req,
-			       bool async)
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -518,7 +523,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
@@ -536,20 +541,17 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
 	bp[7] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
  * fetch data from a file
  */
-int afs_fs_fetch_data(struct afs_fs_cursor *fc,
-		      struct key *key,
-		      struct afs_vnode *vnode,
-		      struct afs_read *req,
-		      bool async)
+int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -557,7 +559,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc,
 	if (upper_32_bits(req->pos) ||
 	    upper_32_bits(req->len) ||
 	    upper_32_bits(req->pos + req->len))
-		return afs_fs_fetch_data64(fc, key, vnode, req, async);
+		return afs_fs_fetch_data64(fc, req);
 
 	_enter("");
 
@@ -565,7 +567,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
@@ -581,9 +583,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc,
 	bp[5] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	call->cb_break = vnode->cb_break + fc->server->cb_s_break;
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -626,15 +628,13 @@ static const struct afs_call_type afs_RXFSCreateXXXX = {
  * create a file or make a directory
  */
 int afs_fs_create(struct afs_fs_cursor *fc,
-		  struct key *key,
-		  struct afs_vnode *vnode,
 		  const char *name,
 		  umode_t mode,
 		  struct afs_fid *newfid,
 		  struct afs_file_status *newstatus,
-		  struct afs_callback *newcb,
-		  bool async)
+		  struct afs_callback *newcb)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
@@ -651,7 +651,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = newfid;
 	call->reply[2] = newstatus;
@@ -677,8 +677,8 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -717,13 +717,9 @@ static const struct afs_call_type afs_RXFSRemoveXXXX = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_fs_cursor *fc,
-		  struct key *key,
-		  struct afs_vnode *vnode,
-		  const char *name,
-		  bool isdir,
-		  bool async)
+int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
@@ -739,7 +735,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 
 	/* marshall the parameters */
@@ -756,8 +752,8 @@ int afs_fs_remove(struct afs_fs_cursor *fc,
 		bp = (void *) bp + padsz;
 	}
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -797,13 +793,10 @@ static const struct afs_call_type afs_RXFSLink = {
 /*
  * make a hard link
  */
-int afs_fs_link(struct afs_fs_cursor *fc,
-		struct key *key,
-		struct afs_vnode *dvnode,
-		struct afs_vnode *vnode,
-		const char *name,
-		bool async)
+int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		const char *name)
 {
+	struct afs_vnode *dvnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz;
@@ -819,7 +812,7 @@ int afs_fs_link(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = dvnode;
 	call->reply[1] = vnode;
 
@@ -840,8 +833,8 @@ int afs_fs_link(struct afs_fs_cursor *fc,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -883,14 +876,12 @@ static const struct afs_call_type afs_RXFSSymlink = {
  * create a symbolic link
  */
 int afs_fs_symlink(struct afs_fs_cursor *fc,
-		   struct key *key,
-		   struct afs_vnode *vnode,
 		   const char *name,
 		   const char *contents,
 		   struct afs_fid *newfid,
-		   struct afs_file_status *newstatus,
-		   bool async)
+		   struct afs_file_status *newstatus)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	size_t namesz, reqsz, padsz, c_namesz, c_padsz;
@@ -911,7 +902,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = newfid;
 	call->reply[2] = newstatus;
@@ -943,8 +934,8 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -987,13 +978,11 @@ static const struct afs_call_type afs_RXFSRename = {
  * create a symbolic link
  */
 int afs_fs_rename(struct afs_fs_cursor *fc,
-		  struct key *key,
-		  struct afs_vnode *orig_dvnode,
 		  const char *orig_name,
 		  struct afs_vnode *new_dvnode,
-		  const char *new_name,
-		  bool async)
+		  const char *new_name)
 {
+	struct afs_vnode *orig_dvnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(orig_dvnode);
 	size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
@@ -1016,7 +1005,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = orig_dvnode;
 	call->reply[1] = new_dvnode;
 
@@ -1045,8 +1034,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 		bp = (void *) bp + n_padsz;
 	}
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1098,8 +1087,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 			       struct afs_writeback *wb,
 			       pgoff_t first, pgoff_t last,
 			       unsigned offset, unsigned to,
-			       loff_t size, loff_t pos, loff_t i_size,
-			       bool async)
+			       loff_t size, loff_t pos, loff_t i_size)
 {
 	struct afs_vnode *vnode = wb->vnode;
 	struct afs_call *call;
@@ -1147,8 +1135,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	*bp++ = htonl(i_size >> 32);
 	*bp++ = htonl((u32) i_size);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1156,8 +1143,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
  */
 int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 		      pgoff_t first, pgoff_t last,
-		      unsigned offset, unsigned to,
-		      bool async)
+		      unsigned offset, unsigned to)
 {
 	struct afs_vnode *vnode = wb->vnode;
 	struct afs_call *call;
@@ -1184,7 +1170,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 
 	if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
 		return afs_fs_store_data64(fc, wb, first, last, offset, to,
-					   size, pos, i_size, async);
+					   size, pos, i_size);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
 				   (4 + 6 + 3) * 4,
@@ -1221,8 +1207,8 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 	*bp++ = htonl(size);
 	*bp++ = htonl(i_size);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1279,16 +1265,15 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
  * set the attributes on a very large file, using FS.StoreData rather than
  * FS.StoreStatus so as to alter the file size also
  */
-static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key,
-				 struct afs_vnode *vnode, struct iattr *attr,
-				 bool async)
+static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
 
@@ -1298,7 +1283,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
@@ -1319,28 +1304,27 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key,
 	*bp++ = htonl(attr->ia_size >> 32);	/* new file length */
 	*bp++ = htonl((u32) attr->ia_size);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
  * so as to alter the file size also
  */
-static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key,
-			       struct afs_vnode *vnode, struct iattr *attr,
-			       bool async)
+static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
 	if (attr->ia_size >> 32)
-		return afs_fs_setattr_size64(fc, key, vnode, attr, async);
+		return afs_fs_setattr_size64(fc, attr);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
 				   (4 + 6 + 3) * 4,
@@ -1348,7 +1332,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
 	call->operation_ID = FSSTOREDATA;
@@ -1366,27 +1350,26 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key,
 	*bp++ = 0;				/* size of write */
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData if there's a change in file
  * size, and FS.StoreStatus otherwise
  */
-int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key,
-		   struct afs_vnode *vnode, struct iattr *attr,
-		   bool async)
+int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	if (attr->ia_valid & ATTR_SIZE)
-		return afs_fs_setattr_size(fc, key, vnode, attr, async);
+		return afs_fs_setattr_size(fc, attr);
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
 				   (4 + 6) * 4,
@@ -1394,7 +1377,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->operation_ID = FSSTORESTATUS;
 
@@ -1407,8 +1390,8 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key,
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1606,11 +1589,9 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = {
  * fetch the status of a volume
  */
 int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
-			     struct key *key,
-			     struct afs_vnode *vnode,
-			     struct afs_volume_status *vs,
-			     bool async)
+			     struct afs_volume_status *vs)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -1628,7 +1609,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 		return -ENOMEM;
 	}
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = vs;
 	call->reply[2] = tmpbuf;
@@ -1638,8 +1619,8 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 	bp[0] = htonl(FSGETVOLUMESTATUS);
 	bp[1] = htonl(vnode->fid.vid);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1692,14 +1673,11 @@ static const struct afs_call_type afs_RXFSReleaseLock = {
 };
 
 /*
- * get a lock on a file
+ * Set a lock on a file
  */
-int afs_fs_set_lock(struct afs_fs_cursor *fc,
-		    struct key *key,
-		    struct afs_vnode *vnode,
-		    afs_lock_type_t type,
-		    bool async)
+int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -1710,7 +1688,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 
 	/* marshall the parameters */
@@ -1721,18 +1699,16 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc,
 	*bp++ = htonl(vnode->fid.unique);
 	*bp++ = htonl(type);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
  * extend a lock on a file
  */
-int afs_fs_extend_lock(struct afs_fs_cursor *fc,
-		       struct key *key,
-		       struct afs_vnode *vnode,
-		       bool async)
+int afs_fs_extend_lock(struct afs_fs_cursor *fc)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -1743,7 +1719,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 
 	/* marshall the parameters */
@@ -1753,18 +1729,16 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
  * release a lock on a file
  */
-int afs_fs_release_lock(struct afs_fs_cursor *fc,
-			struct key *key,
-			struct afs_vnode *vnode,
-			bool async)
+int afs_fs_release_lock(struct afs_fs_cursor *fc)
 {
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
@@ -1775,7 +1749,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
+	call->key = fc->key;
 	call->reply[0] = vnode;
 
 	/* marshall the parameters */
@@ -1785,8 +1759,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	afs_use_fs_server(call, fc->server);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, async);
+	afs_use_fs_server(call, fc->cbi);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
 /*
@@ -1809,17 +1783,17 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
 /*
  * Flush all the callbacks we have on a server.
  */
-int afs_fs_give_up_all_callbacks(struct afs_server *server,
+int afs_fs_give_up_all_callbacks(struct afs_net *net,
+				 struct afs_server *server,
 				 struct afs_addr_cursor *ac,
-				 struct key *key,
-				 bool async)
+				 struct key *key)
 {
 	struct afs_call *call;
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(server->net, &afs_RXFSGiveUpAllCallBacks, 2 * 4, 0);
+	call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0);
 	if (!call)
 		return -ENOMEM;
 
@@ -1830,5 +1804,96 @@ int afs_fs_give_up_all_callbacks(struct afs_server *server,
 	*bp++ = htonl(FSGIVEUPALLCALLBACKS);
 
 	/* Can't take a ref on server */
-	return afs_make_call(ac, call, GFP_NOFS, async);
+	return afs_make_call(ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an FS.GetCapabilities operation.
+ */
+static int afs_deliver_fs_get_capabilities(struct afs_call *call)
+{
+	u32 count;
+	int ret;
+
+	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract the capabilities word count */
+	case 1:
+		ret = afs_extract_data(call, &call->tmp,
+				       1 * sizeof(__be32),
+				       true);
+		if (ret < 0)
+			return ret;
+
+		count = ntohl(call->tmp);
+
+		call->count = count;
+		call->count2 = count;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract capabilities words */
+	case 2:
+		count = min(call->count, 16U);
+		ret = afs_extract_data(call, call->buffer,
+				       count * sizeof(__be32),
+				       call->count > 16);
+		if (ret < 0)
+			return ret;
+
+		/* TODO: Examine capabilities */
+
+		call->count -= count;
+		if (call->count > 0)
+			goto again;
+		call->offset = 0;
+		call->unmarshall++;
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXFSGetCapabilities = {
+	.name		= "FS.GetCapabilities",
+	.deliver	= afs_deliver_fs_get_capabilities,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports.  This can
+ * return up to 196 words.
+ */
+int afs_fs_get_capabilities(struct afs_net *net,
+			    struct afs_server *server,
+			    struct afs_addr_cursor *ac,
+			    struct key *key)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSGETCAPABILITIES);
+
+	/* Can't take a ref on server */
+	return afs_make_call(ac, call, GFP_NOFS, false);
 }
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index ee86d5ad22d1..5a2f5854f349 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -101,6 +101,35 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	return 0;
 }
 
+/*
+ * Fetch file status from the volume.
+ */
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	int ret;
+
+	_enter("%s,{%x:%u.%u,S=%lx}",
+	       vnode->volume->name,
+	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+	       vnode->flags);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_fetch_file_status(&fc, NULL);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * iget5() comparator
  */
@@ -205,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
  */
 struct inode *afs_iget(struct super_block *sb, struct key *key,
 		       struct afs_fid *fid, struct afs_file_status *status,
-		       struct afs_callback *cb)
+		       struct afs_callback *cb, struct afs_cb_interest *cbi)
 {
 	struct afs_iget_data data = { .fid = *fid };
 	struct afs_super_info *as;
@@ -238,7 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	if (!status) {
 		/* it's a remotely extant inode */
-		ret = afs_vnode_fetch_status(vnode, key, true);
+		ret = afs_fetch_status(vnode, key);
 		if (ret < 0)
 			goto bad_inode;
 	} else {
@@ -255,6 +284,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 			vnode->cb_version = cb->version;
 			vnode->cb_type = cb->type;
 			vnode->cb_expires_at = cb->expiry;
+			vnode->cb_interest = afs_get_cb_interest(cbi);
 			set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 		}
 
@@ -358,7 +388,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * access */
 	if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, key, false);
+		ret = afs_fetch_status(vnode, key);
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -468,6 +498,7 @@ void afs_evict_inode(struct inode *inode)
  */
 int afs_setattr(struct dentry *dentry, struct iattr *attr)
 {
+	struct afs_fs_cursor fc;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 	struct key *key;
 	int ret;
@@ -498,7 +529,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 
-	ret = afs_vnode_setattr(vnode, key, attr);
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_setattr(&fc, attr);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
 	if (!(attr->ia_valid & ATTR_FILE))
 		key_put(key);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index df52bf18a263..1fadf40551fd 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -32,16 +32,6 @@
 struct pagevec;
 struct afs_call;
 
-typedef enum {
-	AFS_VL_NEW,			/* new, uninitialised record */
-	AFS_VL_CREATING,		/* creating record */
-	AFS_VL_VALID,			/* record is pending */
-	AFS_VL_NO_VOLUME,		/* no such volume available */
-	AFS_VL_UPDATING,		/* update in progress */
-	AFS_VL_VOLUME_DELETED,		/* volume was deleted */
-	AFS_VL_UNCERTAIN,		/* uncertain state (update failed) */
-} __attribute__((packed)) afs_vlocation_state_t;
-
 struct afs_mount_params {
 	bool			rwpath;		/* T if the parent should be considered R/W */
 	bool			force;		/* T to force cell type */
@@ -76,8 +66,10 @@ enum afs_call_state {
 struct afs_addr_list {
 	struct rcu_head		rcu;		/* Must be first */
 	refcount_t		usage;
+	u32			version;	/* Version */
 	unsigned short		nr_addrs;
 	unsigned short		index;		/* Address currently in use */
+	unsigned short		nr_ipv4;	/* Number of IPv4 addresses */
 	struct sockaddr_rxrpc	addrs[];
 };
 
@@ -93,7 +85,7 @@ struct afs_call {
 	struct key		*key;		/* security for this call */
 	struct afs_net		*net;		/* The network namespace */
 	struct afs_server	*cm_server;	/* Server affected by incoming CM call */
-	struct afs_server	*server;	/* Server used by client call */
+	struct afs_cb_interest	*cbi;		/* Callback interest for server used */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* page set */
 	struct afs_writeback	*wb;		/* writeback being performed */
@@ -192,7 +184,6 @@ struct afs_super_info {
 	struct afs_net		*net;		/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
-	char			rwparent;	/* T if parent is R/W AFS volume */
 };
 
 static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -228,26 +219,26 @@ struct afs_net {
 	spinlock_t		proc_cells_lock;
 	struct list_head	proc_cells;
 
-	/* Volume location database */
-	struct list_head	vl_updates;		/* VL records in need-update order */
-	struct list_head	vl_graveyard;		/* Inactive VL records */
-	struct delayed_work	vl_reaper;
-	struct delayed_work	vl_updater;
-	spinlock_t		vl_updates_lock;
-	spinlock_t		vl_graveyard_lock;
+	/* Known servers.  Theoretically each fileserver can only be in one
+	 * cell, but in practice, people create aliases and subsets and there's
+	 * no easy way to distinguish them.
+	 */
+	seqlock_t		fs_lock;	/* For fs_servers */
+	struct rb_root		fs_servers;	/* afs_server (by server UUID or address) */
+	struct list_head	fs_updates;	/* afs_server (by update_at) */
+	struct hlist_head	fs_proc;	/* procfs servers list */
 
-	/* File locking renewal management */
-	struct mutex		lock_manager_mutex;
+	struct hlist_head	fs_addresses4;	/* afs_server (by lowest IPv4 addr) */
+	struct hlist_head	fs_addresses6;	/* afs_server (by lowest IPv6 addr) */
+	seqlock_t		fs_addr_lock;	/* For fs_addresses[46] */
 
-	/* Server database */
-	struct rb_root		servers;		/* Active servers */
-	rwlock_t		servers_lock;
-	struct list_head	server_graveyard;	/* Inactive server LRU list */
-	spinlock_t		server_graveyard_lock;
-	struct timer_list	server_timer;
-	struct work_struct	server_reaper;
+	struct work_struct	fs_manager;
+	struct timer_list	fs_timer;
 	atomic_t		servers_outstanding;
 
+	/* File locking renewal management */
+	struct mutex		lock_manager_mutex;
+
 	/* Misc */
 	struct proc_dir_entry	*proc_afs;		/* /proc/net/afs directory */
 };
@@ -264,7 +255,21 @@ enum afs_cell_state {
 };
 
 /*
- * AFS cell record
+ * AFS cell record.
+ *
+ * This is a tricky concept to get right as it is possible to create aliases
+ * simply by pointing AFSDB/SRV records for two names at the same set of VL
+ * servers; it is also possible to do things like setting up two sets of VL
+ * servers, one of which provides a superset of the volumes provided by the
+ * other (for internal/external division, for example).
+ *
+ * Cells only exist in the sense that (a) a cell's name maps to a set of VL
+ * servers and (b) a cell's name is used by the client to select the key to use
+ * for authentication and encryption.  The cell name is not typically used in
+ * the protocol.
+ *
+ * There is no easy way to determine if two cells are aliases or one is a
+ * subset of another.
  */
 struct afs_cell {
 	union {
@@ -278,14 +283,6 @@ struct afs_cell {
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
-
-	/* server record management */
-	rwlock_t		servers_lock;	/* active server list lock */
-	struct list_head	servers;	/* active server list */
-
-	/* volume location record management */
-	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
-	struct list_head	vl_list;	/* cell's active VL record list */
 	time64_t		dns_expiry;	/* Time AFSDB/SRV record expires */
 	time64_t		last_inactive;	/* Time of last drop of usage count */
 	atomic_t		usage;
@@ -298,9 +295,11 @@ struct afs_cell {
 	enum afs_cell_state	state;
 	short			error;
 
-	spinlock_t		vl_lock;	/* vl_list lock */
+	/* Active fileserver interaction state. */
+	struct list_head	proc_volumes;	/* procfs volume list */
+	rwlock_t		proc_lock;
 
-	/* VLDB server list. */
+	/* VL server list. */
 	rwlock_t		vl_addrs_lock;	/* Lock on vl_addrs */
 	struct afs_addr_list	__rcu *vl_addrs; /* List of VL servers */
 	u8			name_len;	/* Length of name */
@@ -308,65 +307,62 @@ struct afs_cell {
 };
 
 /*
- * entry in the cached volume location catalogue
+ * Cached VLDB entry.
+ *
+ * This is pointed to by cell->vldb_entries, indexed by name.
  */
-struct afs_cache_vlocation {
-	/* volume name (lowercase, padded with NULs) */
-	uint8_t			name[AFS_MAXVOLNAME + 1];
+struct afs_vldb_entry {
+	afs_volid_t		vid[3];		/* Volume IDs for R/W, R/O and Bak volumes */
 
-	uint8_t			nservers;	/* number of entries used in servers[] */
-	uint8_t			vidmask;	/* voltype mask for vid[] */
-	uint8_t			srvtmask[8];	/* voltype masks for servers[] */
+	unsigned long		flags;
+#define AFS_VLDB_HAS_RW		0		/* - R/W volume exists */
+#define AFS_VLDB_HAS_RO		1		/* - R/O volume exists */
+#define AFS_VLDB_HAS_BAK	2		/* - Backup volume exists */
+#define AFS_VLDB_QUERY_VALID	3		/* - Record is valid */
+#define AFS_VLDB_QUERY_ERROR	4		/* - VL server returned error */
+
+	uuid_t			fs_server[AFS_NMAXNSERVERS];
+	u8			fs_mask[AFS_NMAXNSERVERS];
 #define AFS_VOL_VTM_RW	0x01 /* R/W version of the volume is available (on this server) */
 #define AFS_VOL_VTM_RO	0x02 /* R/O version of the volume is available (on this server) */
 #define AFS_VOL_VTM_BAK	0x04 /* backup version of the volume is available (on this server) */
-
-	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
-	struct sockaddr_rxrpc	servers[8];	/* fileserver addresses */
-	time_t			rtime;		/* last retrieval time */
-};
-
-/*
- * AFS volume location record
- */
-struct afs_vlocation {
-	atomic_t		usage;
-	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
-	struct list_head	link;		/* link in cell volume location list */
-	struct list_head	grave;		/* link in master graveyard list */
-	struct list_head	update;		/* link in master update list */
-	struct afs_cell		*cell;		/* cell to which volume belongs */
-	struct afs_cache_vlocation vldb;	/* volume information DB record */
-	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
-	wait_queue_head_t	waitq;		/* status change waitqueue */
-	time64_t		update_at;	/* time at which record should be updated */
-	spinlock_t		lock;		/* access lock */
-	afs_vlocation_state_t	state;		/* volume location state */
-	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
-	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
-	bool			valid;		/* T if valid */
+	short			error;
+	u8			nr_servers;	/* Number of server records */
+	u8			name_len;
+	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
 };
 
 /*
- * AFS fileserver record
+ * Record of fileserver with which we're actively communicating.
  */
 struct afs_server {
-	atomic_t		usage;
-	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
-	struct afs_addr_list	__rcu *addrs;	/* List of addresses for this server */
-	struct afs_net		*net;		/* Network namespace in which the server resides */
-	struct afs_cell		*cell;		/* cell in which server resides */
-	struct list_head	link;		/* link in cell's server list */
-	struct list_head	grave;		/* link in master graveyard list */
-
-	struct rb_node		master_rb;	/* link in master by-addr tree */
-	struct rw_semaphore	sem;		/* access lock */
+	struct rcu_head		rcu;
+	union {
+		uuid_t		uuid;		/* Server ID */
+		struct afs_uuid	_uuid;
+	};
+
+	struct afs_addr_list	__rcu *addresses;
+	struct rb_node		uuid_rb;	/* Link in net->servers */
+	struct hlist_node	addr4_link;	/* Link in net->fs_addresses4 */
+	struct hlist_node	addr6_link;	/* Link in net->fs_addresses6 */
+	struct hlist_node	proc_link;	/* Link in net->fs_proc */
+	struct afs_server	*gc_next;	/* Next server in manager's list */
+	time64_t		put_time;	/* Time at which last put */
+	time64_t		update_at;	/* Time at which to next update the record */
 	unsigned long		flags;
-#define AFS_SERVER_NEW		0		/* New server, don't inc cb_s_break */
+#define AFS_SERVER_FL_NEW	0		/* New server, don't inc cb_s_break */
+#define AFS_SERVER_FL_NOT_READY	1		/* The record is not ready for use */
+#define AFS_SERVER_FL_NOT_FOUND	2		/* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL	3		/* Failed to access VL server */
+#define AFS_SERVER_FL_UPDATING	4
+#define AFS_SERVER_FL_PROBED	5		/* The fileserver has been probed */
+#define AFS_SERVER_FL_PROBING	6		/* Fileserver is being probed */
+	atomic_t		usage;
+	u32			addr_version;	/* Address list version */
 
 	/* file service access */
-	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
-	spinlock_t		fs_lock;	/* access lock */
+	rwlock_t		fs_lock;	/* access lock */
 
 	/* callback promise management */
 	struct list_head	cb_interests;	/* List of superblocks using this server */
@@ -386,32 +382,50 @@ struct afs_cb_interest {
 };
 
 /*
- * AFS volume access record
+ * Replaceable server list.
+ */
+struct afs_server_entry {
+	struct afs_server	*server;
+	struct afs_cb_interest	*cb_interest;
+};
+
+struct afs_server_list {
+	refcount_t		usage;
+	unsigned short		nr_servers;
+	unsigned short		index;		/* Server currently in use */
+	unsigned short		vnovol_mask;	/* Servers to be skipped due to VNOVOL */
+	unsigned int		seq;		/* Set to ->servers_seq when installed */
+	struct afs_server_entry	servers[];
+};
+
+/*
+ * Live AFS volume management.
  */
 struct afs_volume {
+	afs_volid_t		vid;		/* volume ID */
 	atomic_t		usage;
-	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
-	struct afs_vlocation	*vlocation;	/* volume location */
+	time64_t		update_at;	/* Time at which to next update */
+	struct afs_cell		*cell;		/* Cell to which belongs (pins ref) */
+	struct list_head	proc_link;	/* Link in cell->vl_proc */
+	unsigned long		flags;
+#define AFS_VOLUME_NEEDS_UPDATE	0	/* - T if an update needs performing */
+#define AFS_VOLUME_UPDATING	1	/* - T if an update is in progress */
+#define AFS_VOLUME_WAIT		2	/* - T if users must wait for update */
+#define AFS_VOLUME_DELETED	3	/* - T if volume appears deleted */
+#define AFS_VOLUME_OFFLINE	4	/* - T if volume offline notice given */
+#define AFS_VOLUME_BUSY		5	/* - T if volume busy notice given */
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
-	afs_volid_t		vid;		/* volume ID */
+	struct afs_server_list	*servers;	/* List of servers on which volume resides */
+	rwlock_t		servers_lock;	/* Lock for ->servers */
+	unsigned int		servers_seq;	/* Incremented each time ->servers changes */
+
 	afs_voltype_t		type;		/* type of volume */
+	short			error;
 	char			type_force;	/* force volume type (suppress R/O -> R/W) */
-	unsigned short		nservers;	/* number of server slots filled */
-	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
-	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
-	struct afs_cb_interest	*cb_interests[8]; /* Interests on servers for callbacks */
-	struct rw_semaphore	server_sem;	/* lock for accessing current server */
-};
-
-/*
- * vnode catalogue entry
- */
-struct afs_cache_vnode {
-	afs_vnodeid_t		vnode_id;	/* vnode ID */
-	unsigned		vnode_unique;	/* vnode ID uniquifier */
-	afs_dataversion_t	data_version;	/* data version */
+	u8			name_len;
+	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
 };
 
 /*
@@ -427,10 +441,8 @@ struct afs_vnode {
 	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
 	struct afs_permits	*permit_cache;	/* cache of permits so far obtained */
+	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
 	struct mutex		validate_lock;	/* lock for validating this vnode */
-	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
-	int			update_cnt;	/* number of outstanding ops that will update the
-						 * status */
 	spinlock_t		writeback_lock;	/* lock for writebacks */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
@@ -501,6 +513,7 @@ struct afs_interface {
 struct afs_addr_cursor {
 	struct afs_addr_list	*alist;		/* Current address list (pins ref) */
 	struct sockaddr_rxrpc	*addr;
+	u32			abort_code;
 	unsigned short		start;		/* Starting point in alist->addrs[] */
 	unsigned short		index;		/* Wrapping offset from start to current addr */
 	short			error;
@@ -513,7 +526,21 @@ struct afs_addr_cursor {
  */
 struct afs_fs_cursor {
 	struct afs_addr_cursor	ac;
-	struct afs_server	*server;	/* Current server (pins ref) */
+	struct afs_vnode	*vnode;
+	struct afs_server_list	*server_list;	/* Current server list (pins ref) */
+	struct afs_cb_interest	*cbi;		/* Server on which this resides (pins ref) */
+	struct key		*key;		/* Key for the server */
+	unsigned int		cb_break;	/* cb_break + cb_s_break before the call */
+	unsigned int		cb_break_2;	/* cb_break + cb_s_break (2nd vnode) */
+	unsigned char		start;		/* Initial index in server list */
+	unsigned char		index;		/* Number of servers tried beyond start */
+	unsigned short		flags;
+#define AFS_FS_CURSOR_STOP	0x0001		/* Set to cease iteration */
+#define AFS_FS_CURSOR_VBUSY	0x0002		/* Set if seen VBUSY */
+#define AFS_FS_CURSOR_VMOVED	0x0004		/* Set if seen VMOVED */
+#define AFS_FS_CURSOR_VNOVOL	0x0008		/* Set if seen VNOVOL */
+#define AFS_FS_CURSOR_CUR_ONLY	0x0010		/* Set if current server only (file lock held) */
+#define AFS_FS_CURSOR_NO_VSLEEP	0x0020		/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
 };
 
 /*****************************************************************************/
@@ -537,6 +564,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *);
 extern int afs_end_cursor(struct afs_addr_cursor *);
 extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
 
+extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32);
+
 /*
  * cache.c
  */
@@ -558,10 +587,9 @@ extern void afs_init_callback_state(struct afs_server *);
 extern void afs_break_callback(struct afs_vnode *);
 extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
 
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **,
-					   struct afs_server *);
+extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
 extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
-extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *);
+extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
 
 static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
 {
@@ -603,6 +631,7 @@ extern const struct file_operations afs_file_operations;
 
 extern int afs_open(struct inode *, struct file *);
 extern int afs_release(struct inode *, struct file *);
+extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *);
 extern int afs_page_filler(void *, struct page *);
 extern void afs_put_read(struct afs_read *);
 
@@ -619,51 +648,40 @@ extern int afs_flock(struct file *, int, struct file_lock *);
 /*
  * fsclient.c
  */
-extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *,
-				    struct afs_vnode *, struct afs_volsync *,
-				    bool);
-extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool);
-extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *,
-			     struct afs_vnode *, struct afs_read *, bool);
-extern int afs_fs_create(struct afs_fs_cursor *, struct key *,
-			 struct afs_vnode *, const char *, umode_t,
-			 struct afs_fid *, struct afs_file_status *,
-			 struct afs_callback *, bool);
-extern int afs_fs_remove(struct afs_fs_cursor *, struct key *,
-			 struct afs_vnode *, const char *, bool, bool);
-extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *,
-		       struct afs_vnode *, const char *, bool);
-extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *,
-			  struct afs_vnode *, const char *, const char *,
-			  struct afs_fid *, struct afs_file_status *, bool);
-extern int afs_fs_rename(struct afs_fs_cursor *, struct key *,
-			 struct afs_vnode *, const char *,
-			 struct afs_vnode *, const char *, bool);
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *);
+extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
+extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
+			 struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool);
+extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *);
+extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+			  struct afs_fid *, struct afs_file_status *);
+extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
+			 struct afs_vnode *, const char *);
 extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *,
-			     pgoff_t, pgoff_t, unsigned, unsigned, bool);
-extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *,
-			  struct afs_vnode *, struct iattr *, bool);
-extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *,
-				    struct afs_vnode *,
-				    struct afs_volume_status *, bool);
-extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *,
-			   struct afs_vnode *, afs_lock_type_t, bool);
-extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *,
-			      struct afs_vnode *, bool);
-extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *,
-			       struct afs_vnode *, bool);
-extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *,
-					struct key *, bool);
+			     pgoff_t, pgoff_t, unsigned, unsigned);
+extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int afs_fs_extend_lock(struct afs_fs_cursor *);
+extern int afs_fs_release_lock(struct afs_fs_cursor *);
+extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
+					struct afs_addr_cursor *, struct key *);
+extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
+				   struct afs_addr_cursor *, struct key *);
 
 /*
  * inode.c
  */
+extern int afs_fetch_status(struct afs_vnode *, struct key *);
 extern int afs_iget5_test(struct inode *, void *);
 extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
 				       struct key *);
 extern struct inode *afs_iget(struct super_block *, struct key *,
 			      struct afs_fid *, struct afs_file_status *,
-			      struct afs_callback *);
+			      struct afs_callback *,
+			      struct afs_cb_interest *);
 extern void afs_zap_data(struct afs_vnode *);
 extern int afs_validate(struct afs_vnode *, struct key *);
 extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -733,6 +751,15 @@ extern void __net_exit afs_proc_cleanup(struct afs_net *);
 extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
 extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
 
+/*
+ * rotate.c
+ */
+extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *,
+				      struct key *);
+extern bool afs_select_fileserver(struct afs_fs_cursor *);
+extern bool afs_select_current_fileserver(struct afs_fs_cursor *);
+extern int afs_end_vnode_operation(struct afs_fs_cursor *);
+
 /*
  * rxrpc.c
  */
@@ -779,104 +806,62 @@ static inline struct afs_server *afs_get_server(struct afs_server *server)
 	return server;
 }
 
-extern void afs_server_timer(struct timer_list *);
-extern struct afs_server *afs_lookup_server(struct afs_cell *,
-					    struct sockaddr_rxrpc *);
 extern struct afs_server *afs_find_server(struct afs_net *,
 					  const struct sockaddr_rxrpc *);
+extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
+extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
 extern void afs_put_server(struct afs_net *, struct afs_server *);
-extern void afs_reap_server(struct work_struct *);
+extern void afs_manage_servers(struct work_struct *);
+extern void afs_servers_timer(struct timer_list *);
 extern void __net_exit afs_purge_servers(struct afs_net *);
+extern bool afs_probe_fileserver(struct afs_fs_cursor *);
+extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
 
 /*
- * super.c
+ * server_list.c
  */
-extern int __init afs_fs_init(void);
-extern void __exit afs_fs_exit(void);
+static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist)
+{
+	refcount_inc(&slist->usage);
+	return slist;
+}
 
-/*
- * vlclient.c
- */
-extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *,
-				    struct key *, const char *,
-				    struct afs_cache_vlocation *, bool);
-extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *,
-				  struct key *, afs_volid_t, afs_voltype_t,
-				  struct afs_cache_vlocation *, bool);
+extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
+extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
+						     struct afs_vldb_entry *,
+						     u8);
+extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
 
 /*
- * vlocation.c
+ * super.c
  */
-extern struct workqueue_struct *afs_vlocation_update_worker;
-
-#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
-
-extern struct afs_vlocation *afs_vlocation_lookup(struct afs_net *,
-						  struct afs_cell *,
-						  struct key *,
-						  const char *, size_t);
-extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *);
-extern void afs_vlocation_updater(struct work_struct *);
-extern void afs_vlocation_reaper(struct work_struct *);
-extern void __net_exit afs_vlocation_purge(struct afs_net *);
+extern int __init afs_fs_init(void);
+extern void __exit afs_fs_exit(void);
 
 /*
- * vnode.c
+ * vlclient.c
  */
-static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
-{
-	return container_of(inode, struct afs_vnode, vfs_inode);
-}
-
-static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
-{
-	return &vnode->vfs_inode;
-}
-
-extern void afs_vnode_finalise_status_update(struct afs_vnode *,
-					     struct afs_server *);
-extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool);
-extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
-				struct afs_read *);
-extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
-			    umode_t, struct afs_fid *, struct afs_file_status *,
-			    struct afs_callback *, struct afs_server **);
-extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
-			    bool);
-extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
-			  const char *);
-extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
-			     const char *, struct afs_fid *,
-			     struct afs_file_status *, struct afs_server **);
-extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
-			    struct key *, const char *, const char *);
-extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
-				unsigned, unsigned);
-extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
-extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
-				       struct afs_volume_status *);
-extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
-			      afs_lock_type_t);
-extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
-extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
+							 struct afs_addr_cursor *,
+							 struct key *, const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
+						struct key *, const uuid_t *);
 
 /*
  * volume.c
  */
-static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
+static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
 {
 	if (volume)
 		atomic_inc(&volume->usage);
 	return volume;
 }
 
+extern struct afs_volume *afs_create_volume(struct afs_mount_params *);
+extern void afs_activate_volume(struct afs_volume *);
+extern void afs_deactivate_volume(struct afs_volume *);
 extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
-extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
-extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
-extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
-extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *);
-extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
-extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *);
+extern int afs_check_volume_status(struct afs_volume *, struct key *);
 
 /*
  * write.c
@@ -903,6 +888,38 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int);
 extern const struct xattr_handler *afs_xattr_handlers[];
 extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
 
+
+/*
+ * Miscellaneous inline functions.
+ */
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+	return container_of(inode, struct afs_vnode, vfs_inode);
+}
+
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+	return &vnode->vfs_inode;
+}
+
+static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc,
+					   struct afs_vnode *vnode,
+					   unsigned int cb_break)
+{
+	if (fc->ac.error == 0)
+		afs_cache_permit(vnode, fc->key, cb_break);
+}
+
+static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
+						 struct afs_vnode *vnode)
+{
+	if (fc->ac.error == -ENOENT) {
+		set_bit(AFS_VNODE_DELETED, &vnode->flags);
+		afs_break_callback(vnode);
+	}
+}
+
+
 /*****************************************************************************/
 /*
  * debug tracing
diff --git a/fs/afs/main.c b/fs/afs/main.c
index e7f87d723761..15a02a05ff40 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,18 +55,17 @@ static int __net_init afs_net_init(struct afs_net *net)
 	spin_lock_init(&net->proc_cells_lock);
 	INIT_LIST_HEAD(&net->proc_cells);
 
-	INIT_LIST_HEAD(&net->vl_updates);
-	INIT_LIST_HEAD(&net->vl_graveyard);
-	INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper);
-	INIT_DELAYED_WORK(&net->vl_updater, afs_vlocation_updater);
-	spin_lock_init(&net->vl_updates_lock);
-	spin_lock_init(&net->vl_graveyard_lock);
-	net->servers = RB_ROOT;
-	rwlock_init(&net->servers_lock);
-	INIT_LIST_HEAD(&net->server_graveyard);
-	spin_lock_init(&net->server_graveyard_lock);
-	INIT_WORK(&net->server_reaper, afs_reap_server);
-	timer_setup(&net->server_timer, afs_server_timer, 0);
+	seqlock_init(&net->fs_lock);
+	net->fs_servers = RB_ROOT;
+	INIT_LIST_HEAD(&net->fs_updates);
+	INIT_HLIST_HEAD(&net->fs_proc);
+
+	INIT_HLIST_HEAD(&net->fs_addresses4);
+	INIT_HLIST_HEAD(&net->fs_addresses6);
+	seqlock_init(&net->fs_addr_lock);
+
+	INIT_WORK(&net->fs_manager, afs_manage_servers);
+	timer_setup(&net->fs_timer, afs_servers_timer, 0);
 
 	/* Register the /proc stuff */
 	ret = afs_proc_init(net);
@@ -87,8 +86,8 @@ static int __net_init afs_net_init(struct afs_net *net)
 
 error_open_socket:
 	net->live = false;
-	afs_vlocation_purge(net);
 	afs_cell_purge(net);
+	afs_purge_servers(net);
 error_cell_init:
 	net->live = false;
 	afs_proc_cleanup(net);
@@ -103,9 +102,8 @@ error_proc:
 static void __net_exit afs_net_exit(struct afs_net *net)
 {
 	net->live = false;
-	afs_purge_servers(net);
-	afs_vlocation_purge(net);
 	afs_cell_purge(net);
+	afs_purge_servers(net);
 	afs_close_socket(net);
 	afs_proc_cleanup(net);
 }
@@ -125,10 +123,6 @@ static int __init afs_init(void)
 	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
 	if (!afs_async_calls)
 		goto error_async;
-	afs_vlocation_update_worker =
-		alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0);
-	if (!afs_vlocation_update_worker)
-		goto error_vl_up;
 	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
 	if (!afs_lock_manager)
 		goto error_lockmgr;
@@ -160,8 +154,6 @@ error_cache:
 #endif
 	destroy_workqueue(afs_lock_manager);
 error_lockmgr:
-	destroy_workqueue(afs_vlocation_update_worker);
-error_vl_up:
 	destroy_workqueue(afs_async_calls);
 error_async:
 	destroy_workqueue(afs_wq);
@@ -189,7 +181,6 @@ static void __exit afs_exit(void)
 	fscache_unregister_netfs(&afs_cache_netfs);
 #endif
 	destroy_workqueue(afs_lock_manager);
-	destroy_workqueue(afs_vlocation_update_worker);
 	destroy_workqueue(afs_async_calls);
 	destroy_workqueue(afs_wq);
 	afs_clean_up_permit_cache();
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9cf9ce88a8dd..4508dd54f789 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -105,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
 	.release	= seq_release,
 };
 
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
-static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
+static int afs_proc_servers_open(struct inode *inode, struct file *file);
+static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_servers_next(struct seq_file *p, void *v,
 					loff_t *pos);
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
-
-static const struct seq_operations afs_proc_cell_servers_ops = {
-	.start	= afs_proc_cell_servers_start,
-	.next	= afs_proc_cell_servers_next,
-	.stop	= afs_proc_cell_servers_stop,
-	.show	= afs_proc_cell_servers_show,
+static void afs_proc_servers_stop(struct seq_file *p, void *v);
+static int afs_proc_servers_show(struct seq_file *m, void *v);
+
+static const struct seq_operations afs_proc_servers_ops = {
+	.start	= afs_proc_servers_start,
+	.next	= afs_proc_servers_next,
+	.stop	= afs_proc_servers_stop,
+	.show	= afs_proc_servers_show,
 };
 
-static const struct file_operations afs_proc_cell_servers_fops = {
-	.open		= afs_proc_cell_servers_open,
+static const struct file_operations afs_proc_servers_fops = {
+	.open		= afs_proc_servers_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
@@ -138,7 +138,8 @@ int afs_proc_init(struct afs_net *net)
 		goto error_dir;
 
 	if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
-	    !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops))
+	    !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
+	    !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops))
 		goto error_tree;
 
 	_leave(" = 0");
@@ -174,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
 
 	m = file->private_data;
 	m->private = PDE_DATA(inode);
-
 	return 0;
 }
 
@@ -357,12 +357,10 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
 	if (!dir)
 		goto error_dir;
 
-	if (!proc_create_data("servers", 0, dir,
-			     &afs_proc_cell_servers_fops, cell) ||
-	    !proc_create_data("vlservers", 0, dir,
-			     &afs_proc_cell_vlservers_fops, cell) ||
+	if (!proc_create_data("vlservers", 0, dir,
+			      &afs_proc_cell_vlservers_fops, cell) ||
 	    !proc_create_data("volumes", 0, dir,
-			     &afs_proc_cell_volumes_fops, cell))
+			      &afs_proc_cell_volumes_fops, cell))
 		goto error_tree;
 
 	_leave(" = 0");
@@ -420,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 
-	/* lock the list against modification */
-	down_read(&cell->vl_sem);
-	return seq_list_start_head(&cell->vl_list, *_pos);
+	read_lock(&cell->proc_lock);
+	return seq_list_start_head(&cell->proc_volumes, *_pos);
 }
 
 /*
@@ -434,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 	struct afs_cell *cell = p->private;
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
-	return seq_list_next(v, &cell->vl_list, _pos);
+	return seq_list_next(v, &cell->proc_volumes, _pos);
 }
 
 /*
@@ -444,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
 {
 	struct afs_cell *cell = p->private;
 
-	up_read(&cell->vl_sem);
+	read_unlock(&cell->proc_lock);
 }
 
-static const char afs_vlocation_states[][4] = {
-	[AFS_VL_NEW]			= "New",
-	[AFS_VL_CREATING]		= "Crt",
-	[AFS_VL_VALID]			= "Val",
-	[AFS_VL_NO_VOLUME]		= "NoV",
-	[AFS_VL_UPDATING]		= "Upd",
-	[AFS_VL_VOLUME_DELETED]		= "Del",
-	[AFS_VL_UNCERTAIN]		= "Unc",
+static const char afs_vol_types[3][3] = {
+	[AFSVL_RWVOL]	= "RW",
+	[AFSVL_ROVOL]	= "RO",
+	[AFSVL_BACKVOL]	= "BK",
 };
 
 /*
@@ -463,23 +456,17 @@ static const char afs_vlocation_states[][4] = {
 static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 {
 	struct afs_cell *cell = m->private;
-	struct afs_vlocation *vlocation =
-		list_entry(v, struct afs_vlocation, link);
+	struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
 
-	/* display header on line 1 */
-	if (v == &cell->vl_list) {
-		seq_puts(m, "USE STT VLID[0]  VLID[1]  VLID[2]  NAME\n");
+	/* Display header on line 1 */
+	if (v == &cell->proc_volumes) {
+		seq_puts(m, "USE VID      TY\n");
 		return 0;
 	}
 
-	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %s %08x %08x %08x %s\n",
-		   atomic_read(&vlocation->usage),
-		   afs_vlocation_states[vlocation->state],
-		   vlocation->vldb.vid[0],
-		   vlocation->vldb.vid[1],
-		   vlocation->vldb.vid[2],
-		   vlocation->vldb.name);
+	seq_printf(m, "%3d %08x %s\n",
+		   atomic_read(&vol->usage), vol->vid,
+		   afs_vol_types[vol->type]);
 
 	return 0;
 }
@@ -580,86 +567,62 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 }
 
 /*
- * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
+ * open "/proc/fs/afs/servers" which provides a summary of active
  * servers
  */
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
+static int afs_proc_servers_open(struct inode *inode, struct file *file)
 {
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
-
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
-
-	ret = seq_open(file, &afs_proc_cell_servers_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = cell;
-	return 0;
+	return seq_open(file, &afs_proc_servers_ops);
 }
 
 /*
- * set up the iterator to start reading from the cells list and return the
- * first item
+ * Set up the iterator to start reading from the server list and return the
+ * first item.
  */
-static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
-	__acquires(m->private->servers_lock)
+static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
 {
-	struct afs_cell *cell = m->private;
-
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	struct afs_net *net = afs_seq2net(m);
 
-	/* lock the list against modification */
-	read_lock(&cell->servers_lock);
-	return seq_list_start_head(&cell->servers, *_pos);
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&net->fs_proc, *_pos);
 }
 
 /*
  * move to next cell in cells list
  */
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
-					loff_t *_pos)
+static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_net *net = afs_seq2net(m);
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
-	return seq_list_next(v, &cell->servers, _pos);
+	return seq_hlist_next_rcu(v, &net->fs_proc, _pos);
 }
 
 /*
  * clean up after reading from the cells list
  */
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
-	__releases(p->private->servers_lock)
+static void afs_proc_servers_stop(struct seq_file *p, void *v)
 {
-	struct afs_cell *cell = p->private;
-
-	read_unlock(&cell->servers_lock);
+	rcu_read_unlock();
 }
 
 /*
  * display a header line followed by a load of volume lines
  */
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
+static int afs_proc_servers_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell = m->private;
-	struct afs_server *server = list_entry(v, struct afs_server, link);
-	char ipaddr[64];
+	struct afs_server *server;
+	struct afs_addr_list *alist;
 
-	/* display header on line 1 */
-	if (v == &cell->servers) {
-		seq_puts(m, "USE ADDR            STATE\n");
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "UUID                                 USE ADDR\n");
 		return 0;
 	}
 
-	/* display one cell per line on subsequent lines */
-	sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport);
-	seq_printf(m, "%3d %-15s %5d\n",
-		   atomic_read(&server->usage), ipaddr, server->fs_state);
-
+	server = list_entry(v, struct afs_server, proc_link);
+	alist = rcu_dereference(server->addresses);
+	seq_printf(m, "%pU %3d %pISp\n",
+		   &server->uuid,
+		   atomic_read(&server->usage),
+		   &alist->addrs[alist->index].transport);
 	return 0;
 }
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index c7975b3ba59a..e728ca1776c9 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -11,7 +11,12 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/sched/signal.h>
 #include "internal.h"
+#include "afs_fs.h"
 
 /*
  * Initialise a filesystem server cursor for iterating over FS servers.
@@ -21,6 +26,460 @@ void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
 	memset(fc, 0, sizeof(*fc));
 }
 
+/*
+ * Begin an operation on the fileserver.
+ *
+ * Fileserver operations are serialised on the server by vnode, so we serialise
+ * them here also using the io_lock.
+ */
+bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+			       struct key *key)
+{
+	afs_init_fs_cursor(fc, vnode);
+	fc->vnode = vnode;
+	fc->key = key;
+	fc->ac.error = SHRT_MAX;
+
+	if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+		fc->ac.error = -EINTR;
+		fc->flags |= AFS_FS_CURSOR_STOP;
+		return false;
+	}
+
+	if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
+	    test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
+		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
+	return true;
+}
+
+/*
+ * Begin iteration through a server list, starting with the vnode's last used
+ * server if possible, or the last recorded good server if not.
+ */
+static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
+				   struct afs_vnode *vnode)
+{
+	struct afs_cb_interest *cbi;
+	int i;
+
+	read_lock(&vnode->volume->servers_lock);
+	fc->server_list = afs_get_serverlist(vnode->volume->servers);
+	read_unlock(&vnode->volume->servers_lock);
+
+	cbi = vnode->cb_interest;
+	if (cbi) {
+		/* See if the vnode's preferred record is still available */
+		for (i = 0; i < fc->server_list->nr_servers; i++) {
+			if (fc->server_list->servers[i].cb_interest == cbi) {
+				fc->start = i;
+				goto found_interest;
+			}
+		}
+
+		/* If we have a lock outstanding on a server that's no longer
+		 * serving this vnode, then we can't switch to another server
+		 * and have to return an error.
+		 */
+		if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+			fc->ac.error = -ESTALE;
+			return false;
+		}
+
+		/* Note that the callback promise is effectively broken */
+		write_seqlock(&vnode->cb_lock);
+		ASSERTCMP(cbi, ==, vnode->cb_interest);
+		vnode->cb_interest = NULL;
+		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+			vnode->cb_break++;
+		write_sequnlock(&vnode->cb_lock);
+
+		afs_put_cb_interest(afs_v2net(vnode), cbi);
+		cbi = NULL;
+	} else {
+		fc->start = READ_ONCE(fc->server_list->index);
+	}
+
+found_interest:
+	fc->index = fc->start;
+	return true;
+}
+
+/*
+ * Post volume busy note.
+ */
+static void afs_busy(struct afs_volume *volume, u32 abort_code)
+{
+	const char *m;
+
+	switch (abort_code) {
+	case VOFFLINE:		m = "offline";		break;
+	case VRESTARTING:	m = "restarting";	break;
+	case VSALVAGING:	m = "being salvaged";	break;
+	default:		m = "busy";		break;
+	}
+	
+	pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+}
+
+/*
+ * Sleep and retry the operation to the same fileserver.
+ */
+static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
+{
+	msleep_interruptible(1000);
+	if (signal_pending(current)) {
+		fc->ac.error = -ERESTARTSYS;
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Select the fileserver to use.  May be called multiple times to rotate
+ * through the fileservers.
+ */
+bool afs_select_fileserver(struct afs_fs_cursor *fc)
+{
+	struct afs_addr_list *alist;
+	struct afs_server *server;
+	struct afs_vnode *vnode = fc->vnode;
+
+	_enter("%u/%u,%u/%u,%d,%d",
+	       fc->index, fc->start,
+	       fc->ac.index, fc->ac.start,
+	       fc->ac.error, fc->ac.abort_code);
+
+	if (fc->flags & AFS_FS_CURSOR_STOP) {
+		_leave(" = f [stopped]");
+		return false;
+	}
+
+	/* Evaluate the result of the previous operation, if there was one. */
+	switch (fc->ac.error) {
+	case SHRT_MAX:
+		goto start;
+
+	case 0:
+	default:
+		/* Success or local failure.  Stop. */
+		fc->flags |= AFS_FS_CURSOR_STOP;
+		_leave(" = f [okay/local %d]", fc->ac.error);
+		return false;
+
+	case -ECONNABORTED:
+		/* The far side rejected the operation on some grounds.  This
+		 * might involve the server being busy or the volume having been moved.
+		 */
+		switch (fc->ac.abort_code) {
+		case VNOVOL:
+			/* This fileserver doesn't know about the volume.
+			 * - May indicate that the VL is wrong - retry once and compare
+			 *   the results.
+			 * - May indicate that the fileserver couldn't attach to the vol.
+			 */
+			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
+				fc->ac.error = -EREMOTEIO;
+				goto failed;
+			}
+
+			write_lock(&vnode->volume->servers_lock);
+			fc->server_list->vnovol_mask |= 1 << fc->index;
+			write_unlock(&vnode->volume->servers_lock);
+
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
+			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+			if (fc->ac.error < 0)
+				goto failed;
+
+			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
+				fc->ac.error = -ENOMEDIUM;
+				goto failed;
+			}
+
+			/* If the server list didn't change, then assume that
+			 * it's the fileserver having trouble.
+			 */
+			if (vnode->volume->servers == fc->server_list) {
+				fc->ac.error = -EREMOTEIO;
+				goto failed;
+			}
+
+			/* Try again */
+			fc->flags |= AFS_FS_CURSOR_VNOVOL;
+			_leave(" = t [vnovol]");
+			return true;
+
+		case VSALVAGE: /* TODO: Should this return an error or iterate? */
+		case VVOLEXISTS:
+		case VNOSERVICE:
+		case VONLINE:
+		case VDISKFULL:
+		case VOVERQUOTA:
+			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+			goto next_server;
+
+		case VOFFLINE:
+			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
+				afs_busy(vnode->volume, fc->ac.abort_code);
+				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
+			}
+			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
+				fc->ac.error = -EADV;
+				goto failed;
+			}
+			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+				fc->ac.error = -ESTALE;
+				goto failed;
+			}
+			goto busy;
+
+		case VSALVAGING:
+		case VRESTARTING:
+		case VBUSY:
+			/* Retry after going round all the servers unless we
+			 * have a file lock we need to maintain.
+			 */
+			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
+				fc->ac.error = -EBUSY;
+				goto failed;
+			}
+			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
+				afs_busy(vnode->volume, fc->ac.abort_code);
+				clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
+			}
+		busy:
+			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+				if (!afs_sleep_and_retry(fc))
+					goto failed;
+
+				 /* Retry with same server & address */
+				_leave(" = t [vbusy]");
+				return true;
+			}
+
+			fc->flags |= AFS_FS_CURSOR_VBUSY;
+			goto next_server;
+
+		case VMOVED:
+			/* The volume migrated to another server.  We consider
+			 * consider all locks and callbacks broken and request
+			 * an update from the VLDB.
+			 *
+			 * We also limit the number of VMOVED hops we will
+			 * honour, just in case someone sets up a loop.
+			 */
+			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
+				fc->ac.error = -EREMOTEIO;
+				goto failed;
+			}
+			fc->flags |= AFS_FS_CURSOR_VMOVED;
+
+			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
+			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+			if (fc->ac.error < 0)
+				goto failed;
+
+			/* If the server list didn't change, then the VLDB is
+			 * out of sync with the fileservers.  This is hopefully
+			 * a temporary condition, however, so we don't want to
+			 * permanently block access to the file.
+			 *
+			 * TODO: Try other fileservers if we can.
+			 *
+			 * TODO: Retry a few times with sleeps.
+			 */
+			if (vnode->volume->servers == fc->server_list) {
+				fc->ac.error = -ENOMEDIUM;
+				goto failed;
+			}
+
+			goto restart_from_beginning;
+
+		default:
+			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
+			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
+			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+			goto failed;
+		}
+
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+		_debug("no conn");
+		goto iterate_address;
+	}
+
+restart_from_beginning:
+	_debug("restart");
+	afs_end_cursor(&fc->ac);
+	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+	fc->cbi = NULL;
+	afs_put_serverlist(afs_v2net(vnode), fc->server_list);
+	fc->server_list = NULL;
+start:
+	_debug("start");
+	/* See if we need to do an update of the volume record.  Note that the
+	 * volume may have moved or even have been deleted.
+	 */
+	fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+	if (fc->ac.error < 0)
+		goto failed;
+
+	if (!afs_start_fs_iteration(fc, vnode))
+		goto failed;
+	goto use_server;
+
+next_server:
+	_debug("next");
+	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+	fc->cbi = NULL;
+	fc->index++;
+	if (fc->index >= fc->server_list->nr_servers)
+		fc->index = 0;
+	if (fc->index != fc->start)
+		goto use_server;
+
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (fc->flags & AFS_FS_CURSOR_VBUSY)
+		goto restart_from_beginning;
+
+	fc->ac.error = -EDESTADDRREQ;
+	goto failed;
+
+use_server:
+	_debug("use");
+	/* We're starting on a different fileserver from the list.  We need to
+	 * check it, create a callback intercept, find its address list and
+	 * probe its capabilities before we use it.
+	 */
+	ASSERTCMP(fc->ac.alist, ==, NULL);
+	server = fc->server_list->servers[fc->index].server;
+
+	if (!afs_check_server_record(fc, server))
+		goto failed;
+
+	_debug("USING SERVER: %pU", &server->uuid);
+
+	/* Make sure we've got a callback interest record for this server.  We
+	 * have to link it in before we send the request as we can be sent a
+	 * break request before we've finished decoding the reply and
+	 * installing the vnode.
+	 */
+	fc->ac.error = afs_register_server_cb_interest(
+		vnode, &fc->server_list->servers[fc->index]);
+	if (fc->ac.error < 0)
+		goto failed;
+
+	fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+
+	read_lock(&server->fs_lock);
+	alist = rcu_dereference_protected(server->addresses,
+					  lockdep_is_held(&server->fs_lock));
+	afs_get_addrlist(alist);
+	read_unlock(&server->fs_lock);
+
+
+	/* Probe the current fileserver if we haven't done so yet. */
+	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
+		fc->ac.alist = afs_get_addrlist(alist);
+
+		if (!afs_probe_fileserver(fc))
+			goto failed;
+	}
+
+	if (!fc->ac.alist)
+		fc->ac.alist = alist;
+	else
+		afs_put_addrlist(alist);
+
+	fc->ac.addr  = NULL;
+	fc->ac.start = READ_ONCE(alist->index);
+	fc->ac.index = fc->ac.start;
+	fc->ac.error = 0;
+	fc->ac.begun = false;
+	goto iterate_address;
+
+iterate_address:
+	ASSERT(fc->ac.alist);
+	_debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	if (afs_iterate_addresses(&fc->ac)) {
+		_leave(" = t");
+		return true;
+	}
+
+	afs_end_cursor(&fc->ac);
+	goto next_server;
+
+failed:
+	fc->flags |= AFS_FS_CURSOR_STOP;
+	_leave(" = f [failed %d]", fc->ac.error);
+	return false;
+}
+
+/*
+ * Select the same fileserver we used for a vnode before and only that
+ * fileserver.  We use this when we have a lock on that file, which is backed
+ * only by the fileserver we obtained it from.
+ */
+bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_cb_interest *cbi = vnode->cb_interest;
+	struct afs_addr_list *alist;
+
+	_enter("");
+
+	if (!cbi) {
+		fc->ac.error = -ESTALE;
+		fc->flags |= AFS_FS_CURSOR_STOP;
+		return false;
+	}
+
+	read_lock(&cbi->server->fs_lock);
+	alist = afs_get_addrlist(cbi->server->addresses);
+	read_unlock(&cbi->server->fs_lock);
+	if (!alist) {
+		fc->ac.error = -ESTALE;
+		fc->flags |= AFS_FS_CURSOR_STOP;
+		return false;
+	}
+
+	fc->ac.alist = alist;
+	fc->ac.error = 0;
+	return true;
+}
+
+/*
+ * Tidy up a filesystem cursor and unlock the vnode.
+ */
+int afs_end_vnode_operation(struct afs_fs_cursor *fc)
+{
+	struct afs_net *net = afs_v2net(fc->vnode);
+	int ret;
+
+	mutex_unlock(&fc->vnode->io_lock);
+
+	afs_end_cursor(&fc->ac);
+	afs_put_cb_interest(net, fc->cbi);
+	afs_put_serverlist(net, fc->server_list);
+
+	ret = fc->ac.error;
+	if (ret == -ECONNABORTED)
+		afs_abort_to_error(fc->ac.abort_code);
+
+	return fc->ac.error;
+}
+
+#if 0
 /*
  * Set a filesystem server cursor for using a specific FS server.
  */
@@ -252,3 +711,5 @@ int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
 	afs_put_server(net, fc->server);
 	return fc->ac.error;
 }
+
+#endif
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5ddfb7c4cf78..1bbd5854507d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -20,7 +20,7 @@
 struct workqueue_struct *afs_async_calls;
 
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
-static long afs_wait_for_call_to_complete(struct afs_call *);
+static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_process_async_call(struct work_struct *);
 static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
@@ -162,6 +162,7 @@ void afs_put_call(struct afs_call *call)
 			call->type->destructor(call);
 
 		afs_put_server(call->net, call->cm_server);
+		afs_put_cb_interest(call->net, call->cbi);
 		kfree(call->request);
 		kfree(call);
 
@@ -330,7 +331,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	struct kvec iov[1];
 	size_t offset;
 	s64 tx_total_len;
-	u32 abort_code;
 	int ret;
 
 	_enter(",{%pISp},", &srx->transport);
@@ -362,7 +362,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 					  afs_wake_up_async_call :
 					  afs_wake_up_call_waiter),
 					 call->upgrade);
-	call->key = NULL;
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
 		goto error_kill_call;
@@ -406,7 +405,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	if (call->async)
 		return -EINPROGRESS;
 
-	return afs_wait_for_call_to_complete(call);
+	return afs_wait_for_call_to_complete(call, ac);
 
 error_do_abort:
 	call->state = AFS_CALL_COMPLETE;
@@ -414,15 +413,16 @@ error_do_abort:
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
 	} else {
-		abort_code = 0;
 		offset = 0;
 		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
 				       0, &offset, false, &call->abort_code,
 				       &call->service_id);
-		ret = afs_abort_to_error(call->abort_code);
+		ac->abort_code = call->abort_code;
+		ac->responded = true;
 	}
 error_kill_call:
 	afs_put_call(call);
+	ac->error = ret;
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -510,7 +510,8 @@ save_error:
 /*
  * wait synchronously for a call to complete
  */
-static long afs_wait_for_call_to_complete(struct afs_call *call)
+static long afs_wait_for_call_to_complete(struct afs_call *call,
+					  struct afs_addr_cursor *ac)
 {
 	signed long rtt2, timeout;
 	long ret;
@@ -563,16 +564,25 @@ static long afs_wait_for_call_to_complete(struct afs_call *call)
 	/* Kill off the call if it's still live. */
 	if (call->state < AFS_CALL_COMPLETE) {
 		_debug("call interrupted");
-		rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-					RX_USER_ABORT, -EINTR, "KWI");
+		if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+					    RX_USER_ABORT, -EINTR, "KWI"))
+			call->error = -ERESTARTSYS;
 	}
 
-	ret = call->error;
-	if (ret < 0) {
-		ret = afs_abort_to_error(call->abort_code);
-	} else if (ret == 0 && call->ret_reply0) {
-		ret = (long)call->reply[0];
-		call->reply[0] = NULL;
+	ac->abort_code = call->abort_code;
+	ac->error = call->error;
+
+	ret = ac->error;
+	switch (ret) {
+	case 0:
+		if (call->ret_reply0) {
+			ret = (long)call->reply[0];
+			call->reply[0] = NULL;
+		}
+		/* Fall through */
+	case -ECONNABORTED:
+		ac->responded = true;
+		break;
 	}
 
 	_debug("call complete");
@@ -882,10 +892,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		return 0;
 	}
 
-	if (ret == -ECONNABORTED)
-		call->error = afs_abort_to_error(call->abort_code);
-	else
-		call->error = ret;
+	call->error = ret;
 	call->state = AFS_CALL_COMPLETE;
 	return ret;
 }
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 1b5198fc1657..46a881a4d08f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -325,7 +325,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 		 */
 		_debug("no valid permit");
 
-		ret = afs_vnode_fetch_status(vnode, key, true);
+		ret = afs_fetch_status(vnode, key);
 		if (ret < 0) {
 			*_access = 0;
 			_leave(" = %d", ret);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 9ca174b24f5b..a6c860bcf391 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -14,7 +14,8 @@
 #include "afs_fs.h"
 #include "internal.h"
 
-static unsigned afs_server_timeout = 10;	/* server timeout in seconds */
+static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
+static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
 
 static void afs_inc_servers_outstanding(struct afs_net *net)
 {
@@ -27,60 +28,201 @@ static void afs_dec_servers_outstanding(struct afs_net *net)
 		wake_up_atomic_t(&net->servers_outstanding);
 }
 
-void afs_server_timer(struct timer_list *timer)
+/*
+ * Find a server by one of its addresses.
+ */
+struct afs_server *afs_find_server(struct afs_net *net,
+				   const struct sockaddr_rxrpc *srx)
 {
-	struct afs_net *net = container_of(timer, struct afs_net, server_timer);
+	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
+	const struct afs_addr_list *alist;
+	struct afs_server *server = NULL;
+	unsigned int i;
+	bool ipv6 = true;
+	int seq = 0, diff;
+
+	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
+	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
+	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
+		ipv6 = false;
+
+	rcu_read_lock();
+
+	do {
+		if (server)
+			afs_put_server(net, server);
+		server = NULL;
+		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
+
+		if (ipv6) {
+			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+				alist = rcu_dereference(server->addresses);
+				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+					b = &alist->addrs[i].transport.sin6;
+					diff = (u16)a->sin6_port - (u16)b->sin6_port;
+					if (diff == 0)
+						diff = memcmp(&a->sin6_addr,
+							      &b->sin6_addr,
+							      sizeof(struct in6_addr));
+					if (diff == 0)
+						goto found;
+					if (diff < 0) {
+						// TODO: Sort the list
+						//if (i == alist->nr_ipv4)
+						//	goto not_found;
+						break;
+					}
+				}
+			}
+		} else {
+			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
+				alist = rcu_dereference(server->addresses);
+				for (i = 0; i < alist->nr_ipv4; i++) {
+					b = &alist->addrs[i].transport.sin6;
+					diff = (u16)a->sin6_port - (u16)b->sin6_port;
+					if (diff == 0)
+						diff = ((u32)a->sin6_addr.s6_addr32[3] -
+							(u32)b->sin6_addr.s6_addr32[3]);
+					if (diff == 0)
+						goto found;
+					if (diff < 0) {
+						// TODO: Sort the list
+						//if (i == 0)
+						//	goto not_found;
+						break;
+					}
+				}
+			}
+		}
 
-	if (!queue_work(afs_wq, &net->server_reaper))
-		afs_dec_servers_outstanding(net);
+	//not_found:
+		server = NULL;
+	found:
+		if (server && !atomic_inc_not_zero(&server->usage))
+			server = NULL;
+
+	} while (need_seqretry(&net->fs_addr_lock, seq));
+
+	done_seqretry(&net->fs_addr_lock, seq);
+
+	rcu_read_unlock();
+	return server;
 }
 
 /*
- * install a server record in the master tree
+ * Look up a server by its UUID
  */
-static int afs_install_server(struct afs_server *server)
+struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
 {
-	struct afs_server *xserver;
-	struct afs_net *net = server->cell->net;
+	struct afs_server *server = NULL;
+	struct rb_node *p;
+	int diff, seq = 0;
+
+	_enter("%pU", uuid);
+
+	do {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		if (server)
+			afs_put_server(net, server);
+		server = NULL;
+
+		read_seqbegin_or_lock(&net->fs_lock, &seq);
+
+		p = net->fs_servers.rb_node;
+		while (p) {
+			server = rb_entry(p, struct afs_server, uuid_rb);
+
+			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+			if (diff < 0) {
+				p = p->rb_left;
+			} else if (diff > 0) {
+				p = p->rb_right;
+			} else {
+				afs_get_server(server);
+				break;
+			}
+
+			server = NULL;
+		}
+	} while (need_seqretry(&net->fs_lock, seq));
+
+	done_seqretry(&net->fs_lock, seq);
+
+	_leave(" = %p", server);
+	return server;
+}
+
+/*
+ * Install a server record in the namespace tree
+ */
+static struct afs_server *afs_install_server(struct afs_net *net,
+					     struct afs_server *candidate)
+{
+	const struct afs_addr_list *alist;
+	struct afs_server *server;
 	struct rb_node **pp, *p;
-	int ret, diff;
+	int ret = -EEXIST, diff;
 
-	_enter("%p", server);
+	_enter("%p", candidate);
 
-	write_lock(&net->servers_lock);
+	write_seqlock(&net->fs_lock);
 
-	ret = -EEXIST;
-	pp = &net->servers.rb_node;
+	/* Firstly install the server in the UUID lookup tree */
+	pp = &net->fs_servers.rb_node;
 	p = NULL;
 	while (*pp) {
 		p = *pp;
 		_debug("- consider %p", p);
-		xserver = rb_entry(p, struct afs_server, master_rb);
-		diff = memcmp(&server->addrs->addrs[0],
-			      &xserver->addrs->addrs[0],
-			      sizeof(sizeof(server->addrs->addrs[0])));
+		server = rb_entry(p, struct afs_server, uuid_rb);
+		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
 		if (diff < 0)
 			pp = &(*pp)->rb_left;
 		else if (diff > 0)
 			pp = &(*pp)->rb_right;
 		else
-			goto error;
+			goto exists;
 	}
 
-	rb_link_node(&server->master_rb, p, pp);
-	rb_insert_color(&server->master_rb, &net->servers);
+	server = candidate;
+	rb_link_node(&server->uuid_rb, p, pp);
+	rb_insert_color(&server->uuid_rb, &net->fs_servers);
+	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+
+	write_seqlock(&net->fs_addr_lock);
+	alist = rcu_dereference_protected(server->addresses,
+					  lockdep_is_held(&net->fs_addr_lock.lock));
+
+	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
+	 * it in the IPv4 and/or IPv6 reverse-map lists.
+	 *
+	 * TODO: For speed we want to use something other than a flat list
+	 * here; even sorting the list in terms of lowest address would help a
+	 * bit, but anything we might want to do gets messy and memory
+	 * intensive.
+	 */
+	if (alist->nr_ipv4 > 0)
+		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
+	if (alist->nr_addrs > alist->nr_ipv4)
+		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+
+	write_sequnlock(&net->fs_addr_lock);
 	ret = 0;
 
-error:
-	write_unlock(&net->servers_lock);
-	return ret;
+exists:
+	afs_get_server(server);
+	write_sequnlock(&net->fs_lock);
+	return server;
 }
 
 /*
  * allocate a new server record
  */
-static struct afs_server *afs_alloc_server(struct afs_cell *cell,
-					   const struct sockaddr_rxrpc *addr)
+static struct afs_server *afs_alloc_server(struct afs_net *net,
+					   const uuid_t *uuid,
+					   struct afs_addr_list *alist)
 {
 	struct afs_server *server;
 
@@ -89,194 +231,155 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
 	if (!server)
 		goto enomem;
-	server->addrs = kzalloc(sizeof(struct afs_addr_list) +
-				sizeof(struct sockaddr_rxrpc),
-				GFP_KERNEL);
-	if (!server->addrs)
-		goto enomem_server;
 
 	atomic_set(&server->usage, 1);
-	server->net = cell->net;
-	server->cell = cell;
-
-	INIT_LIST_HEAD(&server->link);
-	INIT_LIST_HEAD(&server->grave);
-	init_rwsem(&server->sem);
-	spin_lock_init(&server->fs_lock);
+	RCU_INIT_POINTER(server->addresses, alist);
+	server->addr_version = alist->version;
+	server->uuid = *uuid;
+	server->flags = (1UL << AFS_SERVER_FL_NEW);
+	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
+	rwlock_init(&server->fs_lock);
 	INIT_LIST_HEAD(&server->cb_interests);
 	rwlock_init(&server->cb_break_lock);
 
-	refcount_set(&server->addrs->usage, 1);
-	server->addrs->nr_addrs = 1;
-	server->addrs->addrs[0] = *addr;
-	afs_inc_servers_outstanding(cell->net);
-
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
+	afs_inc_servers_outstanding(net);
+	_leave(" = %p", server);
 	return server;
 
-enomem_server:
-	kfree(server);
 enomem:
 	_leave(" = NULL [nomem]");
 	return NULL;
 }
 
 /*
- * get an FS-server record for a cell
+ * Look up an address record for a server
  */
-struct afs_server *afs_lookup_server(struct afs_cell *cell,
-				     struct sockaddr_rxrpc *addr)
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
+						 struct key *key, const uuid_t *uuid)
 {
-	struct afs_server *server, *candidate;
-
-	_enter("%p,%pIS", cell, &addr->transport);
-
-	/* quick scan of the list to see if we already have the server */
-	read_lock(&cell->servers_lock);
-
-	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
-			goto found_server_quickly;
-	}
-	read_unlock(&cell->servers_lock);
-
-	candidate = afs_alloc_server(cell, addr);
-	if (!candidate) {
-		_leave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	write_lock(&cell->servers_lock);
-
-	/* check the cell's server list again */
-	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
-			goto found_server;
-	}
-
-	_debug("new");
-	server = candidate;
-	if (afs_install_server(server) < 0)
-		goto server_in_two_cells;
-
-	afs_get_cell(cell);
-	list_add_tail(&server->link, &cell->servers);
-
-	write_unlock(&cell->servers_lock);
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	return server;
-
-	/* found a matching server quickly */
-found_server_quickly:
-	_debug("found quickly");
-	afs_get_server(server);
-	read_unlock(&cell->servers_lock);
-no_longer_unused:
-	if (!list_empty(&server->grave)) {
-		spin_lock(&cell->net->server_graveyard_lock);
-		list_del_init(&server->grave);
-		spin_unlock(&cell->net->server_graveyard_lock);
+	struct afs_addr_cursor ac;
+	struct afs_addr_list *alist;
+	int ret;
+
+	ret = afs_set_vl_cursor(&ac, cell);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	while (afs_iterate_addresses(&ac)) {
+		alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
+		switch (ac.error) {
+		case 0:
+			afs_end_cursor(&ac);
+			return alist;
+		case -ECONNABORTED:
+			ac.error = afs_abort_to_error(ac.abort_code);
+			goto error;
+		case -ENOMEM:
+		case -ENONET:
+			goto error;
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			break;
+		default:
+			ac.error = -EIO;
+			goto error;
+		}
 	}
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	return server;
 
-	/* found a matching server on the second pass */
-found_server:
-	_debug("found");
-	afs_get_server(server);
-	write_unlock(&cell->servers_lock);
-	kfree(candidate);
-	goto no_longer_unused;
-
-	/* found a server that seems to be in two cells */
-server_in_two_cells:
-	write_unlock(&cell->servers_lock);
-	kfree(candidate);
-	afs_dec_servers_outstanding(cell->net);
-	printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
-	       addr);
-	_leave(" = -EEXIST");
-	return ERR_PTR(-EEXIST);
+error:
+	return ERR_PTR(afs_end_cursor(&ac));
 }
 
 /*
- * look up a server by its IP address
+ * Get or create a fileserver record.
  */
-struct afs_server *afs_find_server(struct afs_net *net,
-				   const struct sockaddr_rxrpc *srx)
+struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
+				     const uuid_t *uuid)
 {
-	struct afs_server *server = NULL;
-	struct rb_node *p;
-	int diff;
+	struct afs_addr_list *alist;
+	struct afs_server *server, *candidate;
 
-	_enter("{%d,%pIS}", srx->transport.family, &srx->transport);
+	_enter("%p,%pU", cell->net, uuid);
 
-	read_lock(&net->servers_lock);
+	server = afs_find_server_by_uuid(cell->net, uuid);
+	if (server)
+		return server;
 
-	p = net->servers.rb_node;
-	while (p) {
-		server = rb_entry(p, struct afs_server, master_rb);
+	alist = afs_vl_lookup_addrs(cell, key, uuid);
+	if (IS_ERR(alist))
+		return ERR_CAST(alist);
 
-		_debug("- consider %p", p);
+	candidate = afs_alloc_server(cell->net, uuid, alist);
+	if (!candidate) {
+		afs_put_addrlist(alist);
+		return ERR_PTR(-ENOMEM);
+	}
 
-		diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx));
-		if (diff < 0) {
-			p = p->rb_left;
-		} else if (diff > 0) {
-			p = p->rb_right;
-		} else {
-			afs_get_server(server);
-			goto found;
-		}
+	server = afs_install_server(cell->net, candidate);
+	if (server != candidate) {
+		afs_put_addrlist(alist);
+		kfree(candidate);
 	}
 
-	server = NULL;
-found:
-	read_unlock(&net->servers_lock);
-	_leave(" = %p", server);
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	return server;
 }
 
+/*
+ * Set the server timer to fire after a given delay, assuming it's not already
+ * set for an earlier time.
+ */
 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
 {
-	afs_inc_servers_outstanding(net);
 	if (net->live) {
-		if (timer_reduce(&net->server_timer, jiffies + delay * HZ))
-			afs_dec_servers_outstanding(net);
-	} else {
-		if (!queue_work(afs_wq, &net->server_reaper))
+		afs_inc_servers_outstanding(net);
+		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
 			afs_dec_servers_outstanding(net);
 	}
 }
 
 /*
- * destroy a server record
- * - removes from the cell list
+ * Server management timer.  We have an increment on fs_outstanding that we
+ * need to pass along to the work item.
+ */
+void afs_servers_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
+
+	_enter("");
+	if (!queue_work(afs_wq, &net->fs_manager))
+		afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Release a reference on a server record.
  */
 void afs_put_server(struct afs_net *net, struct afs_server *server)
 {
+	unsigned int usage;
+
 	if (!server)
 		return;
 
-	_enter("%p{%d}", server, atomic_read(&server->usage));
+	server->put_time = ktime_get_real_seconds();
 
-	_debug("PUT SERVER %d", atomic_read(&server->usage));
+	usage = atomic_dec_return(&server->usage);
 
-	ASSERTCMP(atomic_read(&server->usage), >, 0);
+	_enter("{%u}", usage);
 
-	if (likely(!atomic_dec_and_test(&server->usage))) {
-		_leave("");
+	if (likely(usage > 0))
 		return;
-	}
 
-	spin_lock(&net->server_graveyard_lock);
-	if (atomic_read(&server->usage) == 0) {
-		list_move_tail(&server->grave, &net->server_graveyard);
-		server->time_of_death = ktime_get_real_seconds();
-		afs_set_server_timer(net, afs_server_timeout);
-	}
-	spin_unlock(&net->server_graveyard_lock);
-	_leave(" [dead]");
+	afs_set_server_timer(net, afs_server_gc_delay);
+}
+
+static void afs_server_rcu(struct rcu_head *rcu)
+{
+	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+
+	afs_put_addrlist(server->addresses);
+	kfree(server);
 }
 
 /*
@@ -284,7 +387,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
  */
 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 {
-	struct afs_addr_list *alist = server->addrs;
+	struct afs_addr_list *alist = server->addresses;
 	struct afs_addr_cursor ac = {
 		.alist	= alist,
 		.addr	= &alist->addrs[0],
@@ -294,79 +397,300 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 	};
 	_enter("%p", server);
 
-	afs_fs_give_up_all_callbacks(server, &ac, NULL, false);
-	afs_put_cell(net, server->cell);
-	afs_put_addrlist(server->addrs);
-	kfree(server);
+	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+	call_rcu(&server->rcu, afs_server_rcu);
 	afs_dec_servers_outstanding(net);
 }
 
 /*
- * reap dead server records
+ * Garbage collect any expired servers.
  */
-void afs_reap_server(struct work_struct *work)
+static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
 {
-	LIST_HEAD(corpses);
 	struct afs_server *server;
-	struct afs_net *net = container_of(work, struct afs_net, server_reaper);
-	unsigned long delay, expiry;
-	time64_t now;
-
-	now = ktime_get_real_seconds();
-	spin_lock(&net->server_graveyard_lock);
-
-	while (!list_empty(&net->server_graveyard)) {
-		server = list_entry(net->server_graveyard.next,
-				    struct afs_server, grave);
-
-		/* the queue is ordered most dead first */
-		if (net->live) {
-			expiry = server->time_of_death + afs_server_timeout;
-			if (expiry > now) {
-				delay = (expiry - now);
-				afs_set_server_timer(net, delay);
-				break;
-			}
+	bool deleted;
+	int usage;
+
+	while ((server = gc_list)) {
+		gc_list = server->gc_next;
+
+		write_seqlock(&net->fs_lock);
+		usage = 1;
+		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
+		if (deleted) {
+			rb_erase(&server->uuid_rb, &net->fs_servers);
+			hlist_del_rcu(&server->proc_link);
 		}
+		write_sequnlock(&net->fs_lock);
 
-		write_lock(&server->cell->servers_lock);
-		write_lock(&net->servers_lock);
-		if (atomic_read(&server->usage) > 0) {
-			list_del_init(&server->grave);
-		} else {
-			list_move_tail(&server->grave, &corpses);
-			list_del_init(&server->link);
-			rb_erase(&server->master_rb, &net->servers);
+		if (deleted)
+			afs_destroy_server(net, server);
+	}
+}
+
+/*
+ * Manage the records of servers known to be within a network namespace.  This
+ * includes garbage collecting unused servers.
+ *
+ * Note also that we were given an increment on net->servers_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ */
+void afs_manage_servers(struct work_struct *work)
+{
+	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
+	struct afs_server *gc_list = NULL;
+	struct rb_node *cursor;
+	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+	bool purging = !net->live;
+
+	_enter("");
+
+	/* Trawl the server list looking for servers that have expired from
+	 * lack of use.
+	 */
+	read_seqlock_excl(&net->fs_lock);
+
+	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
+		struct afs_server *server =
+			rb_entry(cursor, struct afs_server, uuid_rb);
+		int usage = atomic_read(&server->usage);
+
+		_debug("manage %pU %u", &server->uuid, usage);
+
+		ASSERTCMP(usage, >=, 1);
+		ASSERTIFCMP(purging, usage, ==, 1);
+
+		if (usage == 1) {
+			time64_t expire_at = server->put_time;
+
+			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+				expire_at += afs_server_gc_delay;
+			if (purging || expire_at <= now) {
+				server->gc_next = gc_list;
+				gc_list = server;
+			} else if (expire_at < next_manage) {
+				next_manage = expire_at;
+			}
 		}
-		write_unlock(&net->servers_lock);
-		write_unlock(&server->cell->servers_lock);
 	}
 
-	spin_unlock(&net->server_graveyard_lock);
+	read_sequnlock_excl(&net->fs_lock);
 
-	/* now reap the corpses we've extracted */
-	while (!list_empty(&corpses)) {
-		server = list_entry(corpses.next, struct afs_server, grave);
-		list_del(&server->grave);
-		afs_destroy_server(net, server);
+	/* Update the timer on the way out.  We have to pass an increment on
+	 * servers_outstanding in the namespace that we are in to the timer or
+	 * the work scheduler.
+	 */
+	if (!purging && next_manage < TIME64_MAX) {
+		now = ktime_get_real_seconds();
+
+		if (next_manage - now <= 0) {
+			if (queue_work(afs_wq, &net->fs_manager))
+				afs_inc_servers_outstanding(net);
+		} else {
+			afs_set_server_timer(net, next_manage - now);
+		}
 	}
 
+	afs_gc_servers(net, gc_list);
+
 	afs_dec_servers_outstanding(net);
+	_leave(" [%d]", atomic_read(&net->servers_outstanding));
+}
+
+static void afs_queue_server_manager(struct afs_net *net)
+{
+	afs_inc_servers_outstanding(net);
+	if (!queue_work(afs_wq, &net->fs_manager))
+		afs_dec_servers_outstanding(net);
 }
 
 /*
- * Discard all the server records from a net namespace when it is destroyed or
- * the afs module is removed.
+ * Purge list of servers.
  */
-void __net_exit afs_purge_servers(struct afs_net *net)
+void afs_purge_servers(struct afs_net *net)
 {
-	if (del_timer_sync(&net->server_timer))
+	_enter("");
+
+	if (del_timer_sync(&net->fs_timer))
 		atomic_dec(&net->servers_outstanding);
 
-	afs_inc_servers_outstanding(net);
-	if (!queue_work(afs_wq, &net->server_reaper))
-		afs_dec_servers_outstanding(net);
+	afs_queue_server_manager(net);
 
+	_debug("wait");
 	wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
 			 TASK_UNINTERRUPTIBLE);
+	_leave("");
+}
+
+/*
+ * Probe a fileserver to find its capabilities.
+ *
+ * TODO: Try service upgrade.
+ */
+static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
+{
+	_enter("");
+
+	fc->ac.addr = NULL;
+	fc->ac.start = READ_ONCE(fc->ac.alist->index);
+	fc->ac.index = fc->ac.start;
+	fc->ac.error = 0;
+	fc->ac.begun = false;
+
+	while (afs_iterate_addresses(&fc->ac)) {
+		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
+					&fc->ac, fc->key);
+		switch (fc->ac.error) {
+		case 0:
+			afs_end_cursor(&fc->ac);
+			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
+			return true;
+		case -ECONNABORTED:
+			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+			goto error;
+		case -ENOMEM:
+		case -ENONET:
+			goto error;
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+		case -ETIMEDOUT:
+		case -ETIME:
+			break;
+		default:
+			fc->ac.error = -EIO;
+			goto error;
+		}
+	}
+
+error:
+	afs_end_cursor(&fc->ac);
+	return false;
+}
+
+/*
+ * If we haven't already, try probing the fileserver to get its capabilities.
+ * We try not to instigate parallel probes, but it's possible that the parallel
+ * probes will fail due to authentication failure when ours would succeed.
+ *
+ * TODO: Try sending an anonymous probe if an authenticated probe fails.
+ */
+bool afs_probe_fileserver(struct afs_fs_cursor *fc)
+{
+	bool success;
+	int ret, retries = 0;
+
+	_enter("");
+
+retry:
+	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
+		_leave(" = t");
+		return true;
+	}
+
+	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
+		success = afs_do_probe_fileserver(fc);
+		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
+		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
+		_leave(" = t");
+		return success;
+	}
+
+	_debug("wait");
+	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
+			  TASK_INTERRUPTIBLE);
+	if (ret == -ERESTARTSYS) {
+		fc->ac.error = ret;
+		_leave(" = f [%d]", ret);
+		return false;
+	}
+
+	retries++;
+	if (retries == 4) {
+		fc->ac.error = -ESTALE;
+		_leave(" = f [stale]");
+		return false;
+	}
+	_debug("retry");
+	goto retry;
+}
+
+/*
+ * Get an update for a server's address list.
+ */
+static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
+{
+	struct afs_addr_list *alist, *discard;
+
+	_enter("");
+
+	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
+				    &server->uuid);
+	if (IS_ERR(alist)) {
+		fc->ac.error = PTR_ERR(alist);
+		_leave(" = f [%d]", fc->ac.error);
+		return false;
+	}
+
+	discard = alist;
+	if (server->addr_version != alist->version) {
+		write_lock(&server->fs_lock);
+		discard = rcu_dereference_protected(server->addresses,
+						    lockdep_is_held(&server->fs_lock));
+		rcu_assign_pointer(server->addresses, alist);
+		server->addr_version = alist->version;
+		write_unlock(&server->fs_lock);
+	}
+
+	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
+	afs_put_addrlist(discard);
+	_leave(" = t");
+	return true;
+}
+
+/*
+ * See if a server's address list needs updating.
+ */
+bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
+{
+	time64_t now = ktime_get_real_seconds();
+	long diff;
+	bool success;
+	int ret, retries = 0;
+
+	_enter("");
+
+	ASSERT(server);
+
+retry:
+	diff = READ_ONCE(server->update_at) - now;
+	if (diff > 0) {
+		_leave(" = t [not now %ld]", diff);
+		return true;
+	}
+
+	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
+		success = afs_update_server_record(fc, server);
+		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
+		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
+		_leave(" = %d", success);
+		return success;
+	}
+
+	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
+			  TASK_INTERRUPTIBLE);
+	if (ret == -ERESTARTSYS) {
+		fc->ac.error = ret;
+		_leave(" = f [intr]");
+		return false;
+	}
+
+	retries++;
+	if (retries == 4) {
+		_leave(" = f [stale]");
+		ret = -ESTALE;
+		return false;
+	}
+	goto retry;
 }
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
new file mode 100644
index 000000000000..26bad7032bba
--- /dev/null
+++ b/fs/afs/server_list.c
@@ -0,0 +1,153 @@
+/* AFS fileserver list management.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
+{
+	int i;
+
+	if (refcount_dec_and_test(&slist->usage)) {
+		for (i = 0; i < slist->nr_servers; i++) {
+			afs_put_cb_interest(net, slist->servers[i].cb_interest);
+			afs_put_server(net, slist->servers[i].server);
+		}
+		kfree(slist);
+	}
+}
+
+/*
+ * Build a server list from a VLDB record.
+ */
+struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+					      struct key *key,
+					      struct afs_vldb_entry *vldb,
+					      u8 type_mask)
+{
+	struct afs_server_list *slist;
+	struct afs_server *server;
+	int ret = -ENOMEM, nr_servers = 0, i, j;
+
+	for (i = 0; i < vldb->nr_servers; i++)
+		if (vldb->fs_mask[i] & type_mask)
+			nr_servers++;
+
+	slist = kzalloc(sizeof(struct afs_server_list) +
+			sizeof(struct afs_server_entry) * nr_servers,
+			GFP_KERNEL);
+	if (!slist)
+		goto error;
+
+	refcount_set(&slist->usage, 1);
+
+	/* Make sure a records exists for each server in the list. */
+	for (i = 0; i < vldb->nr_servers; i++) {
+		if (!(vldb->fs_mask[i] & type_mask))
+			continue;
+
+		server = afs_lookup_server(cell, key, &vldb->fs_server[i]);
+		if (IS_ERR(server)) {
+			ret = PTR_ERR(server);
+			if (ret == -ENOENT)
+				continue;
+			goto error_2;
+		}
+
+		/* Insertion-sort by server pointer */
+		for (j = 0; j < slist->nr_servers; j++)
+			if (slist->servers[j].server >= server)
+				break;
+		if (j < slist->nr_servers) {
+			if (slist->servers[j].server == server) {
+				afs_put_server(cell->net, server);
+				continue;
+			}
+
+			memmove(slist->servers + j + 1,
+				slist->servers + j,
+				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
+		}
+
+		slist->servers[j].server = server;
+		slist->nr_servers++;
+	}
+
+	if (slist->nr_servers == 0) {
+		ret = -EDESTADDRREQ;
+		goto error_2;
+	}
+
+	return slist;
+
+error_2:
+	afs_put_serverlist(cell->net, slist);
+error:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Copy the annotations from an old server list to its potential replacement.
+ */
+bool afs_annotate_server_list(struct afs_server_list *new,
+			      struct afs_server_list *old)
+{
+	struct afs_server *cur;
+	int i, j;
+
+	if (old->nr_servers != new->nr_servers)
+		goto changed;
+
+	for (i = 0; i < old->nr_servers; i++)
+		if (old->servers[i].server != new->servers[i].server)
+			goto changed;
+
+	return false;
+
+changed:
+	/* Maintain the same current server as before if possible. */
+	cur = old->servers[old->index].server;
+	for (j = 0; j < new->nr_servers; j++) {
+		if (new->servers[j].server == cur) {
+			new->index = j;
+			break;
+		}
+	}
+
+	/* Keep the old callback interest records where possible so that we
+	 * maintain callback interception.
+	 */
+	i = 0;
+	j = 0;
+	while (i < old->nr_servers && j < new->nr_servers) {
+		if (new->servers[j].server == old->servers[i].server) {
+			struct afs_cb_interest *cbi = old->servers[i].cb_interest;
+			if (cbi) {
+				new->servers[j].cb_interest = cbi;
+				refcount_inc(&cbi->usage);
+			}
+			i++;
+			j++;
+			continue;
+		}
+
+		if (new->servers[j].server < old->servers[i].server) {
+			j++;
+			continue;
+		}
+
+		i++;
+		continue;
+	}
+
+	return true;
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 3d53b78b350d..af1e769aaebf 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -142,9 +142,9 @@ void __exit afs_fs_exit(void)
  */
 static int afs_show_devname(struct seq_file *m, struct dentry *root)
 {
-	struct afs_super_info *as = root->d_sb->s_fs_info;
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
 	struct afs_volume *volume = as->volume;
-	struct afs_cell *cell = volume->cell;
+	struct afs_cell *cell = as->cell;
 	const char *suf = "";
 	char pref = '%';
 
@@ -162,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 		break;
 	}
 
-	seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf);
+	seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf);
 	return 0;
 }
 
@@ -334,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 static int afs_test_super(struct super_block *sb, void *data)
 {
 	struct afs_super_info *as1 = data;
-	struct afs_super_info *as = sb->s_fs_info;
+	struct afs_super_info *as = AFS_FS_S(sb);
 
-	return as->net == as1->net && as->volume == as1->volume;
+	return as->net == as1->net && as->volume->vid == as1->volume->vid;
 }
 
 static int afs_set_super(struct super_block *sb, void *data)
 {
-	sb->s_fs_info = data;
+	struct afs_super_info *as = data;
+
+	sb->s_fs_info = as;
 	return set_anon_super(sb, NULL);
 }
 
@@ -351,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data)
 static int afs_fill_super(struct super_block *sb,
 			  struct afs_mount_params *params)
 {
-	struct afs_super_info *as = sb->s_fs_info;
+	struct afs_super_info *as = AFS_FS_S(sb);
 	struct afs_fid fid;
 	struct inode *inode = NULL;
 	int ret;
@@ -368,13 +370,15 @@ static int afs_fill_super(struct super_block *sb,
 	if (ret)
 		return ret;
 	sb->s_bdi->ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
-	strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
+	sprintf(sb->s_id, "%u", as->volume->vid);
+
+	afs_activate_volume(as->volume);
 
 	/* allocate the root inode and dentry */
 	fid.vid		= as->volume->vid;
 	fid.vnode	= 1;
 	fid.unique	= 1;
-	inode = afs_iget(sb, params->key, &fid, NULL, NULL);
+	inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -426,7 +430,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
-	struct afs_volume *vol;
+	struct afs_volume *candidate;
 	struct key *key;
 	struct afs_super_info *as;
 	int ret;
@@ -464,15 +468,19 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	ret = -ENOMEM;
 	as = afs_alloc_sbi(&params);
 	if (!as)
-		goto error;
+		goto error_key;
 
-	/* parse the device name */
-	vol = afs_volume_lookup(&params);
-	if (IS_ERR(vol)) {
-		ret = PTR_ERR(vol);
-		goto error;
+	/* Assume we're going to need a volume record; at the very least we can
+	 * use it to update the volume record if we have one already.  This
+	 * checks that the volume exists within the cell.
+	 */
+	candidate = afs_create_volume(&params);
+	if (IS_ERR(candidate)) {
+		ret = PTR_ERR(candidate);
+		goto error_as;
 	}
-	as->volume = vol;
+
+	as->volume = candidate;
 
 	/* allocate a deviceless superblock */
 	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
@@ -503,11 +511,13 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 
 error_sb:
 	deactivate_locked_super(sb);
+	goto error_key;
 error_as:
 	afs_destroy_sbi(as);
+error_key:
+	key_put(params.key);
 error:
 	afs_put_cell(params.net, params.cell);
-	key_put(params.key);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
@@ -519,8 +529,9 @@ static void afs_kill_super(struct super_block *sb)
 	/* Clear the callback interests (which will do ilookup5) before
 	 * deactivating the superblock.
 	 */
-	afs_clear_callback_interests(as->net, as->volume);
+	afs_clear_callback_interests(as->net, as->volume->servers);
 	kill_anon_super(sb);
+	afs_deactivate_volume(as->volume);
 	afs_destroy_sbi(as);
 }
 
@@ -533,7 +544,7 @@ static void afs_i_init_once(void *_vnode)
 
 	memset(vnode, 0, sizeof(*vnode));
 	inode_init_once(&vnode->vfs_inode);
-	init_waitqueue_head(&vnode->update_waitq);
+	mutex_init(&vnode->io_lock);
 	mutex_init(&vnode->validate_lock);
 	spin_lock_init(&vnode->writeback_lock);
 	spin_lock_init(&vnode->lock);
@@ -561,7 +572,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	memset(&vnode->status, 0, sizeof(vnode->status));
 
 	vnode->volume		= NULL;
-	vnode->update_cnt	= 0;
 	vnode->flags		= 1 << AFS_VNODE_UNSET;
 
 	_leave(" = %p", &vnode->vfs_inode);
@@ -597,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode)
  */
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct afs_fs_cursor fc;
 	struct afs_volume_status vs;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 	struct key *key;
@@ -606,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	if (IS_ERR(key))
 		return PTR_ERR(key);
 
-	ret = afs_vnode_get_volume_status(vnode, key, &vs);
-	key_put(key);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_get_volume_status(&fc, &vs);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
 	}
 
-	buf->f_type	= dentry->d_sb->s_magic;
-	buf->f_bsize	= AFS_BLOCK_SIZE;
-	buf->f_namelen	= AFSNAMEMAX - 1;
+	key_put(key);
 
-	if (vs.max_quota == 0)
-		buf->f_blocks = vs.part_max_blocks;
-	else
-		buf->f_blocks = vs.max_quota;
-	buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
-	return 0;
+	if (ret == 0) {
+		buf->f_type	= dentry->d_sb->s_magic;
+		buf->f_bsize	= AFS_BLOCK_SIZE;
+		buf->f_namelen	= AFSNAMEMAX - 1;
+
+		if (vs.max_quota == 0)
+			buf->f_blocks = vs.part_max_blocks;
+		else
+			buf->f_blocks = vs.max_quota;
+		buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
+	}
+
+	return ret;
 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 1d1e7df77dd5..173c652fe875 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -16,14 +16,15 @@
 #include "internal.h"
 
 /*
- * deliver reply data to a VL.GetEntryByXXX call
+ * Deliver reply data to a VL.GetEntryByNameU call.
  */
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
+static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 {
-	struct afs_cache_vlocation *entry;
-	__be32 *bp;
+	struct afs_uvldbentry__xdr *uvldb;
+	struct afs_vldb_entry *entry;
+	bool new_only = false;
 	u32 tmp;
-	int loop, ret;
+	int i, ret;
 
 	_enter("");
 
@@ -32,152 +33,270 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 		return ret;
 
 	/* unmarshall the reply once we've received all of it */
+	uvldb = call->buffer;
 	entry = call->reply[0];
-	bp = call->buffer;
-
-	for (loop = 0; loop < 64; loop++)
-		entry->name[loop] = ntohl(*bp++);
-	entry->name[loop] = 0;
-	bp++; /* final NUL */
-
-	bp++; /* type */
-	entry->nservers = ntohl(*bp++);
-
-	for (loop = 0; loop < 8; loop++) {
-		entry->servers[loop].srx_family = AF_RXRPC;
-		entry->servers[loop].srx_service = FS_SERVICE;
-		entry->servers[loop].transport_type = SOCK_DGRAM;
-		entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin6);
-		entry->servers[loop].transport.sin6.sin6_family = AF_INET6;
-		entry->servers[loop].transport.sin6.sin6_port = htons(AFS_FS_PORT);
-		entry->servers[loop].transport.sin6.sin6_flowinfo = 0;
-		entry->servers[loop].transport.sin6.sin6_scope_id = 0;
-		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[0] = 0;
-		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[1] = 0;
-		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-		entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[3] = *bp++;
+
+	for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
+		entry->name[i] = (u8)ntohl(uvldb->name[i]);
+	entry->name[i] = 0;
+	entry->name_len = strlen(entry->name);
+
+	/* If there is a new replication site that we can use, ignore all the
+	 * sites that aren't marked as new.
+	 */
+	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+		tmp = ntohl(uvldb->serverFlags[i]);
+		if (!(tmp & AFS_VLSF_DONTUSE) &&
+		    (tmp & AFS_VLSF_NEWREPSITE))
+			new_only = true;
 	}
 
-	bp += 8; /* partition IDs */
+	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+		struct afs_uuid__xdr *xdr;
+		struct afs_uuid *uuid;
+		int j;
 
-	for (loop = 0; loop < 8; loop++) {
-		tmp = ntohl(*bp++);
-		entry->srvtmask[loop] = 0;
+		tmp = ntohl(uvldb->serverFlags[i]);
+		if (tmp & AFS_VLSF_DONTUSE ||
+		    (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
+			continue;
 		if (tmp & AFS_VLSF_RWVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
+			entry->fs_mask[i] |= AFS_VOL_VTM_RW;
 		if (tmp & AFS_VLSF_ROVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
+			entry->fs_mask[i] |= AFS_VOL_VTM_RO;
 		if (tmp & AFS_VLSF_BACKVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
-	}
+			entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+		if (!entry->fs_mask[i])
+			continue;
+
+		xdr = &uvldb->serverNumber[i];
+		uuid = (struct afs_uuid *)&entry->fs_server[i];
+		uuid->time_low			= xdr->time_low;
+		uuid->time_mid			= htons(ntohl(xdr->time_mid));
+		uuid->time_hi_and_version	= htons(ntohl(xdr->time_hi_and_version));
+		uuid->clock_seq_hi_and_reserved	= (u8)ntohl(xdr->clock_seq_hi_and_reserved);
+		uuid->clock_seq_low		= (u8)ntohl(xdr->clock_seq_low);
+		for (j = 0; j < 6; j++)
+			uuid->node[j] = (u8)ntohl(xdr->node[j]);
 
-	entry->vid[0] = ntohl(*bp++);
-	entry->vid[1] = ntohl(*bp++);
-	entry->vid[2] = ntohl(*bp++);
+		entry->nr_servers++;
+	}
 
-	bp++; /* clone ID */
+	for (i = 0; i < AFS_MAXTYPES; i++)
+		entry->vid[i] = ntohl(uvldb->volumeId[i]);
 
-	tmp = ntohl(*bp++); /* flags */
-	entry->vidmask = 0;
+	tmp = ntohl(uvldb->flags);
 	if (tmp & AFS_VLF_RWEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RW;
+		__set_bit(AFS_VLDB_HAS_RW, &entry->flags);
 	if (tmp & AFS_VLF_ROEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RO;
+		__set_bit(AFS_VLDB_HAS_RO, &entry->flags);
 	if (tmp & AFS_VLF_BACKEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_BAK;
-	if (!entry->vidmask)
-		return -EBADMSG;
+		__set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
 
+	if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+		entry->error = -ENOMEDIUM;
+		__set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
+	}
+
+	__set_bit(AFS_VLDB_QUERY_VALID, &entry->flags);
 	_leave(" = 0 [done]");
 	return 0;
 }
 
-/*
- * VL.GetEntryByName operation type
- */
-static const struct afs_call_type afs_RXVLGetEntryByName = {
-	.name		= "VL.GetEntryByName",
-	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.destructor	= afs_flat_call_destructor,
-};
+static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
+{
+	kfree(call->reply[0]);
+	afs_flat_call_destructor(call);
+}
 
 /*
- * VL.GetEntryById operation type
+ * VL.GetEntryByNameU operation type.
  */
-static const struct afs_call_type afs_RXVLGetEntryById = {
-	.name		= "VL.GetEntryById",
-	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.destructor	= afs_flat_call_destructor,
+static const struct afs_call_type afs_RXVLGetEntryByNameU = {
+	.name		= "VL.GetEntryByNameU",
+	.deliver	= afs_deliver_vl_get_entry_by_name_u,
+	.destructor	= afs_destroy_vl_get_entry_by_name_u,
 };
 
 /*
- * dispatch a get volume entry by name operation
+ * Dispatch a get volume entry by name or ID operation (uuid variant).  If the
+ * volname is a decimal number then it's a volume ID not a volume name.
  */
-int afs_vl_get_entry_by_name(struct afs_net *net,
-			     struct afs_addr_cursor *ac,
-			     struct key *key,
-			     const char *volname,
-			     struct afs_cache_vlocation *entry,
-			     bool async)
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
+						  struct afs_addr_cursor *ac,
+						  struct key *key,
+						  const char *volname,
+						  int volnamesz)
 {
+	struct afs_vldb_entry *entry;
 	struct afs_call *call;
-	size_t volnamesz, reqsz, padsz;
+	size_t reqsz, padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	volnamesz = strlen(volname);
 	padsz = (4 - (volnamesz & 3)) & 3;
 	reqsz = 8 + volnamesz + padsz;
 
-	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384);
-	if (!call)
-		return -ENOMEM;
+	entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz,
+				   sizeof(struct afs_uvldbentry__xdr));
+	if (!call) {
+		kfree(entry);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	call->key = key;
 	call->reply[0] = entry;
+	call->ret_reply0 = true;
 
-	/* marshall the parameters */
+	/* Marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(VLGETENTRYBYNAME);
+	*bp++ = htonl(VLGETENTRYBYNAMEU);
 	*bp++ = htonl(volnamesz);
 	memcpy(bp, volname, volnamesz);
 	if (padsz > 0)
-		memset((void *) bp + volnamesz, 0, padsz);
+		memset((void *)bp + volnamesz, 0, padsz);
 
-	/* initiate the call */
-	return afs_make_call(ac, call, GFP_KERNEL, async);
+	return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
 
 /*
- * dispatch a get volume entry by ID operation
+ * Deliver reply data to a VL.GetAddrsU call.
+ *
+ *	GetAddrsU(IN ListAddrByAttributes *inaddr,
+ *		  OUT afsUUID *uuidp1,
+ *		  OUT uint32_t *uniquifier,
+ *		  OUT uint32_t *nentries,
+ *		  OUT bulkaddrs *blkaddrs);
  */
-int afs_vl_get_entry_by_id(struct afs_net *net,
-			   struct afs_addr_cursor *ac,
-			   struct key *key,
-			   afs_volid_t volid,
-			   afs_voltype_t voltype,
-			   struct afs_cache_vlocation *entry,
-			   bool async)
+static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
 {
+	struct afs_addr_list *alist;
+	__be32 *bp;
+	u32 uniquifier, nentries, count;
+	int i, ret;
+
+	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
+	case 1:
+		ret = afs_extract_data(call, call->buffer,
+				       sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
+				       true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer + sizeof(struct afs_uuid__xdr);
+		uniquifier	= ntohl(*bp++);
+		nentries	= ntohl(*bp++);
+		count		= ntohl(*bp);
+
+		nentries = min(nentries, count);
+		alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
+		if (!alist)
+			return -ENOMEM;
+		alist->version = uniquifier;
+		call->reply[0] = alist;
+		call->count = count;
+		call->count2 = nentries;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract entries */
+	case 2:
+		count = min(call->count, 4U);
+		ret = afs_extract_data(call, call->buffer,
+				       count * sizeof(__be32),
+				       call->count > 4);
+		if (ret < 0)
+			return ret;
+
+		alist = call->reply[0];
+		bp = call->buffer;
+		for (i = 0; i < count; i++)
+			if (alist->nr_addrs < call->count2)
+				afs_merge_fs_addr4(alist, *bp++);
+
+		call->count -= count;
+		if (call->count > 0)
+			goto again;
+		call->offset = 0;
+		call->unmarshall++;
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
+{
+	afs_put_server(call->net, (struct afs_server *)call->reply[0]);
+	kfree(call->reply[1]);
+	return afs_flat_call_destructor(call);
+}
+
+/*
+ * VL.GetAddrsU operation type.
+ */
+static const struct afs_call_type afs_RXVLGetAddrsU = {
+	.name		= "VL.GetAddrsU",
+	.deliver	= afs_deliver_vl_get_addrs_u,
+	.destructor	= afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
+					 struct afs_addr_cursor *ac,
+					 struct key *key,
+					 const uuid_t *uuid)
+{
+	struct afs_ListAddrByAttributes__xdr *r;
+	const struct afs_uuid *u = (const struct afs_uuid *)uuid;
 	struct afs_call *call;
 	__be32 *bp;
+	int i;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384);
+	call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU,
+				   sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr),
+				   sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
 	if (!call)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	call->key = key;
-	call->reply[0] = entry;
+	call->reply[0] = NULL;
+	call->ret_reply0 = true;
 
-	/* marshall the parameters */
+	/* Marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(VLGETENTRYBYID);
-	*bp++ = htonl(volid);
-	*bp   = htonl(voltype);
+	*bp++ = htonl(VLGETADDRSU);
+	r = (struct afs_ListAddrByAttributes__xdr *)bp;
+	r->Mask		= htonl(AFS_VLADDR_UUID);
+	r->ipaddr	= 0;
+	r->index	= 0;
+	r->spare	= 0;
+	r->uuid.time_low			= u->time_low;
+	r->uuid.time_mid			= htonl(ntohs(u->time_mid));
+	r->uuid.time_hi_and_version		= htonl(ntohs(u->time_hi_and_version));
+	r->uuid.clock_seq_hi_and_reserved 	= htonl(u->clock_seq_hi_and_reserved);
+	r->uuid.clock_seq_low			= htonl(u->clock_seq_low);
+	for (i = 0; i < 6; i++)
+		r->uuid.node[i] = ntohl(u->node[i]);
 
-	/* initiate the call */
-	return afs_make_call(ac, call, GFP_KERNEL, async);
+	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
deleted file mode 100644
index 52c31ad0ef60..000000000000
--- a/fs/afs/vlocation.c
+++ /dev/null
@@ -1,669 +0,0 @@
-/* AFS volume location management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-struct workqueue_struct *afs_vlocation_update_worker;
-
-static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
-static unsigned afs_vlocation_update_timeout = 10 * 60;
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
-					   struct key *key,
-					   struct afs_cache_vlocation *vldb)
-{
-	struct afs_addr_cursor ac;
-	int ret;
-
-	_enter("%s,%s", vl->cell->name, vl->vldb.name);
-
-	ret = afs_set_vl_cursor(&ac, vl->cell);
-	if (ret < 0)
-		return ret;
-
-	down_write(&vl->cell->vl_sem);
-	
-	ret = -ENOMEDIUM;
-	while (afs_iterate_addresses(&ac)) {
-		_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
-
-		/* attempt to access the VL server */
-		ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key,
-						    vl->vldb.name, vldb, false);
-		switch (ac.error) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ac.error == -ENOMEM || ac.error == -ENONET)
-				goto out;
-			break;
-		case -ENOMEDIUM:
-		case -EKEYREJECTED:
-		case -EKEYEXPIRED:
-			ac.responded = true;
-			goto out;
-		default:
-			ac.responded = true;
-			ac.error = -EIO;
-			break;
-		}
-	}
-
-out:
-	up_write(&vl->cell->vl_sem);
-	ret = afs_end_cursor(&ac);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
-					 struct key *key,
-					 afs_volid_t volid,
-					 afs_voltype_t voltype,
-					 struct afs_cache_vlocation *vldb)
-{
-	struct afs_addr_cursor ac;
-	int ret;
-
-	_enter("%s,%x,%d,", vl->cell->name, volid, voltype);
-
-	ret = afs_set_vl_cursor(&ac, vl->cell);
-	if (ret < 0)
-		return ret;
-
-	down_write(&vl->cell->vl_sem);
-	ret = -ENOMEDIUM;
-	while (afs_iterate_addresses(&ac)) {
-		_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
-
-		/* attempt to access the VL server */
-		ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid,
-						  voltype, vldb, false);
-		switch (ac.error) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ac.error == -ENOMEM || ac.error == -ENONET)
-				goto out;
-			goto rotate;
-		case -EBUSY:
-			ac.responded = true;
-			vl->upd_busy_cnt++;
-			if (vl->upd_busy_cnt <= 3) {
-				if (vl->upd_busy_cnt > 1) {
-					/* second+ BUSY - sleep a little bit */
-					set_current_state(TASK_UNINTERRUPTIBLE);
-					schedule_timeout(1);
-				}
-				continue;
-			}
-			break;
-		case -ENOMEDIUM:
-			ac.responded = true;
-			vl->upd_rej_cnt++;
-			goto rotate;
-		default:
-			ac.responded = true;
-			ac.error = -EIO;
-			goto rotate;
-		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		vl->upd_busy_cnt = 0;
-	}
-
-out:
-	if (ac.error < 0 && vl->upd_rej_cnt > 0) {
-		printk(KERN_NOTICE "kAFS:"
-		       " Active volume no longer valid '%s'\n",
-		       vl->vldb.name);
-		vl->valid = 0;
-		ac.error = -ENOMEDIUM;
-	}
-
-	up_write(&vl->cell->vl_sem);
-	ret = afs_end_cursor(&ac);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * allocate a volume location record
- */
-static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
-						 const char *name,
-						 size_t namesz)
-{
-	struct afs_vlocation *vl;
-
-	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
-	if (vl) {
-		vl->cell = cell;
-		vl->state = AFS_VL_NEW;
-		atomic_set(&vl->usage, 1);
-		INIT_LIST_HEAD(&vl->link);
-		INIT_LIST_HEAD(&vl->grave);
-		INIT_LIST_HEAD(&vl->update);
-		init_waitqueue_head(&vl->waitq);
-		spin_lock_init(&vl->lock);
-		memcpy(vl->vldb.name, name, namesz);
-	}
-
-	_leave(" = %p", vl);
-	return vl;
-}
-
-/*
- * update record if we found it in the cache
- */
-static int afs_vlocation_update_record(struct afs_vlocation *vl,
-				       struct key *key,
-				       struct afs_cache_vlocation *vldb)
-{
-	afs_voltype_t voltype;
-	afs_volid_t vid;
-	int ret;
-
-	/* try to look up a cached volume in the cell VL databases by ID */
-	_debug("Locally Cached: %s %02x", vl->vldb.name, vl->vldb.vidmask);
-
-	_debug("Vids: %08x %08x %08x",
-	       vl->vldb.vid[0],
-	       vl->vldb.vid[1],
-	       vl->vldb.vid[2]);
-
-	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
-		vid = vl->vldb.vid[0];
-		voltype = AFSVL_RWVOL;
-	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
-		vid = vl->vldb.vid[1];
-		voltype = AFSVL_ROVOL;
-	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
-		vid = vl->vldb.vid[2];
-		voltype = AFSVL_BACKVOL;
-	} else {
-		BUG();
-		vid = 0;
-		voltype = 0;
-	}
-
-	/* contact the server to make sure the volume is still available
-	 * - TODO: need to handle disconnected operation here
-	 */
-	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
-	switch (ret) {
-		/* net error */
-	default:
-		printk(KERN_WARNING "kAFS:"
-		       " failed to update volume '%s' (%x) up in '%s': %d\n",
-		       vl->vldb.name, vid, vl->cell->name, ret);
-		_leave(" = %d", ret);
-		return ret;
-
-		/* pulled from local cache into memory */
-	case 0:
-		_leave(" = 0");
-		return 0;
-
-		/* uh oh... looks like the volume got deleted */
-	case -ENOMEDIUM:
-		printk(KERN_ERR "kAFS:"
-		       " volume '%s' (%x) does not exist '%s'\n",
-		       vl->vldb.name, vid, vl->cell->name);
-
-		/* TODO: make existing record unavailable */
-		_leave(" = %d", ret);
-		return ret;
-	}
-}
-
-/*
- * apply the update to a VL record
- */
-static void afs_vlocation_apply_update(struct afs_vlocation *vl,
-				       struct afs_cache_vlocation *vldb)
-{
-	_debug("Done VL Lookup: %s %02x", vldb->name, vldb->vidmask);
-
-	_debug("Vids: %08x %08x %08x",
-	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);
-
-	if (strcmp(vldb->name, vl->vldb.name) != 0)
-		printk(KERN_NOTICE "kAFS:"
-		       " name of volume '%s' changed to '%s' on server\n",
-		       vl->vldb.name, vldb->name);
-
-	vl->vldb = *vldb;
-}
-
-/*
- * fill in a volume location record, consulting the cache and the VL server
- * both
- */
-static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
-					struct key *key)
-{
-	struct afs_cache_vlocation vldb;
-	int ret;
-
-	_enter("");
-
-	ASSERTCMP(vl->valid, ==, 0);
-
-	memset(&vldb, 0, sizeof(vldb));
-
-	/* Try to look up an unknown volume in the cell VL databases by name */
-	ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
-	if (ret < 0) {
-		printk("kAFS: failed to locate '%s' in cell '%s'\n",
-		       vl->vldb.name, vl->cell->name);
-		return ret;
-	}
-
-	afs_vlocation_apply_update(vl, &vldb);
-	_leave(" = 0");
-	return 0;
-}
-
-/*
- * queue a vlocation record for updates
- */
-static void afs_vlocation_queue_for_updates(struct afs_net *net,
-					    struct afs_vlocation *vl)
-{
-	struct afs_vlocation *xvl;
-
-	/* wait at least 10 minutes before updating... */
-	vl->update_at = ktime_get_real_seconds() +
-			afs_vlocation_update_timeout;
-
-	spin_lock(&net->vl_updates_lock);
-
-	if (!list_empty(&net->vl_updates)) {
-		/* ... but wait at least 1 second more than the newest record
-		 * already queued so that we don't spam the VL server suddenly
-		 * with lots of requests
-		 */
-		xvl = list_entry(net->vl_updates.prev,
-				 struct afs_vlocation, update);
-		if (vl->update_at <= xvl->update_at)
-			vl->update_at = xvl->update_at + 1;
-	} else if (net->live) {
-		queue_delayed_work(afs_vlocation_update_worker,
-				   &net->vl_updater,
-				   afs_vlocation_update_timeout * HZ);
-	}
-
-	list_add_tail(&vl->update, &net->vl_updates);
-	spin_unlock(&net->vl_updates_lock);
-}
-
-/*
- * lookup volume location
- * - iterate through the VL servers in a cell until one of them admits knowing
- *   about the volume in question
- * - lookup in the local cache if not able to find on the VL server
- * - insert/update in the local cache if did get a VL response
- */
-struct afs_vlocation *afs_vlocation_lookup(struct afs_net *net,
-					   struct afs_cell *cell,
-					   struct key *key,
-					   const char *name,
-					   size_t namesz)
-{
-	struct afs_vlocation *vl;
-	int ret;
-
-	_enter("{%s},{%x},%*.*s,%zu",
-	       cell->name, key_serial(key),
-	       (int) namesz, (int) namesz, name, namesz);
-
-	if (namesz >= sizeof(vl->vldb.name)) {
-		_leave(" = -ENAMETOOLONG");
-		return ERR_PTR(-ENAMETOOLONG);
-	}
-
-	/* see if we have an in-memory copy first */
-	down_write(&cell->vl_sem);
-	spin_lock(&cell->vl_lock);
-	list_for_each_entry(vl, &cell->vl_list, link) {
-		if (vl->vldb.name[namesz] != '\0')
-			continue;
-		if (memcmp(vl->vldb.name, name, namesz) == 0)
-			goto found_in_memory;
-	}
-	spin_unlock(&cell->vl_lock);
-
-	/* not in the cell's in-memory lists - create a new record */
-	vl = afs_vlocation_alloc(cell, name, namesz);
-	if (!vl) {
-		up_write(&cell->vl_sem);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	afs_get_cell(cell);
-
-	list_add_tail(&vl->link, &cell->vl_list);
-	vl->state = AFS_VL_CREATING;
-	up_write(&cell->vl_sem);
-
-fill_in_record:
-	ret = afs_vlocation_fill_in_record(vl, key);
-	if (ret < 0)
-		goto error_abandon;
-	spin_lock(&vl->lock);
-	vl->state = AFS_VL_VALID;
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-
-	/* schedule for regular updates */
-	afs_vlocation_queue_for_updates(net, vl);
-	goto success;
-
-found_in_memory:
-	/* found in memory */
-	_debug("found in memory");
-	atomic_inc(&vl->usage);
-	spin_unlock(&cell->vl_lock);
-	if (!list_empty(&vl->grave)) {
-		spin_lock(&net->vl_graveyard_lock);
-		list_del_init(&vl->grave);
-		spin_unlock(&net->vl_graveyard_lock);
-	}
-	up_write(&cell->vl_sem);
-
-	/* see if it was an abandoned record that we might try filling in */
-	spin_lock(&vl->lock);
-	while (vl->state != AFS_VL_VALID) {
-		afs_vlocation_state_t state = vl->state;
-
-		_debug("invalid [state %d]", state);
-
-		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
-			vl->state = AFS_VL_CREATING;
-			spin_unlock(&vl->lock);
-			goto fill_in_record;
-		}
-
-		/* must now wait for creation or update by someone else to
-		 * complete */
-		_debug("wait");
-
-		spin_unlock(&vl->lock);
-		ret = wait_event_interruptible(vl->waitq,
-					       vl->state == AFS_VL_NEW ||
-					       vl->state == AFS_VL_VALID ||
-					       vl->state == AFS_VL_NO_VOLUME);
-		if (ret < 0)
-			goto error;
-		spin_lock(&vl->lock);
-	}
-	spin_unlock(&vl->lock);
-
-success:
-	_leave(" = %p", vl);
-	return vl;
-
-error_abandon:
-	spin_lock(&vl->lock);
-	vl->state = AFS_VL_NEW;
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-error:
-	ASSERT(vl != NULL);
-	afs_put_vlocation(net, vl);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
-}
-
-/*
- * finish using a volume location record
- */
-void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl)
-{
-	if (!vl)
-		return;
-
-	_enter("%s", vl->vldb.name);
-
-	ASSERTCMP(atomic_read(&vl->usage), >, 0);
-
-	if (likely(!atomic_dec_and_test(&vl->usage))) {
-		_leave("");
-		return;
-	}
-
-	spin_lock(&net->vl_graveyard_lock);
-	if (atomic_read(&vl->usage) == 0) {
-		_debug("buried");
-		list_move_tail(&vl->grave, &net->vl_graveyard);
-		vl->time_of_death = ktime_get_real_seconds();
-		queue_delayed_work(afs_wq, &net->vl_reaper,
-				   afs_vlocation_timeout * HZ);
-
-		/* suspend updates on this record */
-		if (!list_empty(&vl->update)) {
-			spin_lock(&net->vl_updates_lock);
-			list_del_init(&vl->update);
-			spin_unlock(&net->vl_updates_lock);
-		}
-	}
-	spin_unlock(&net->vl_graveyard_lock);
-	_leave(" [killed?]");
-}
-
-/*
- * destroy a dead volume location record
- */
-static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl)
-{
-	_enter("%p", vl);
-
-	afs_put_cell(net, vl->cell);
-	kfree(vl);
-}
-
-/*
- * reap dead volume location records
- */
-void afs_vlocation_reaper(struct work_struct *work)
-{
-	LIST_HEAD(corpses);
-	struct afs_vlocation *vl;
-	struct afs_net *net = container_of(work, struct afs_net, vl_reaper.work);
-	unsigned long delay, expiry;
-	time64_t now;
-
-	_enter("");
-
-	now = ktime_get_real_seconds();
-	spin_lock(&net->vl_graveyard_lock);
-
-	while (!list_empty(&net->vl_graveyard)) {
-		vl = list_entry(net->vl_graveyard.next,
-				struct afs_vlocation, grave);
-
-		_debug("check %p", vl);
-
-		/* the queue is ordered most dead first */
-		if (net->live) {
-			expiry = vl->time_of_death + afs_vlocation_timeout;
-			if (expiry > now) {
-				delay = (expiry - now) * HZ;
-				_debug("delay %lu", delay);
-				mod_delayed_work(afs_wq, &net->vl_reaper, delay);
-				break;
-			}
-		}
-
-		spin_lock(&vl->cell->vl_lock);
-		if (atomic_read(&vl->usage) > 0) {
-			_debug("no reap");
-			list_del_init(&vl->grave);
-		} else {
-			_debug("reap");
-			list_move_tail(&vl->grave, &corpses);
-			list_del_init(&vl->link);
-		}
-		spin_unlock(&vl->cell->vl_lock);
-	}
-
-	spin_unlock(&net->vl_graveyard_lock);
-
-	/* now reap the corpses we've extracted */
-	while (!list_empty(&corpses)) {
-		vl = list_entry(corpses.next, struct afs_vlocation, grave);
-		list_del(&vl->grave);
-		afs_vlocation_destroy(net, vl);
-	}
-
-	_leave("");
-}
-
-/*
- * discard all the volume location records for rmmod
- */
-void __net_exit afs_vlocation_purge(struct afs_net *net)
-{
-	spin_lock(&net->vl_updates_lock);
-	list_del_init(&net->vl_updates);
-	spin_unlock(&net->vl_updates_lock);
-	mod_delayed_work(afs_vlocation_update_worker, &net->vl_updater, 0);
-	mod_delayed_work(afs_wq, &net->vl_reaper, 0);
-}
-
-/*
- * update a volume location
- */
-void afs_vlocation_updater(struct work_struct *work)
-{
-	struct afs_cache_vlocation vldb;
-	struct afs_vlocation *vl, *xvl;
-	struct afs_net *net = container_of(work, struct afs_net, vl_updater.work);
-	time64_t now;
-	long timeout;
-	int ret;
-
-	if (!net->live)
-		return;
-
-	_enter("");
-
-	now = ktime_get_real_seconds();
-
-	/* find a record to update */
-	spin_lock(&net->vl_updates_lock);
-	for (;;) {
-		if (list_empty(&net->vl_updates) || !net->live) {
-			spin_unlock(&net->vl_updates_lock);
-			_leave(" [nothing]");
-			return;
-		}
-
-		vl = list_entry(net->vl_updates.next,
-				struct afs_vlocation, update);
-		if (atomic_read(&vl->usage) > 0)
-			break;
-		list_del_init(&vl->update);
-	}
-
-	timeout = vl->update_at - now;
-	if (timeout > 0) {
-		queue_delayed_work(afs_vlocation_update_worker,
-				   &net->vl_updater, timeout * HZ);
-		spin_unlock(&net->vl_updates_lock);
-		_leave(" [nothing]");
-		return;
-	}
-
-	list_del_init(&vl->update);
-	atomic_inc(&vl->usage);
-	spin_unlock(&net->vl_updates_lock);
-
-	/* we can now perform the update */
-	_debug("update %s", vl->vldb.name);
-	vl->state = AFS_VL_UPDATING;
-	vl->upd_rej_cnt = 0;
-	vl->upd_busy_cnt = 0;
-
-	ret = afs_vlocation_update_record(vl, NULL, &vldb);
-	spin_lock(&vl->lock);
-	switch (ret) {
-	case 0:
-		afs_vlocation_apply_update(vl, &vldb);
-		vl->state = AFS_VL_VALID;
-		break;
-	case -ENOMEDIUM:
-		vl->state = AFS_VL_VOLUME_DELETED;
-		break;
-	default:
-		vl->state = AFS_VL_UNCERTAIN;
-		break;
-	}
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-
-	/* and then reschedule */
-	_debug("reschedule");
-	vl->update_at = ktime_get_real_seconds() +
-			afs_vlocation_update_timeout;
-
-	spin_lock(&net->vl_updates_lock);
-
-	if (!list_empty(&net->vl_updates)) {
-		/* next update in 10 minutes, but wait at least 1 second more
-		 * than the newest record already queued so that we don't spam
-		 * the VL server suddenly with lots of requests
-		 */
-		xvl = list_entry(net->vl_updates.prev,
-				 struct afs_vlocation, update);
-		if (vl->update_at <= xvl->update_at)
-			vl->update_at = xvl->update_at + 1;
-		xvl = list_entry(net->vl_updates.next,
-				 struct afs_vlocation, update);
-		timeout = xvl->update_at - now;
-		if (timeout < 0)
-			timeout = 0;
-	} else {
-		timeout = afs_vlocation_update_timeout;
-	}
-
-	ASSERT(list_empty(&vl->update));
-
-	list_add_tail(&vl->update, &net->vl_updates);
-
-	_debug("timeout %ld", timeout);
-	queue_delayed_work(afs_vlocation_update_worker, &net->vl_updater, timeout * HZ);
-	spin_unlock(&net->vl_updates_lock);
-	afs_put_vlocation(net, vl);
-}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
deleted file mode 100644
index 9c7333eb01c2..000000000000
--- a/fs/afs/vnode.c
+++ /dev/null
@@ -1,750 +0,0 @@
-/* AFS vnode management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-/*
- * Handle remote file deletion.
- */
-static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
-{
-	struct afs_cb_interest *cbi = vnode->cb_interest;
-
-	_enter("{%p}", cbi);
-
-	set_bit(AFS_VNODE_DELETED, &vnode->flags);
-
-	if (cbi) {
-		vnode->cb_interest = NULL;
-		afs_put_cb_interest(afs_v2net(vnode), cbi);
-	}
-
-	_leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after a successful
- * operation completion
- * - starts callback expiry timer
- * - adds to server's callback list
- */
-void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
-				      struct afs_server *server)
-{
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-
-	wake_up_all(&vnode->update_waitq);
-	_leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after an operation failed
- */
-static void afs_vnode_status_update_failed(struct afs_fs_cursor *fc,
-					   struct afs_vnode *vnode)
-{
-	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, fc->ac.error);
-
-	spin_lock(&vnode->lock);
-
-	if (fc->ac.error == -ENOENT) {
-		/* the file was deleted on the server */
-		_debug("got NOENT from server - marking file deleted");
-		afs_vnode_deleted_remotely(vnode);
-	}
-
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-
-	wake_up_all(&vnode->update_waitq);
-	_leave("");
-}
-
-/*
- * fetch file status from the volume
- * - don't issue a fetch if:
- *   - the changed bit is not set and there's a valid callback
- *   - there are any outstanding ops that will fetch the status
- * - TODO implement local caching
- */
-int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force)
-{
-	struct afs_fs_cursor fc;
-	unsigned int cb_break = 0;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	_enter("%s,{%x:%u.%u,S=%lx},%u",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
-	       vnode->flags,
-	       force);
-
-	if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-		_leave(" [unchanged]");
-		return 0;
-	}
-
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_leave(" [deleted]");
-		return -ENOENT;
-	}
-
-	cb_break = vnode->cb_break + vnode->cb_s_break;
-
-	spin_lock(&vnode->lock);
-
-	if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-		spin_unlock(&vnode->lock);
-		_leave(" [unchanged]");
-		return 0;
-	}
-
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-
-	if (vnode->update_cnt > 0) {
-		/* someone else started a fetch */
-		_debug("wait on fetch %d", vnode->update_cnt);
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		ASSERT(myself.func != NULL);
-		add_wait_queue(&vnode->update_waitq, &myself);
-
-		/* wait for the status to be updated */
-		for (;;) {
-			if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
-				break;
-			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-				break;
-
-			/* check to see if it got updated and invalidated all
-			 * before we saw it */
-			if (vnode->update_cnt == 0) {
-				remove_wait_queue(&vnode->update_waitq,
-						  &myself);
-				set_current_state(TASK_RUNNING);
-				goto get_anyway;
-			}
-
-			spin_unlock(&vnode->lock);
-
-			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
-
-			spin_lock(&vnode->lock);
-		}
-
-		remove_wait_queue(&vnode->update_waitq, &myself);
-		spin_unlock(&vnode->lock);
-		set_current_state(TASK_RUNNING);
-
-		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
-			-ENOENT : 0;
-	}
-
-get_anyway:
-	/* okay... we're going to have to initiate the op */
-	vnode->update_cnt++;
-
-	spin_unlock(&vnode->lock);
-
-	/* merge AFS status fetches and clear outstanding callback on this
-	 * vnode */
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_fetch_file_status(&fc, key, vnode, NULL, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		_debug("adjust");
-		afs_cache_permit(vnode, key, cb_break);
-		afs_vnode_finalise_status_update(vnode, fc.server);
-	} else {
-		_debug("failed [%d]", fc.ac.error);
-		afs_vnode_status_update_failed(&fc, vnode);
-	}
-
-out:
-	afs_end_fs_cursor(&fc, afs_v2net(vnode));
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	_leave(" = %d [cnt %d]", fc.ac.error, vnode->update_cnt);
-	return fc.ac.error;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * fetch file data from the volume
- * - TODO implement caching
- */
-int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
-			 struct afs_read *desc)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	/* merge in AFS status fetches and clear outstanding callback on this
-	 * vnode */
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_fetch_data(&fc, key, vnode, desc, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0)
-		afs_vnode_finalise_status_update(vnode, fc.server);
-	else
-		afs_vnode_status_update_failed(&fc, vnode);
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * make a file or a directory
- */
-int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
-		     const char *name, umode_t mode, struct afs_fid *newfid,
-		     struct afs_file_status *newstatus,
-		     struct afs_callback *newcb, struct afs_server **_server)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,%s,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're creating in */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_create(&fc, key, vnode, name, mode, newfid,
-					    newstatus, newcb, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(vnode, fc.server);
-		*_server = fc.server;
-		fc.server = NULL;
-	} else {
-		afs_vnode_status_update_failed(&fc, vnode);
-		*_server = NULL;
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * remove a file or directory
- */
-int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
-		     bool isdir)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,%s",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're removing from */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_remove(&fc, key, vnode, name, isdir, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0)
-		afs_vnode_finalise_status_update(vnode, fc.server);
-	else
-		afs_vnode_status_update_failed(&fc, vnode);
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * create a hard link
- */
-int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
-			  struct key *key, const char *name)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
-	       dvnode->volume->vlocation->vldb.name,
-	       dvnode->fid.vid,
-	       dvnode->fid.vnode,
-	       dvnode->fid.unique,
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're removing from */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	spin_lock(&dvnode->lock);
-	dvnode->update_cnt++;
-	spin_unlock(&dvnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, dvnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_link(&fc, key, dvnode, vnode, name, false);
-
-	} while (afs_iterate_fs_cursor(&fc, dvnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(vnode, fc.server);
-		afs_vnode_finalise_status_update(dvnode, fc.server);
-	} else {
-		afs_vnode_status_update_failed(&fc, vnode);
-		afs_vnode_status_update_failed(&fc, dvnode);
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	spin_lock(&dvnode->lock);
-	dvnode->update_cnt--;
-	ASSERTCMP(dvnode->update_cnt, >=, 0);
-	spin_unlock(&dvnode->lock);
-	goto out;
-}
-
-/*
- * create a symbolic link
- */
-int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
-		      const char *name, const char *content,
-		      struct afs_fid *newfid,
-		      struct afs_file_status *newstatus,
-		      struct afs_server **_server)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,%s,%s,,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name, content);
-
-	/* this op will fetch the status on the directory we're creating in */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_symlink(&fc, key, vnode, name, content,
-					     newfid, newstatus, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(vnode, fc.server);
-		*_server = fc.server;
-		fc.server = NULL;
-	} else {
-		afs_vnode_status_update_failed(&fc, vnode);
-		*_server = NULL;
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	*_server = NULL;
-	goto out;
-}
-
-/*
- * rename a file
- */
-int afs_vnode_rename(struct afs_vnode *orig_dvnode,
-		     struct afs_vnode *new_dvnode,
-		     struct key *key,
-		     const char *orig_name,
-		     const char *new_name)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
-	       orig_dvnode->volume->vlocation->vldb.name,
-	       orig_dvnode->fid.vid,
-	       orig_dvnode->fid.vnode,
-	       orig_dvnode->fid.unique,
-	       new_dvnode->volume->vlocation->vldb.name,
-	       new_dvnode->fid.vid,
-	       new_dvnode->fid.vnode,
-	       new_dvnode->fid.unique,
-	       key_serial(key),
-	       orig_name,
-	       new_name);
-
-	/* this op will fetch the status on both the directories we're dealing
-	 * with */
-	spin_lock(&orig_dvnode->lock);
-	orig_dvnode->update_cnt++;
-	spin_unlock(&orig_dvnode->lock);
-	if (new_dvnode != orig_dvnode) {
-		spin_lock(&new_dvnode->lock);
-		new_dvnode->update_cnt++;
-		spin_unlock(&new_dvnode->lock);
-	}
-
-	afs_init_fs_cursor(&fc, orig_dvnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, orig_dvnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_rename(&fc, key, orig_dvnode, orig_name,
-					    new_dvnode, new_name, false);
-
-	} while (afs_iterate_fs_cursor(&fc, orig_dvnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(orig_dvnode, fc.server);
-		if (new_dvnode != orig_dvnode)
-			afs_vnode_finalise_status_update(new_dvnode, fc.server);
-	} else {
-		afs_vnode_status_update_failed(&fc, orig_dvnode);
-		if (new_dvnode != orig_dvnode)
-			afs_vnode_status_update_failed(&fc, new_dvnode);
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(orig_dvnode));
-
-no_server:
-	spin_lock(&orig_dvnode->lock);
-	orig_dvnode->update_cnt--;
-	ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
-	spin_unlock(&orig_dvnode->lock);
-	if (new_dvnode != orig_dvnode) {
-		spin_lock(&new_dvnode->lock);
-		new_dvnode->update_cnt--;
-		ASSERTCMP(new_dvnode->update_cnt, >=, 0);
-		spin_unlock(&new_dvnode->lock);
-	}
-	goto out;
-}
-
-/*
- * write to a file
- */
-int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
-			 unsigned offset, unsigned to)
-{
-	struct afs_fs_cursor fc;
-	struct afs_vnode *vnode = wb->vnode;
-
-	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(wb->key),
-	       first, last, offset, to);
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_store_data(&fc, wb, first, last, offset, to,
-						false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(vnode, fc.server);
-	} else {
-		afs_vnode_status_update_failed(&fc, vnode);
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * set the attributes on a file
- */
-int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
-		      struct iattr *attr)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			goto no_server;
-
-		fc.ac.error = afs_fs_setattr(&fc, key, vnode, attr, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	/* adjust the flags */
-	if (fc.ac.error == 0) {
-		afs_vnode_finalise_status_update(vnode, fc.server);
-	} else {
-		afs_vnode_status_update_failed(&fc, vnode);
-	}
-
-out:
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	goto out;
-}
-
-/*
- * get the status of a volume
- */
-int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
-				struct afs_volume_status *vs)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			break;
-
-		fc.ac.error = afs_fs_get_volume_status(&fc, key, vnode, vs, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-}
-
-/*
- * get a lock on a file
- */
-int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
-		       afs_lock_type_t type)
-{
-	struct afs_fs_cursor fc;
-
-	_enter("%s{%x:%u.%u},%x,%u",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key), type);
-
-	afs_init_fs_cursor(&fc, vnode);
-	do {
-		/* pick a server to query */
-		if (!afs_volume_pick_fileserver(&fc, vnode))
-			break;
-
-		fc.ac.error = afs_fs_set_lock(&fc, key, vnode, type, false);
-
-	} while (afs_iterate_fs_cursor(&fc, vnode));
-
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-}
-
-/*
- * extend a lock on a file
- */
-int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
-{
-	struct afs_fs_cursor fc;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	ret = afs_set_fs_cursor(&fc, vnode);
-	if (ret < 0)
-		return ret;
-
-	fc.ac.error = afs_fs_extend_lock(&fc, key, vnode, false);
-
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-}
-
-/*
- * release a lock on a file
- */
-int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
-{
-	struct afs_fs_cursor fc;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	ret = afs_set_fs_cursor(&fc, vnode);
-	if (ret < 0)
-		return ret;
-
-	fc.ac.error = afs_fs_release_lock(&fc, key, vnode, false);
-
-	return afs_end_fs_cursor(&fc, afs_v2net(vnode));
-}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 3c5ad1cc50f3..2295dd4f9b15 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -13,11 +13,148 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
+unsigned __read_mostly afs_volume_gc_delay = 10;
+unsigned __read_mostly afs_volume_record_life = 60 * 60;
+
+static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
 
 /*
- * lookup a volume by name
- * - this can be one of the following:
+ * Allocate a volume record and load it up from a vldb record.
+ */
+static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
+					   struct afs_vldb_entry *vldb,
+					   unsigned long type_mask)
+{
+	struct afs_server_list *slist;
+	struct afs_server *server;
+	struct afs_volume *volume;
+	int ret = -ENOMEM, nr_servers = 0, i, j;
+
+	for (i = 0; i < vldb->nr_servers; i++)
+		if (vldb->fs_mask[i] & type_mask)
+			nr_servers++;
+
+	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+	if (!volume)
+		goto error_0;
+
+	volume->vid		= vldb->vid[params->type];
+	volume->update_at	= ktime_get_real_seconds() + afs_volume_record_life;
+	volume->cell		= afs_get_cell(params->cell);
+	volume->type		= params->type;
+	volume->type_force	= params->force;
+	volume->name_len	= vldb->name_len;
+
+	atomic_set(&volume->usage, 1);
+	INIT_LIST_HEAD(&volume->proc_link);
+	rwlock_init(&volume->servers_lock);
+	memcpy(volume->name, vldb->name, vldb->name_len + 1);
+
+	slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
+	if (IS_ERR(slist)) {
+		ret = PTR_ERR(slist);
+		goto error_1;
+	}
+
+	refcount_set(&slist->usage, 1);
+	volume->servers = slist;
+
+	/* Make sure a records exists for each server this volume occupies. */
+	for (i = 0; i < nr_servers; i++) {
+		if (!(vldb->fs_mask[i] & type_mask))
+			continue;
+
+		server = afs_lookup_server(params->cell, params->key,
+					   &vldb->fs_server[i]);
+		if (IS_ERR(server)) {
+			ret = PTR_ERR(server);
+			if (ret == -ENOENT)
+				continue;
+			goto error_2;
+		}
+
+		/* Insertion-sort by server pointer */
+		for (j = 0; j < slist->nr_servers; j++)
+			if (slist->servers[j].server >= server)
+				break;
+		if (j < slist->nr_servers) {
+			if (slist->servers[j].server == server) {
+				afs_put_server(params->net, server);
+				continue;
+			}
+
+			memmove(slist->servers + j + 1,
+				slist->servers + j,
+				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
+		}
+
+		slist->servers[j].server = server;
+		slist->nr_servers++;
+	}
+
+	if (slist->nr_servers == 0) {
+		ret = -EDESTADDRREQ;
+		goto error_2;
+	}
+
+	return volume;
+
+error_2:
+	afs_put_serverlist(params->net, slist);
+error_1:
+	kfree(volume);
+error_0:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Look up a VLDB record for a volume.
+ */
+static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
+						 struct key *key,
+						 const char *volname,
+						 size_t volnamesz)
+{
+	struct afs_addr_cursor ac;
+	struct afs_vldb_entry *vldb;
+	int ret;
+
+	ret = afs_set_vl_cursor(&ac, cell);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	while (afs_iterate_addresses(&ac)) {
+		vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
+						  volname, volnamesz);
+		switch (ac.error) {
+		case 0:
+			afs_end_cursor(&ac);
+			return vldb;
+		case -ECONNABORTED:
+			ac.error = afs_abort_to_error(ac.abort_code);
+			goto error;
+		case -ENOMEM:
+		case -ENONET:
+			goto error;
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			break;
+		default:
+			ac.error = -EIO;
+			goto error;
+		}
+	}
+
+error:
+	return ERR_PTR(afs_end_cursor(&ac));
+}
+
+/*
+ * Look up a volume in the VL server and create a candidate volume record for
+ * it.
+ *
+ * The volume name can be one of the following:
  *	"%[cell:]volume[.]"		R/W volume
  *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
  *					 or R/W (rwparent=1) volume
@@ -37,169 +174,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  *           explicitly told otherwise
  */
-struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
+struct afs_volume *afs_create_volume(struct afs_mount_params *params)
 {
-	struct afs_vlocation *vlocation = NULL;
-	struct afs_volume *volume = NULL;
-	struct afs_server *server = NULL;
-	char srvtmask;
-	int ret, loop;
-
-	_enter("{%*.*s,%d}",
-	       params->volnamesz, params->volnamesz, params->volname, params->rwpath);
-
-	/* lookup the volume location record */
-	vlocation = afs_vlocation_lookup(params->net, params->cell, params->key,
-					 params->volname, params->volnamesz);
-	if (IS_ERR(vlocation)) {
-		ret = PTR_ERR(vlocation);
-		vlocation = NULL;
-		goto error;
-	}
+	struct afs_vldb_entry *vldb;
+	struct afs_volume *volume;
+	unsigned long type_mask = 1UL << params->type;
 
-	/* make the final decision on the type we want */
-	ret = -ENOMEDIUM;
-	if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
-		goto error;
+	vldb = afs_vl_lookup_vldb(params->cell, params->key,
+				  params->volname, params->volnamesz);
+	if (IS_ERR(vldb))
+		return ERR_CAST(vldb);
 
-	srvtmask = 0;
-	for (loop = 0; loop < vlocation->vldb.nservers; loop++)
-		srvtmask |= vlocation->vldb.srvtmask[loop];
+	if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
+		volume = ERR_PTR(vldb->error);
+		goto error;
+	}
 
+	/* Make the final decision on the type we want */
+	volume = ERR_PTR(-ENOMEDIUM);
 	if (params->force) {
-		if (!(srvtmask & (1 << params->type)))
+		if (!(vldb->flags & type_mask))
 			goto error;
-	} else if (srvtmask & AFS_VOL_VTM_RO) {
+	} else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
 		params->type = AFSVL_ROVOL;
-	} else if (srvtmask & AFS_VOL_VTM_RW) {
+	} else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
 		params->type = AFSVL_RWVOL;
 	} else {
 		goto error;
 	}
 
-	down_write(&params->cell->vl_sem);
+	type_mask = 1UL << params->type;
+	volume = afs_alloc_volume(params, vldb, type_mask);
 
-	/* is the volume already active? */
-	if (vlocation->vols[params->type]) {
-		/* yes - re-use it */
-		volume = vlocation->vols[params->type];
-		afs_get_volume(volume);
-		goto success;
-	}
+error:
+	kfree(vldb);
+	return volume;
+}
 
-	/* create a new volume record */
-	_debug("creating new volume record");
+/*
+ * Destroy a volume record
+ */
+static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+{
+	_enter("%p", volume);
 
-	ret = -ENOMEM;
-	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
-	if (!volume)
-		goto error_up;
+#ifdef CONFIG_AFS_FSCACHE
+	ASSERTCMP(volume->cache, ==, NULL);
+#endif
 
-	atomic_set(&volume->usage, 1);
-	volume->type		= params->type;
-	volume->type_force	= params->force;
-	volume->cell		= params->cell;
-	volume->vid		= vlocation->vldb.vid[params->type];
-
-	init_rwsem(&volume->server_sem);
-
-	/* look up all the applicable server records */
-	for (loop = 0; loop < 8; loop++) {
-		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
-			server = afs_lookup_server(
-			       volume->cell, &vlocation->vldb.servers[loop]);
-			if (IS_ERR(server)) {
-				ret = PTR_ERR(server);
-				goto error_discard;
-			}
+	afs_put_serverlist(net, volume->servers);
+	afs_put_cell(net, volume->cell);
+	kfree(volume);
 
-			volume->servers[volume->nservers] = server;
-			volume->nservers++;
-		}
+	_leave(" [destroyed]");
+}
+
+/*
+ * Drop a reference on a volume record.
+ */
+void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
+{
+	if (volume) {
+		_enter("%s", volume->name);
+
+		if (atomic_dec_and_test(&volume->usage))
+			afs_destroy_volume(cell->net, volume);
 	}
+}
 
-	/* attach the cache and volume location */
+/*
+ * Activate a volume.
+ */
+void afs_activate_volume(struct afs_volume *volume)
+{
 #ifdef CONFIG_AFS_FSCACHE
 	volume->cache = fscache_acquire_cookie(volume->cell->cache,
 					       &afs_volume_cache_index_def,
 					       volume, true);
 #endif
-	afs_get_vlocation(vlocation);
-	volume->vlocation = vlocation;
-
-	vlocation->vols[volume->type] = volume;
 
-success:
-	_debug("kAFS selected %s volume %08x",
-	       afs_voltypes[volume->type], volume->vid);
-	up_write(&params->cell->vl_sem);
-	afs_put_vlocation(params->net, vlocation);
-	_leave(" = %p", volume);
-	return volume;
+	write_lock(&volume->cell->proc_lock);
+	list_add_tail(&volume->proc_link, &volume->cell->proc_volumes);
+	write_unlock(&volume->cell->proc_lock);
+}
 
-	/* clean up */
-error_up:
-	up_write(&params->cell->vl_sem);
-error:
-	afs_put_vlocation(params->net, vlocation);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+/*
+ * Deactivate a volume.
+ */
+void afs_deactivate_volume(struct afs_volume *volume)
+{
+	_enter("%s", volume->name);
 
-error_discard:
-	up_write(&params->cell->vl_sem);
+	write_lock(&volume->cell->proc_lock);
+	list_del_init(&volume->proc_link);
+	write_unlock(&volume->cell->proc_lock);
 
-	for (loop = volume->nservers - 1; loop >= 0; loop--) {
-		afs_put_cb_interest(params->net, volume->cb_interests[loop]);
-		afs_put_server(params->net, volume->servers[loop]);
-	}
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(volume->cache,
+				  test_bit(AFS_VOLUME_DELETED, &volume->flags));
+	volume->cache = NULL;
+#endif
 
-	kfree(volume);
-	goto error;
+	_leave("");
 }
 
 /*
- * destroy a volume record
+ * Query the VL service to update the volume status.
  */
-void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
+static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
 {
-	struct afs_vlocation *vlocation;
-	int loop;
+	struct afs_server_list *new, *old, *discard;
+	struct afs_vldb_entry *vldb;
+	char idbuf[16];
+	int ret, idsz;
 
-	if (!volume)
-		return;
+	_enter("");
 
-	_enter("%p", volume);
+	/* We look up an ID by passing it as a decimal string in the
+	 * operation's name parameter.
+	 */
+	idsz = sprintf(idbuf, "%u", volume->vid);
 
-	ASSERTCMP(atomic_read(&volume->usage), >, 0);
+	vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
+	if (IS_ERR(vldb)) {
+		ret = PTR_ERR(vldb);
+		goto error;
+	}
 
-	vlocation = volume->vlocation;
+	/* See if the volume got renamed. */
+	if (vldb->name_len != volume->name_len ||
+	    memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
+		/* TODO: Use RCU'd string. */
+		memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
+		volume->name_len = vldb->name_len;
+	}
+
+	/* See if the volume's server list got updated. */
+	new = afs_alloc_server_list(volume->cell, key,
+				      vldb, (1 << volume->type));
+	if (IS_ERR(new)) {
+		ret = PTR_ERR(new);
+		goto error_vldb;
+	}
 
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	down_write(&cell->vl_sem);
+	write_lock(&volume->servers_lock);
 
-	if (likely(!atomic_dec_and_test(&volume->usage))) {
-		up_write(&vlocation->cell->vl_sem);
-		_leave("");
-		return;
+	discard = new;
+	old = volume->servers;
+	if (afs_annotate_server_list(new, old)) {
+		new->seq = volume->servers_seq + 1;
+		volume->servers = new;
+		smp_wmb();
+		volume->servers_seq++;
+		discard = old;
 	}
 
-	vlocation->vols[volume->type] = NULL;
+	volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+	clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+	write_unlock(&volume->servers_lock);
+	ret = 0;
 
-	up_write(&cell->vl_sem);
+	afs_put_serverlist(volume->cell->net, discard);
+error_vldb:
+	kfree(vldb);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
 
-	/* finish cleaning up the volume */
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(volume->cache, 0);
-#endif
-	afs_put_vlocation(cell->net, vlocation);
+/*
+ * Make sure the volume record is up to date.
+ */
+int afs_check_volume_status(struct afs_volume *volume, struct key *key)
+{
+	time64_t now = ktime_get_real_seconds();
+	int ret, retries = 0;
 
-	for (loop = volume->nservers - 1; loop >= 0; loop--) {
-		afs_put_cb_interest(cell->net, volume->cb_interests[loop]);
-		afs_put_server(cell->net, volume->servers[loop]);
+	_enter("");
+
+	if (volume->update_at <= now)
+		set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+
+retry:
+	if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) &&
+	    !test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+		_leave(" = 0");
+		return 0;
 	}
 
-	kfree(volume);
+	if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
+		ret = afs_update_volume_status(volume, key);
+		clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
+		clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
+		wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
+		_leave(" = %d", ret);
+		return ret;
+	}
 
-	_leave(" [destroyed]");
+	if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+		_leave(" = 0 [no wait]");
+		return 0;
+	}
+
+	ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
+	if (ret == -ERESTARTSYS) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	retries++;
+	if (retries == 4) {
+		_leave(" = -ESTALE");
+		return -ESTALE;
+	}
+	goto retry;
 }
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 106e43db1115..1377a40ecdbb 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -103,7 +103,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 	req->pages[0] = page;
 	get_page(page);
 
-	ret = afs_vnode_fetch_data(vnode, key, req);
+	ret = afs_fetch_data(vnode, key, req);
 	afs_put_read(req);
 	if (ret < 0) {
 		if (ret == -ENOENT) {
@@ -337,6 +337,40 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
 	_leave("");
 }
 
+/*
+ * write to a file
+ */
+static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
+			  unsigned offset, unsigned to)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = wb->vnode;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(wb->key),
+	       first, last, offset, to);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, wb->key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+			afs_fs_store_data(&fc, wb, first, last, offset, to);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * synchronously write back the locked page and any subsequent non-locked dirty
  * pages also covered by the same writeback record
@@ -420,7 +454,7 @@ no_more:
 
 	_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
 
-	ret = afs_vnode_store_data(wb, first, last, offset, to);
+	ret = afs_store_data(wb, first, last, offset, to);
 	if (ret < 0) {
 		switch (ret) {
 		case -EDQUOT:
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index e58e00ee9747..cfcc674e64a5 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler,
 			      void *buffer, size_t size)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
-	const char *volname = vnode->volume->vlocation->vldb.name;
+	const char *volname = vnode->volume->name;
 	size_t namelen;
 
 	namelen = strlen(volname);
-- 
cgit v1.2.3


From bf99a53ce22a29d64d3190093edf52f1d44d53b3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:51 +0000
Subject: afs: Make use of the YFS service upgrade to fully support IPv6

YFS VL servers offer an upgraded Volume Location service that can return
IPv6 addresses to fileservers and volume servers in addition to IPv4
addresses using the YFSVL.GetEndpoints operation which we should use if
it's available.

To this end:

 (1) Make rxrpc_kernel_recv_data() return the call's current service ID so
     that the caller can detect service upgrade and see what the service
     was upgraded to.

 (2) When we see a VL server address we haven't seen before, send a
     VL.GetCapabilities operation to it with the service upgrade bit set.

     If we get an upgrade to the YFS VL service, change the service ID in
     the address list for that address to use the upgraded service and set
     a flag to note that this appears to be a YFS-compatible server.

 (3) If, when a server's addresses are being looked up, we note that we
     previously detected a YFS-compatible server, then send the
     YFSVL.GetEndpoints operation rather than VL.GetAddrsU.

 (4) Build a fileserver address list from the reply of YFSVL.GetEndpoints,
     including both IPv4 and IPv6 addresses.  Volume server addresses are
     discarded.

 (5) The address list is sorted by address and port now, instead of just
     address.  This allows multiple servers on the same host sitting on
     different ports.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/addr_list.c |  54 ++++++++-
 fs/afs/afs_vl.h    |  16 +++
 fs/afs/internal.h  |  10 +-
 fs/afs/server.c    |   5 +-
 fs/afs/vlclient.c  | 337 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/afs/volume.c    |  16 +++
 6 files changed, 428 insertions(+), 10 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index b91e59a77f0e..a537368ba0db 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -17,9 +17,10 @@
 #include "internal.h"
 #include "afs_fs.h"
 
-#define AFS_MAX_ADDRESSES						\
-	((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /	\
-			sizeof(struct sockaddr_rxrpc)))
+//#define AFS_MAX_ADDRESSES
+//	((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
+//			sizeof(struct sockaddr_rxrpc)))
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
 
 /*
  * Release an address list.
@@ -230,15 +231,20 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 /*
  * Merge an IPv4 entry into a fileserver address list.
  */
-void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
+void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
 {
 	struct sockaddr_in6 *a;
+	__be16 xport = htons(port);
 	int i;
 
 	for (i = 0; i < alist->nr_ipv4; i++) {
 		a = &alist->addrs[i].transport.sin6;
-		if (xdr == a->sin6_addr.s6_addr32[3])
+		if (xdr == a->sin6_addr.s6_addr32[3] &&
+		    xport == a->sin6_port)
 			return;
+		if (xdr == a->sin6_addr.s6_addr32[3] &&
+		    xport < a->sin6_port)
+			break;
 		if (xdr < a->sin6_addr.s6_addr32[3])
 			break;
 	}
@@ -249,7 +255,7 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
 			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
 	a = &alist->addrs[i].transport.sin6;
-	a->sin6_port		  = htons(AFS_FS_PORT);
+	a->sin6_port		  = xport;
 	a->sin6_addr.s6_addr32[0] = 0;
 	a->sin6_addr.s6_addr32[1] = 0;
 	a->sin6_addr.s6_addr32[2] = htonl(0xffff);
@@ -258,6 +264,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
 	alist->nr_addrs++;
 }
 
+/*
+ * Merge an IPv6 entry into a fileserver address list.
+ */
+void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+{
+	struct sockaddr_in6 *a;
+	__be16 xport = htons(port);
+	int i, diff;
+
+	for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+		a = &alist->addrs[i].transport.sin6;
+		diff = memcmp(xdr, &a->sin6_addr, 16);
+		if (diff == 0 &&
+		    xport == a->sin6_port)
+			return;
+		if (diff == 0 &&
+		    xport < a->sin6_port)
+			break;
+		if (diff < 0)
+			break;
+	}
+
+	if (i < alist->nr_addrs)
+		memmove(alist->addrs + i + 1,
+			alist->addrs + i,
+			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+	a = &alist->addrs[i].transport.sin6;
+	a->sin6_port		  = xport;
+	a->sin6_addr.s6_addr32[0] = xdr[0];
+	a->sin6_addr.s6_addr32[1] = xdr[1];
+	a->sin6_addr.s6_addr32[2] = xdr[2];
+	a->sin6_addr.s6_addr32[3] = xdr[3];
+	alist->nr_addrs++;
+}
+
 /*
  * Get an address to try.
  */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 6350b417aee9..e3c4688f573b 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -16,6 +16,7 @@
 
 #define AFS_VL_PORT		7003	/* volume location service port */
 #define VL_SERVICE		52	/* RxRPC service ID for the Volume Location service */
+#define YFS_VL_SERVICE		2503	/* Service ID for AuriStor upgraded VL service */
 
 enum AFSVL_Operations {
 	VLGETENTRYBYID		= 503,	/* AFS Get VLDB entry by ID */
@@ -24,6 +25,8 @@ enum AFSVL_Operations {
 	VLGETENTRYBYIDU		= 526,	/* AFS Get VLDB entry by ID (UUID-variant) */
 	VLGETENTRYBYNAMEU	= 527,	/* AFS Get VLDB entry by name (UUID-variant) */
 	VLGETADDRSU		= 533,	/* AFS Get addrs for fileserver */
+	YVLGETENDPOINTS		= 64002, /* YFS Get endpoints for file/volume server */
+	VLGETCAPABILITIES	= 65537, /* AFS Get server capabilities */
 };
 
 enum AFSVL_Errors {
@@ -57,6 +60,19 @@ enum AFSVL_Errors {
 	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
 };
 
+enum {
+	YFS_SERVER_INDEX	= 0,
+	YFS_SERVER_UUID		= 1,
+	YFS_SERVER_ENDPOINT	= 2,
+};
+
+enum {
+	YFS_ENDPOINT_IPV4	= 0,
+	YFS_ENDPOINT_IPV6	= 1,
+};
+
+#define YFS_MAXENDPOINTS	16
+
 /*
  * maps to "struct vldbentry" in vvl-spec.pdf
  */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1fadf40551fd..767317bf33db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -70,6 +70,8 @@ struct afs_addr_list {
 	unsigned short		nr_addrs;
 	unsigned short		index;		/* Address currently in use */
 	unsigned short		nr_ipv4;	/* Number of IPv4 addresses */
+	unsigned long		probed;		/* Mask of servers that have been probed */
+	unsigned long		yfs;		/* Mask of servers that are YFS */
 	struct sockaddr_rxrpc	addrs[];
 };
 
@@ -113,7 +115,7 @@ struct afs_call {
 	bool			async;		/* T if asynchronous */
 	bool			ret_reply0;	/* T if should return reply[0] on success */
 	bool			upgrade;	/* T to request service upgrade */
-	u16			service_id;	/* RxRPC service ID to call */
+	u16			service_id;	/* Actual service ID (after upgrade) */
 	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
 	__be32			tmp;		/* place to extract temporary data */
@@ -564,7 +566,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *);
 extern int afs_end_cursor(struct afs_addr_cursor *);
 extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
 
-extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32);
+extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
+extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
 
 /*
  * cache.c
@@ -846,6 +849,9 @@ extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
 							 struct key *, const char *, int);
 extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
 						struct key *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
+						     struct key *, const uuid_t *);
 
 /*
  * volume.c
diff --git a/fs/afs/server.c b/fs/afs/server.c
index a6c860bcf391..1880f1b6a9f1 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -266,7 +266,10 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
 		return ERR_PTR(ret);
 
 	while (afs_iterate_addresses(&ac)) {
-		alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
+		if (test_bit(ac.index, &ac.alist->yfs))
+			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
+		else
+			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
 		switch (ac.error) {
 		case 0:
 			afs_end_cursor(&ac);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 173c652fe875..1d38cbdf6cad 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -225,7 +225,7 @@ again:
 		bp = call->buffer;
 		for (i = 0; i < count; i++)
 			if (alist->nr_addrs < call->count2)
-				afs_merge_fs_addr4(alist, *bp++);
+				afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
 
 		call->count -= count;
 		if (call->count > 0)
@@ -300,3 +300,338 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
 
 	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
+
+/*
+ * Deliver reply data to an VL.GetCapabilities operation.
+ */
+static int afs_deliver_vl_get_capabilities(struct afs_call *call)
+{
+	u32 count;
+	int ret;
+
+	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract the capabilities word count */
+	case 1:
+		ret = afs_extract_data(call, &call->tmp,
+				       1 * sizeof(__be32),
+				       true);
+		if (ret < 0)
+			return ret;
+
+		count = ntohl(call->tmp);
+
+		call->count = count;
+		call->count2 = count;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract capabilities words */
+	case 2:
+		count = min(call->count, 16U);
+		ret = afs_extract_data(call, call->buffer,
+				       count * sizeof(__be32),
+				       call->count > 16);
+		if (ret < 0)
+			return ret;
+
+		/* TODO: Examine capabilities */
+
+		call->count -= count;
+		if (call->count > 0)
+			goto again;
+		call->offset = 0;
+		call->unmarshall++;
+		break;
+	}
+
+	call->reply[0] = (void *)(unsigned long)call->service_id;
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXVLGetCapabilities = {
+	.name		= "VL.GetCapabilities",
+	.deliver	= afs_deliver_vl_get_capabilities,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports.  This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+int afs_vl_get_capabilities(struct afs_net *net,
+			    struct afs_addr_cursor *ac,
+			    struct key *key)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->upgrade = true; /* Let's see if this is a YFS server */
+	call->reply[0] = (void *)VLGETCAPABILITIES;
+	call->ret_reply0 = true;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(VLGETCAPABILITIES);
+
+	/* Can't take a ref on server */
+	return afs_make_call(ac, call, GFP_KERNEL, false);
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetEndpoints call.
+ *
+ *	GetEndpoints(IN yfsServerAttributes *attr,
+ *		     OUT opr_uuid *uuid,
+ *		     OUT afs_int32 *uniquifier,
+ *		     OUT endpoints *fsEndpoints,
+ *		     OUT endpoints *volEndpoints)
+ */
+static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
+{
+	struct afs_addr_list *alist;
+	__be32 *bp;
+	u32 uniquifier, size;
+	int ret;
+
+	_enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+
+again:
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall = 1;
+
+		/* Extract the returned uuid, uniquifier, fsEndpoints count and
+		 * either the first fsEndpoint type or the volEndpoints
+		 * count if there are no fsEndpoints. */
+	case 1:
+		ret = afs_extract_data(call, call->buffer,
+				       sizeof(uuid_t) +
+				       3 * sizeof(__be32),
+				       true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer + sizeof(uuid_t);
+		uniquifier	= ntohl(*bp++);
+		call->count	= ntohl(*bp++);
+		call->count2	= ntohl(*bp); /* Type or next count */
+
+		if (call->count > YFS_MAXENDPOINTS)
+			return -EBADMSG;
+
+		alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+		if (!alist)
+			return -ENOMEM;
+		alist->version = uniquifier;
+		call->reply[0] = alist;
+		call->offset = 0;
+
+		if (call->count == 0)
+			goto extract_volendpoints;
+
+		call->unmarshall = 2;
+
+		/* Extract fsEndpoints[] entries */
+	case 2:
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			size = sizeof(__be32) * (1 + 1 + 1);
+			break;
+		case YFS_ENDPOINT_IPV6:
+			size = sizeof(__be32) * (1 + 4 + 1);
+			break;
+		default:
+			return -EBADMSG;
+		}
+
+		size += sizeof(__be32);
+		ret = afs_extract_data(call, call->buffer, size, true);
+		if (ret < 0)
+			return ret;
+
+		alist = call->reply[0];
+		bp = call->buffer;
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			if (ntohl(bp[0]) != sizeof(__be32) * 2)
+				return -EBADMSG;
+			afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+			bp += 3;
+			break;
+		case YFS_ENDPOINT_IPV6:
+			if (ntohl(bp[0]) != sizeof(__be32) * 5)
+				return -EBADMSG;
+			afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+			bp += 6;
+			break;
+		default:
+			return -EBADMSG;
+		}
+
+		/* Got either the type of the next entry or the count of
+		 * volEndpoints if no more fsEndpoints.
+		 */
+		call->count2 = htonl(*bp++);
+
+		call->offset = 0;
+		call->count--;
+		if (call->count > 0)
+			goto again;
+
+	extract_volendpoints:
+		/* Extract the list of volEndpoints. */
+		call->count = call->count2;
+		if (!call->count)
+			goto end;
+		if (call->count > YFS_MAXENDPOINTS)
+			return -EBADMSG;
+
+		call->unmarshall = 3;
+
+		/* Extract the type of volEndpoints[0].  Normally we would
+		 * extract the type of the next endpoint when we extract the
+		 * data of the current one, but this is the first...
+		 */
+	case 3:
+		ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		call->count2 = htonl(*bp++);
+		call->offset = 0;
+		call->unmarshall = 4;
+
+		/* Extract volEndpoints[] entries */
+	case 4:
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			size = sizeof(__be32) * (1 + 1 + 1);
+			break;
+		case YFS_ENDPOINT_IPV6:
+			size = sizeof(__be32) * (1 + 4 + 1);
+			break;
+		default:
+			return -EBADMSG;
+		}
+
+		if (call->count > 1)
+			size += sizeof(__be32);
+		ret = afs_extract_data(call, call->buffer, size, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			if (ntohl(bp[0]) != sizeof(__be32) * 2)
+				return -EBADMSG;
+			bp += 3;
+			break;
+		case YFS_ENDPOINT_IPV6:
+			if (ntohl(bp[0]) != sizeof(__be32) * 5)
+				return -EBADMSG;
+			bp += 6;
+			break;
+		default:
+			return -EBADMSG;
+		}
+
+		/* Got either the type of the next entry or the count of
+		 * volEndpoints if no more fsEndpoints.
+		 */
+		call->offset = 0;
+		call->count--;
+		if (call->count > 0) {
+			call->count2 = htonl(*bp++);
+			goto again;
+		}
+
+	end:
+		call->unmarshall = 5;
+
+		/* Done */
+	case 5:
+		ret = afs_extract_data(call, call->buffer, 0, false);
+		if (ret < 0)
+			return ret;
+		call->unmarshall = 6;
+
+	case 6:
+		break;
+	}
+
+	alist = call->reply[0];
+
+	/* Start with IPv6 if available. */
+	if (alist->nr_ipv4 < alist->nr_addrs)
+		alist->index = alist->nr_ipv4;
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFSVL.GetEndpoints operation type.
+ */
+static const struct afs_call_type afs_YFSVLGetEndpoints = {
+	.name		= "VL.GetEndpoints",
+	.deliver	= afs_deliver_yfsvl_get_endpoints,
+	.destructor	= afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
+					      struct afs_addr_cursor *ac,
+					      struct key *key,
+					      const uuid_t *uuid)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
+				   sizeof(__be32) * 2 + sizeof(*uuid),
+				   sizeof(struct in6_addr) + sizeof(__be32) * 3);
+	if (!call)
+		return ERR_PTR(-ENOMEM);
+
+	call->key = key;
+	call->reply[0] = NULL;
+	call->ret_reply0 = true;
+
+	/* Marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(YVLGETENDPOINTS);
+	*bp++ = htonl(YFS_SERVER_UUID);
+	memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
+
+	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 2295dd4f9b15..684c48293353 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -124,6 +124,22 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
 		return ERR_PTR(ret);
 
 	while (afs_iterate_addresses(&ac)) {
+		if (!test_bit(ac.index, &ac.alist->probed)) {
+			ret = afs_vl_get_capabilities(cell->net, &ac, key);
+			switch (ret) {
+			case VL_SERVICE:
+				clear_bit(ac.index, &ac.alist->yfs);
+				set_bit(ac.index, &ac.alist->probed);
+				ac.addr->srx_service = ret;
+				break;
+			case YFS_VL_SERVICE:
+				set_bit(ac.index, &ac.alist->yfs);
+				set_bit(ac.index, &ac.alist->probed);
+				ac.addr->srx_service = ret;
+				break;
+			}
+		}
+		
 		vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
 						  volname, volnamesz);
 		switch (ac.error) {
-- 
cgit v1.2.3


From 5f0fc8ba6a1eec510a1e43def48697985d948a2c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:51 +0000
Subject: afs: Only progress call state at end of Tx phase from rxrpc callback

Only progress the AFS call state at the end of Tx phase from the callback
passed to rxrpc_kernel_send_data() rather than setting it before the last
data send call.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1bbd5854507d..21f43d3acb91 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -381,13 +381,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
 
-	/* We have to change the state *before* sending the last packet as
-	 * rxrpc might give us the reply before it returns from sending the
-	 * request.  Further, if the send fails, we may already have been given
-	 * a notification and may have collected it.
-	 */
-	if (!call->send_pages)
-		call->state = AFS_CALL_AWAIT_REPLY;
 	ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
 				     &msg, call->request_size,
 				     afs_notify_end_request_tx);
@@ -799,7 +792,6 @@ void afs_send_empty_reply(struct afs_call *call)
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
 	switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
 				       afs_notify_end_reply_tx)) {
 	case 0:
@@ -839,7 +831,6 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
 	n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
 				   afs_notify_end_reply_tx);
 	if (n >= 0) {
-- 
cgit v1.2.3


From 1199db603511d7463d9d3840f96f61967affc766 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:51 +0000
Subject: afs: Fix total-length calculation for multiple-page send

Fix the total-length calculation in afs_make_call() when the operation
being dispatched has data from a series of pages attached.

Despite the patched code looking like that it should reduce mathematically
to the current code, it doesn't because the 32-bit unsigned arithmetic
being used to calculate the page-offset-difference doesn't correctly extend
to a 64-bit value when the result is effectively negative.

Without this, some FS.StoreData operations that span multiple pages fail,
reporting too little or too much data.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 21f43d3acb91..1d075696bf55 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -350,8 +350,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	 */
 	tx_total_len = call->request_size;
 	if (call->send_pages) {
-		tx_total_len += call->last_to - call->first_offset;
-		tx_total_len += (call->last - call->first) * PAGE_SIZE;
+		if (call->last == call->first) {
+			tx_total_len += call->last_to - call->first_offset;
+		} else {
+			/* It looks mathematically like you should be able to
+			 * combine the following lines with the ones above, but
+			 * unsigned arithmetic is fun when it wraps...
+			 */
+			tx_total_len += PAGE_SIZE - call->first_offset;
+			tx_total_len += call->last_to;
+			tx_total_len += (call->last - call->first - 1) * PAGE_SIZE;
+		}
 	}
 
 	/* create a call */
-- 
cgit v1.2.3


From 025db80c9e421efef11f2b83b7f78a11476f06db Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:51 +0000
Subject: afs: Trace the initiation and completion of client calls

Add tracepoints to trace the initiation and completion of client calls
within the kafs filesystem.

The afs_make_vl_call tracepoint watches calls to the volume location
database server.

The afs_make_fs_call tracepoint watches calls to the file server.

The afs_call_done tracepoint watches for call completion.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++---------
 fs/afs/internal.h |  1 +
 fs/afs/rxrpc.c    | 25 +++++++++++++------
 fs/afs/vlclient.c | 10 +++++++-
 4 files changed, 91 insertions(+), 20 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 72ff3679fa2a..10e5ead629c2 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,6 +16,8 @@
 #include "internal.h"
 #include "afs_fs.h"
 
+static const struct afs_fid afs_zero_fid;
+
 /*
  * We need somewhere to discard into in case the server helpfully returns more
  * than we asked for in FS.FetchData{,64}.
@@ -299,6 +301,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSFetchStatus = {
 	.name		= "FS.FetchStatus",
+	.op		= afs_FS_FetchStatus,
 	.deliver	= afs_deliver_fs_fetch_status,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -335,6 +338,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -497,12 +501,14 @@ static void afs_fetch_data_destructor(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSFetchData = {
 	.name		= "FS.FetchData",
+	.op		= afs_FS_FetchData,
 	.deliver	= afs_deliver_fs_fetch_data,
 	.destructor	= afs_fetch_data_destructor,
 };
 
 static const struct afs_call_type afs_RXFSFetchData64 = {
 	.name		= "FS.FetchData64",
+	.op		= afs_FS_FetchData64,
 	.deliver	= afs_deliver_fs_fetch_data,
 	.destructor	= afs_fetch_data_destructor,
 };
@@ -527,7 +533,6 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
-	call->operation_ID = FSFETCHDATA64;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -543,6 +548,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	atomic_inc(&req->usage);
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -571,7 +577,6 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
-	call->operation_ID = FSFETCHDATA;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -585,6 +590,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	atomic_inc(&req->usage);
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -618,8 +624,16 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 /*
  * FS.CreateFile and FS.MakeDir operation type
  */
-static const struct afs_call_type afs_RXFSCreateXXXX = {
-	.name		= "FS.CreateXXXX",
+static const struct afs_call_type afs_RXFSCreateFile = {
+	.name		= "FS.CreateFile",
+	.op		= afs_FS_CreateFile,
+	.deliver	= afs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type afs_RXFSMakeDir = {
+	.name		= "FS.MakeDir",
+	.op		= afs_FS_MakeDir,
 	.deliver	= afs_deliver_fs_create_vnode,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -646,8 +660,9 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(net, &afs_RXFSCreateXXXX, reqsz,
-				   (3 + 21 + 21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(
+		net, S_ISDIR(mode) ? &afs_RXFSMakeDir : &afs_RXFSCreateFile,
+		reqsz, (3 + 21 + 21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -678,6 +693,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -708,8 +724,16 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 /*
  * FS.RemoveDir/FS.RemoveFile operation type
  */
-static const struct afs_call_type afs_RXFSRemoveXXXX = {
-	.name		= "FS.RemoveXXXX",
+static const struct afs_call_type afs_RXFSRemoveFile = {
+	.name		= "FS.RemoveFile",
+	.op		= afs_FS_RemoveFile,
+	.deliver	= afs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type afs_RXFSRemoveDir = {
+	.name		= "FS.RemoveDir",
+	.op		= afs_FS_RemoveDir,
 	.deliver	= afs_deliver_fs_remove,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -731,7 +755,9 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz;
 
-	call = afs_alloc_flat_call(net, &afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+	call = afs_alloc_flat_call(
+		net, isdir ? &afs_RXFSRemoveDir : &afs_RXFSRemoveFile,
+		reqsz, (21 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -753,6 +779,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
 	}
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -786,6 +813,7 @@ static int afs_deliver_fs_link(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSLink = {
 	.name		= "FS.Link",
+	.op		= afs_FS_Link,
 	.deliver	= afs_deliver_fs_link,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -834,6 +862,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	*bp++ = htonl(vnode->fid.unique);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -868,6 +897,7 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSSymlink = {
 	.name		= "FS.Symlink",
+	.op		= afs_FS_Symlink,
 	.deliver	= afs_deliver_fs_symlink,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -935,6 +965,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -970,6 +1001,7 @@ static int afs_deliver_fs_rename(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSRename = {
 	.name		= "FS.Rename",
+	.op		= afs_FS_Rename,
 	.deliver	= afs_deliver_fs_rename,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1035,6 +1067,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &orig_dvnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1070,12 +1103,14 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSStoreData = {
 	.name		= "FS.StoreData",
+	.op		= afs_FS_StoreData,
 	.deliver	= afs_deliver_fs_store_data,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64 = {
 	.name		= "FS.StoreData64",
+	.op		= afs_FS_StoreData64,
 	.deliver	= afs_deliver_fs_store_data,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1135,6 +1170,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	*bp++ = htonl(i_size >> 32);
 	*bp++ = htonl((u32) i_size);
 
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1208,6 +1244,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 	*bp++ = htonl(i_size);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1245,18 +1282,21 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSStoreStatus = {
 	.name		= "FS.StoreStatus",
+	.op		= afs_FS_StoreStatus,
 	.deliver	= afs_deliver_fs_store_status,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData_as_Status = {
 	.name		= "FS.StoreData",
+	.op		= afs_FS_StoreData,
 	.deliver	= afs_deliver_fs_store_status,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
 	.name		= "FS.StoreData64",
+	.op		= afs_FS_StoreData64,
 	.deliver	= afs_deliver_fs_store_status,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1286,7 +1326,6 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
-	call->operation_ID = FSSTOREDATA;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1305,6 +1344,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 	*bp++ = htonl((u32) attr->ia_size);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1335,7 +1375,6 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->store_version = vnode->status.data_version + 1;
-	call->operation_ID = FSSTOREDATA;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1351,6 +1390,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1379,7 +1419,6 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
-	call->operation_ID = FSSTORESTATUS;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1391,6 +1430,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1581,6 +1621,7 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSGetVolumeStatus = {
 	.name		= "FS.GetVolumeStatus",
+	.op		= afs_FS_GetVolumeStatus,
 	.deliver	= afs_deliver_fs_get_volume_status,
 	.destructor	= afs_get_volume_status_call_destructor,
 };
@@ -1620,6 +1661,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 	bp[1] = htonl(vnode->fid.vid);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1650,6 +1692,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSSetLock = {
 	.name		= "FS.SetLock",
+	.op		= afs_FS_SetLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1659,6 +1702,7 @@ static const struct afs_call_type afs_RXFSSetLock = {
  */
 static const struct afs_call_type afs_RXFSExtendLock = {
 	.name		= "FS.ExtendLock",
+	.op		= afs_FS_ExtendLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1668,6 +1712,7 @@ static const struct afs_call_type afs_RXFSExtendLock = {
  */
 static const struct afs_call_type afs_RXFSReleaseLock = {
 	.name		= "FS.ReleaseLock",
+	.op		= afs_FS_ReleaseLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1700,6 +1745,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	*bp++ = htonl(type);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1730,6 +1776,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
 	*bp++ = htonl(vnode->fid.unique);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1760,6 +1807,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
 	*bp++ = htonl(vnode->fid.unique);
 
 	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -1776,6 +1824,7 @@ static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
 	.name		= "FS.GiveUpAllCallBacks",
+	.op		= afs_FS_GiveUpAllCallBacks,
 	.deliver	= afs_deliver_fs_give_up_all_callbacks,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1866,6 +1915,7 @@ again:
  */
 static const struct afs_call_type afs_RXFSGetCapabilities = {
 	.name		= "FS.GetCapabilities",
+	.op		= afs_FS_GetCapabilities,
 	.deliver	= afs_deliver_fs_get_capabilities,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1895,5 +1945,6 @@ int afs_fs_get_capabilities(struct afs_net *net,
 	*bp++ = htonl(FSGETCAPABILITIES);
 
 	/* Can't take a ref on server */
+	trace_afs_make_fs_call(call, NULL);
 	return afs_make_call(ac, call, GFP_NOFS, false);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 767317bf33db..aad12546e0ea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -124,6 +124,7 @@ struct afs_call {
 
 struct afs_call_type {
 	const char *name;
+	unsigned int op; /* Really enum afs_fs_operation */
 
 	/* deliver request or reply data to an call
 	 * - returning an error will cause the call to be aborted
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1d075696bf55..59cc58022c4d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -219,6 +219,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
 			goto nomem_free;
 	}
 
+	call->operation_ID = type->op;
 	init_waitqueue_head(&call->waitq);
 	return call;
 
@@ -422,6 +423,8 @@ error_do_abort:
 		ac->abort_code = call->abort_code;
 		ac->responded = true;
 	}
+	call->error = ret;
+	trace_afs_call_done(call);
 error_kill_call:
 	afs_put_call(call);
 	ac->error = ret;
@@ -455,10 +458,10 @@ static void afs_deliver_to_call(struct afs_call *call)
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
-			if (ret == 1 || ret < 0) {
-				call->state = AFS_CALL_COMPLETE;
-				goto done;
-			}
+			if (ret < 0)
+				call->error = ret;
+			if (ret < 0 || ret == 1)
+				goto call_complete;
 			return;
 		}
 
@@ -466,7 +469,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 		switch (ret) {
 		case 0:
 			if (call->state == AFS_CALL_AWAIT_REPLY)
-				call->state = AFS_CALL_COMPLETE;
+				goto call_complete;
 			goto done;
 		case -EINPROGRESS:
 		case -EAGAIN:
@@ -505,7 +508,11 @@ out:
 
 save_error:
 	call->error = ret;
-	call->state = AFS_CALL_COMPLETE;
+call_complete:
+	if (call->state != AFS_CALL_COMPLETE) {
+		call->state = AFS_CALL_COMPLETE;
+		trace_afs_call_done(call);
+	}
 	goto done;
 }
 
@@ -567,8 +574,10 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 	if (call->state < AFS_CALL_COMPLETE) {
 		_debug("call interrupted");
 		if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-					    RX_USER_ABORT, -EINTR, "KWI"))
+					    RX_USER_ABORT, -EINTR, "KWI")) {
 			call->error = -ERESTARTSYS;
+			trace_afs_call_done(call);
+		}
 	}
 
 	ac->abort_code = call->abort_code;
@@ -882,6 +891,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		switch (call->state) {
 		case AFS_CALL_AWAIT_REPLY:
 			call->state = AFS_CALL_COMPLETE;
+			trace_afs_call_done(call);
 			break;
 		case AFS_CALL_AWAIT_REQUEST:
 			call->state = AFS_CALL_REPLYING;
@@ -894,5 +904,6 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	call->error = ret;
 	call->state = AFS_CALL_COMPLETE;
+	trace_afs_call_done(call);
 	return ret;
 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 1d38cbdf6cad..e372f89fd36a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -114,6 +114,7 @@ static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXVLGetEntryByNameU = {
 	.name		= "VL.GetEntryByNameU",
+	.op		= afs_VL_GetEntryByNameU,
 	.deliver	= afs_deliver_vl_get_entry_by_name_u,
 	.destructor	= afs_destroy_vl_get_entry_by_name_u,
 };
@@ -161,6 +162,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
 	if (padsz > 0)
 		memset((void *)bp + volnamesz, 0, padsz);
 
+	trace_afs_make_vl_call(call);
 	return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
 
@@ -251,6 +253,7 @@ static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
  */
 static const struct afs_call_type afs_RXVLGetAddrsU = {
 	.name		= "VL.GetAddrsU",
+	.op		= afs_VL_GetAddrsU,
 	.deliver	= afs_deliver_vl_get_addrs_u,
 	.destructor	= afs_vl_get_addrs_u_destructor,
 };
@@ -298,6 +301,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
 	for (i = 0; i < 6; i++)
 		r->uuid.node[i] = ntohl(u->node[i]);
 
+	trace_afs_make_vl_call(call);
 	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
 
@@ -362,6 +366,7 @@ again:
  */
 static const struct afs_call_type afs_RXVLGetCapabilities = {
 	.name		= "VL.GetCapabilities",
+	.op		= afs_VL_GetCapabilities,
 	.deliver	= afs_deliver_vl_get_capabilities,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -396,6 +401,7 @@ int afs_vl_get_capabilities(struct afs_net *net,
 	*bp++ = htonl(VLGETCAPABILITIES);
 
 	/* Can't take a ref on server */
+	trace_afs_make_vl_call(call);
 	return afs_make_call(ac, call, GFP_KERNEL, false);
 }
 
@@ -598,7 +604,8 @@ again:
  * YFSVL.GetEndpoints operation type.
  */
 static const struct afs_call_type afs_YFSVLGetEndpoints = {
-	.name		= "VL.GetEndpoints",
+	.name		= "YFSVL.GetEndpoints",
+	.op		= afs_YFSVL_GetEndpoints,
 	.deliver	= afs_deliver_yfsvl_get_endpoints,
 	.destructor	= afs_vl_get_addrs_u_destructor,
 };
@@ -633,5 +640,6 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
 	*bp++ = htonl(YFS_SERVER_UUID);
 	memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
 
+	trace_afs_make_vl_call(call);
 	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
 }
-- 
cgit v1.2.3


From 2c099014a0a456012c1778e80adce839bf956b77 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:51 +0000
Subject: afs: Trace the sending of pages

Add a pair of tracepoints to log the sending of pages for an FS.StoreData
or FS.StoreData64 operation.

Tracepoint afs_send_pages notes each set of pages added to the operation.
There may be several of these per operation as we get up at most 8
contiguous pages in one go because the bvec we're using is on the stack.

Tracepoint afs_sent_pages notes the end of adding data from a whole run of
pages to the operation and the completion of the request phase.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/afs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59cc58022c4d..06a51d70b82b 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -303,6 +303,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 
 	do {
 		afs_load_bvec(call, msg, bv, first, last, offset);
+		trace_afs_send_pages(call, msg, first, last, offset);
+
 		offset = 0;
 		bytes = msg->msg_iter.count;
 		nr = msg->msg_iter.nr_segs;
@@ -317,6 +319,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 		first += nr;
 	} while (first <= last);
 
+	trace_afs_sent_pages(call, call->first, last, first, ret);
 	return ret;
 }
 
-- 
cgit v1.2.3


From dab17c1add5c51b68027a9a3861af3a99cb5485a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:52 +0000
Subject: afs: Fix directory read/modify race

Because parsing of the directory wasn't being done under any sort of lock,
the pages holding the directory content can get invalidated whilst the
parsing is ongoing.

Further, the directory page check function gets called outside of the page
lock, so if the page gets cleared or updated, this may return reports of
bad magic numbers in the directory page.

Also, the directory may change size whilst checking and parsing are
ongoing, so more care needs to be taken here.

Fix this by:

 (1) Perform the page check from the page filling function before we set
     PageUptodate and drop the page lock.

 (2) Check for the file having shrunk and the page having been abandoned
     before checking the page contents.

 (3) Lock the page whilst parsing it for the directory iterator.

Whilst we're at it, add a tracepoint to report check failure.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c      | 27 +++++++++++++++++++--------
 fs/afs/file.c     |  5 +++++
 fs/afs/internal.h |  1 +
 3 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 53f3917440e7..ecda0e6a9f7e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -130,10 +130,11 @@ struct afs_lookup_cookie {
 /*
  * check that a directory page is valid
  */
-static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
+bool afs_dir_check_page(struct inode *dir, struct page *page)
 {
 	struct afs_dir_page *dbuf;
-	loff_t latter;
+	struct afs_vnode *vnode = AFS_FS_I(dir);
+	loff_t latter, i_size, off;
 	int tmp, qty;
 
 #if 0
@@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
 	}
 #endif
 
-	/* determine how many magic numbers there should be in this page */
-	latter = dir->i_size - page_offset(page);
+	/* Determine how many magic numbers there should be in this page, but
+	 * we must take care because the directory may change size under us.
+	 */
+	off = page_offset(page);
+	i_size = i_size_read(dir);
+	if (i_size <= off)
+		goto checked;
+
+	latter = i_size - off;
 	if (latter >= PAGE_SIZE)
 		qty = PAGE_SIZE;
 	else
@@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
 	dbuf = page_address(page);
 	for (tmp = 0; tmp < qty; tmp++) {
 		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
-			printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
+			printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
 			       __func__, dir->i_ino, tmp, qty,
 			       ntohs(dbuf->blocks[tmp].pagehdr.magic));
+			trace_afs_dir_check_failed(vnode, off, i_size);
 			goto error;
 		}
 	}
 
+checked:
 	SetPageChecked(page);
 	return true;
 
@@ -183,6 +193,7 @@ error:
 static inline void afs_dir_put_page(struct page *page)
 {
 	kunmap(page);
+	unlock_page(page);
 	put_page(page);
 }
 
@@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
 
 	page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
 	if (!IS_ERR(page)) {
+		lock_page(page);
 		kmap(page);
 		if (unlikely(!PageChecked(page))) {
-			if (PageError(page) || !afs_dir_check_page(dir, page))
+			if (PageError(page))
 				goto fail;
 		}
 	}
@@ -384,8 +396,7 @@ out:
  */
 static int afs_readdir(struct file *file, struct dir_context *ctx)
 {
-	return afs_dir_iterate(file_inode(file),
-			      ctx, file->private_data);
+	return afs_dir_iterate(file_inode(file), ctx, file->private_data);
 }
 
 /*
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 1f26ac9f816d..5786f68f87f1 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -232,6 +232,11 @@ int afs_page_filler(void *data, struct page *page)
 		 * page */
 		ret = afs_fetch_data(vnode, key, req);
 		afs_put_read(req);
+
+		if (ret >= 0 && S_ISDIR(inode->i_mode) &&
+		    !afs_dir_check_page(inode, page))
+			ret = -EIO;
+
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				_debug("got NOENT from server"
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index aad12546e0ea..6aa6e9957c44 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -622,6 +622,7 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 /*
  * dir.c
  */
+extern bool afs_dir_check_page(struct inode *, struct page *);
 extern const struct inode_operations afs_dir_inode_operations;
 extern const struct dentry_operations afs_fs_dentry_operations;
 extern const struct file_operations afs_dir_file_operations;
-- 
cgit v1.2.3


From 83732ec5146916bd49b3036b0ea7dedb7831b90e Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 2 Nov 2017 15:27:52 +0000
Subject: afs: Use a dynamic port if 7001 is in use

It is not required that the afs client operate on port 7001.
The port could be in use because another kernel or userspace
client has already bound to it.

If the port is in use, just fallback to using a dynamic port.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/afs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 06a51d70b82b..bd44ae8b63d8 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -61,6 +61,10 @@ int afs_open_socket(struct afs_net *net)
 	srx.transport.sin6.sin6_port	= htons(AFS_CM_PORT);
 
 	ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+	if (ret == -EADDRINUSE) {
+		srx.transport.sin6.sin6_port = 0;
+		ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+	}
 	if (ret < 0)
 		goto error_2;
 
-- 
cgit v1.2.3


From 215804a99283c57fdd869aab350fdf6acc3460b6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:52 +0000
Subject: afs: Introduce a file-private data record

Introduce a file-private data record for kAFS and put the key into it
rather than storing the key in file->private_data.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c      |  2 +-
 fs/afs/file.c     | 39 ++++++++++++++++++++++++++++-----------
 fs/afs/flock.c    | 10 +++++-----
 fs/afs/inode.c    |  2 +-
 fs/afs/internal.h | 14 ++++++++++++++
 fs/afs/write.c    |  4 ++--
 6 files changed, 51 insertions(+), 20 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ecda0e6a9f7e..ab618d32554c 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -396,7 +396,7 @@ out:
  */
 static int afs_readdir(struct file *file, struct dir_context *ctx)
 {
-	return afs_dir_iterate(file_inode(file), ctx, file->private_data);
+	return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file));
 }
 
 /*
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 5786f68f87f1..e33b34f01795 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -68,6 +68,7 @@ const struct address_space_operations afs_fs_aops = {
 int afs_open(struct inode *inode, struct file *file)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af;
 	struct key *key;
 	int ret;
 
@@ -75,19 +76,32 @@ int afs_open(struct inode *inode, struct file *file)
 
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		return PTR_ERR(key);
+		ret = PTR_ERR(key);
+		goto error;
 	}
 
-	ret = afs_validate(vnode, key);
-	if (ret < 0) {
-		_leave(" = %d [val]", ret);
-		return ret;
+	af = kzalloc(sizeof(*af), GFP_KERNEL);
+	if (!af) {
+		ret = -ENOMEM;
+		goto error_key;
 	}
 
-	file->private_data = key;
+	ret = afs_validate(vnode, key);
+	if (ret < 0)
+		goto error_af;
+
+	af->key = key;
+	file->private_data = af;
 	_leave(" = 0");
 	return 0;
+
+error_af:
+	kfree(af);
+error_key:
+	key_put(key);
+error:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
@@ -96,10 +110,13 @@ int afs_open(struct inode *inode, struct file *file)
 int afs_release(struct inode *inode, struct file *file)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af = file->private_data;
 
 	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
 
-	key_put(file->private_data);
+	file->private_data = NULL;
+	key_put(af->key);
+	kfree(af);
 	_leave(" = 0");
 	return 0;
 }
@@ -295,7 +312,7 @@ static int afs_readpage(struct file *file, struct page *page)
 	int ret;
 
 	if (file) {
-		key = file->private_data;
+		key = afs_file_key(file);
 		ASSERT(key != NULL);
 		ret = afs_page_filler(key, page);
 	} else {
@@ -346,7 +363,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 	struct afs_read *req;
 	struct list_head *p;
 	struct page *first, *page;
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	pgoff_t index;
 	int ret, n, i;
 
@@ -442,7 +459,7 @@ error:
 static int afs_readpages(struct file *file, struct address_space *mapping,
 			 struct list_head *pages, unsigned nr_pages)
 {
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	struct afs_vnode *vnode;
 	int ret = 0;
 
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 77b0a4606efd..7571a5dfd5a3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -206,7 +206,7 @@ void afs_lock_work(struct work_struct *work)
 			BUG();
 		fl = list_entry(vnode->granted_locks.next,
 				struct file_lock, fl_u.afs.link);
-		key = key_get(fl->fl_file->private_data);
+		key = key_get(afs_file_key(fl->fl_file));
 		spin_unlock(&vnode->lock);
 
 		ret = afs_extend_lock(vnode, key);
@@ -240,7 +240,7 @@ void afs_lock_work(struct work_struct *work)
 			BUG();
 		fl = list_entry(vnode->pending_locks.next,
 				struct file_lock, fl_u.afs.link);
-		key = key_get(fl->fl_file->private_data);
+		key = key_get(afs_file_key(fl->fl_file));
 		type = (fl->fl_type == F_RDLCK) ?
 			AFS_LOCK_READ : AFS_LOCK_WRITE;
 		spin_unlock(&vnode->lock);
@@ -318,7 +318,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_lock_type_t type;
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	int ret;
 
 	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -500,7 +500,7 @@ vfs_rejected_lock:
 static int afs_do_unlk(struct file *file, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	int ret;
 
 	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -535,7 +535,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
 static int afs_do_getlk(struct file *file, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	int ret, lock_count;
 
 	_enter("");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 5a2f5854f349..da2ba7a68cac 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -520,7 +520,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_FILE) {
-		key = attr->ia_file->private_data;
+		key = afs_file_key(attr->ia_file);
 	} else {
 		key = afs_request_key(vnode->volume->cell);
 		if (IS_ERR(key)) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6aa6e9957c44..facf5b9844d2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -138,6 +138,20 @@ struct afs_call_type {
 	void (*work)(struct work_struct *work);
 };
 
+/*
+ * AFS open file information record.  Pointed to by file->private_data.
+ */
+struct afs_file {
+	struct key		*key;		/* The key this file was opened with */
+};
+
+static inline struct key *afs_file_key(struct file *file)
+{
+	struct afs_file *af = file->private_data;
+
+	return af->key;
+}
+
 /*
  * Record of an outstanding read operation on a vnode.
  */
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 1377a40ecdbb..1cdd0e3cd531 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -128,7 +128,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	struct afs_writeback *candidate, *wb;
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	struct page *page;
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	unsigned from = pos & (PAGE_SIZE - 1);
 	unsigned to = from + len;
 	pgoff_t index = pos >> PAGE_SHIFT;
@@ -255,7 +255,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		  struct page *page, void *fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
 	loff_t i_size, maybe_i_size;
 	int ret;
 
-- 
cgit v1.2.3


From 4343d00872e1de9a470d951bf09bdd18bc73f555 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:52 +0000
Subject: afs: Get rid of the afs_writeback record

Get rid of the afs_writeback record that kAFS is using to match keys with
writes made by that key.

Instead, keep a list of keys that have a file open for writing and/or
sync'ing and iterate through those.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c     |  83 ++++---
 fs/afs/fsclient.c |  24 +--
 fs/afs/inode.c    |  11 +-
 fs/afs/internal.h |  51 ++---
 fs/afs/super.c    |   4 +-
 fs/afs/write.c    | 633 +++++++++++++++++++++++++++---------------------------
 6 files changed, 411 insertions(+), 395 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index e33b34f01795..c3a7bc1281f5 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -23,7 +23,6 @@ static int afs_readpage(struct file *file, struct page *page);
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length);
 static int afs_releasepage(struct page *page, gfp_t gfp_flags);
-static int afs_launder_page(struct page *page);
 
 static int afs_readpages(struct file *filp, struct address_space *mapping,
 			 struct list_head *pages, unsigned nr_pages);
@@ -62,6 +61,50 @@ const struct address_space_operations afs_fs_aops = {
 	.writepages	= afs_writepages,
 };
 
+/*
+ * Discard a pin on a writeback key.
+ */
+void afs_put_wb_key(struct afs_wb_key *wbk)
+{
+	if (refcount_dec_and_test(&wbk->usage)) {
+		key_put(wbk->key);
+		kfree(wbk);
+	}
+}
+
+/*
+ * Cache key for writeback.
+ */
+int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af)
+{
+	struct afs_wb_key *wbk, *p;
+
+	wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL);
+	if (!wbk)
+		return -ENOMEM;
+	refcount_set(&wbk->usage, 2);
+	wbk->key = af->key;
+
+	spin_lock(&vnode->wb_lock);
+	list_for_each_entry(p, &vnode->wb_keys, vnode_link) {
+		if (p->key == wbk->key)
+			goto found;
+	}
+
+	key_get(wbk->key);
+	list_add_tail(&wbk->vnode_link, &vnode->wb_keys);
+	spin_unlock(&vnode->wb_lock);
+	af->wb = wbk;
+	return 0;
+
+found:
+	refcount_inc(&p->usage);
+	spin_unlock(&vnode->wb_lock);
+	af->wb = p;
+	kfree(wbk);
+	return 0;
+}
+
 /*
  * open an AFS file or directory and attach a key to it
  */
@@ -85,12 +128,18 @@ int afs_open(struct inode *inode, struct file *file)
 		ret = -ENOMEM;
 		goto error_key;
 	}
+	af->key = key;
 
 	ret = afs_validate(vnode, key);
 	if (ret < 0)
 		goto error_af;
 
-	af->key = key;
+	if (file->f_mode & FMODE_WRITE) {
+		ret = afs_cache_wb_key(vnode, af);
+		if (ret < 0)
+			goto error_af;
+	}
+	
 	file->private_data = af;
 	_leave(" = 0");
 	return 0;
@@ -115,8 +164,11 @@ int afs_release(struct inode *inode, struct file *file)
 	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
 
 	file->private_data = NULL;
+	if (af->wb)
+		afs_put_wb_key(af->wb);
 	key_put(af->key);
 	kfree(af);
+	afs_prune_wb_keys(vnode);
 	_leave(" = 0");
 	return 0;
 }
@@ -516,16 +568,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	return ret;
 }
 
-/*
- * write back a dirty page
- */
-static int afs_launder_page(struct page *page)
-{
-	_enter("{%lu}", page->index);
-
-	return 0;
-}
-
 /*
  * invalidate part or all of a page
  * - release a page and clean up its private data if offset is 0 (indicating
@@ -534,8 +576,6 @@ static int afs_launder_page(struct page *page)
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length)
 {
-	struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
-
 	_enter("{%lu},%u,%u", page->index, offset, length);
 
 	BUG_ON(!PageLocked(page));
@@ -551,13 +591,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 #endif
 
 		if (PagePrivate(page)) {
-			if (wb && !PageWriteback(page)) {
-				set_page_private(page, 0);
-				afs_put_writeback(wb);
-			}
-
-			if (!page_private(page))
-				ClearPagePrivate(page);
+			set_page_private(page, 0);
+			ClearPagePrivate(page);
 		}
 	}
 
@@ -570,7 +605,6 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
  */
 static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
 	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 
 	_enter("{{%x:%u}[%lu],%lx},%x",
@@ -587,10 +621,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 #endif
 
 	if (PagePrivate(page)) {
-		if (wb) {
-			set_page_private(page, 0);
-			afs_put_writeback(wb);
-		}
+		set_page_private(page, 0);
 		ClearPagePrivate(page);
 	}
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 10e5ead629c2..b90ef39ae914 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1119,18 +1119,18 @@ static const struct afs_call_type afs_RXFSStoreData64 = {
  * store a set of pages to a very large file
  */
 static int afs_fs_store_data64(struct afs_fs_cursor *fc,
-			       struct afs_writeback *wb,
+			       struct address_space *mapping,
 			       pgoff_t first, pgoff_t last,
 			       unsigned offset, unsigned to,
 			       loff_t size, loff_t pos, loff_t i_size)
 {
-	struct afs_vnode *vnode = wb->vnode;
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
 				   (4 + 6 + 3 * 2) * 4,
@@ -1138,10 +1138,9 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	if (!call)
 		return -ENOMEM;
 
-	call->wb = wb;
-	call->key = wb->key;
+	call->key = fc->key;
+	call->mapping = mapping;
 	call->reply[0] = vnode;
-	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
 	call->first_offset = offset;
@@ -1177,18 +1176,18 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 /*
  * store a set of pages
  */
-int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
+int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 		      pgoff_t first, pgoff_t last,
 		      unsigned offset, unsigned to)
 {
-	struct afs_vnode *vnode = wb->vnode;
+	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
 	struct afs_net *net = afs_v2net(vnode);
 	loff_t size, pos, i_size;
 	__be32 *bp;
 
 	_enter(",%x,{%x:%u},,",
-	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	size = (loff_t)to - (loff_t)offset;
 	if (first != last)
@@ -1205,7 +1204,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 	       (unsigned long long) i_size);
 
 	if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
-		return afs_fs_store_data64(fc, wb, first, last, offset, to,
+		return afs_fs_store_data64(fc, mapping, first, last, offset, to,
 					   size, pos, i_size);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
@@ -1214,10 +1213,9 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb,
 	if (!call)
 		return -ENOMEM;
 
-	call->wb = wb;
-	call->key = wb->key;
+	call->key = fc->key;
+	call->mapping = mapping;
 	call->reply[0] = vnode;
-	call->mapping = vnode->vfs_inode.i_mapping;
 	call->first = first;
 	call->last = last;
 	call->first_offset = offset;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index da2ba7a68cac..3415eb7484f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -482,7 +482,12 @@ void afs_evict_inode(struct inode *inode)
 		vnode->cb_interest = NULL;
 	}
 
-	ASSERT(list_empty(&vnode->writebacks));
+	while (!list_empty(&vnode->wb_keys)) {
+		struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+						    struct afs_wb_key, vnode_link);
+		list_del(&wbk->vnode_link);
+		afs_put_wb_key(wbk);
+	}
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_relinquish_cookie(vnode->cache, 0);
@@ -514,10 +519,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	/* flush any dirty data outstanding on a regular file */
-	if (S_ISREG(vnode->vfs_inode.i_mode)) {
+	if (S_ISREG(vnode->vfs_inode.i_mode))
 		filemap_write_and_wait(vnode->vfs_inode.i_mapping);
-		afs_writeback_all(vnode);
-	}
 
 	if (attr->ia_valid & ATTR_FILE) {
 		key = afs_file_key(attr->ia_file);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index facf5b9844d2..688562ae3bf8 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -89,8 +89,7 @@ struct afs_call {
 	struct afs_server	*cm_server;	/* Server affected by incoming CM call */
 	struct afs_cb_interest	*cbi;		/* Callback interest for server used */
 	void			*request;	/* request data (first part) */
-	struct address_space	*mapping;	/* page set */
-	struct afs_writeback	*wb;		/* writeback being performed */
+	struct address_space	*mapping;	/* Pages being written from */
 	void			*buffer;	/* reply receive buffer */
 	void			*reply[4];	/* Where to put the reply */
 	pgoff_t			first;		/* first page in mapping to deal with */
@@ -138,11 +137,21 @@ struct afs_call_type {
 	void (*work)(struct work_struct *work);
 };
 
+/*
+ * Key available for writeback on a file.
+ */
+struct afs_wb_key {
+	refcount_t		usage;
+	struct key		*key;
+	struct list_head	vnode_link;	/* Link in vnode->wb_keys */
+};
+
 /*
  * AFS open file information record.  Pointed to by file->private_data.
  */
 struct afs_file {
 	struct key		*key;		/* The key this file was opened with */
+	struct afs_wb_key	*wb;		/* Writeback key record for this file */
 };
 
 static inline struct key *afs_file_key(struct file *file)
@@ -167,32 +176,6 @@ struct afs_read {
 	struct page		*pages[];
 };
 
-/*
- * record of an outstanding writeback on a vnode
- */
-struct afs_writeback {
-	struct list_head	link;		/* link in vnode->writebacks */
-	struct work_struct	writer;		/* work item to perform the writeback */
-	struct afs_vnode	*vnode;		/* vnode to which this write applies */
-	struct key		*key;		/* owner of this write */
-	wait_queue_head_t	waitq;		/* completion and ready wait queue */
-	pgoff_t			first;		/* first page in batch */
-	pgoff_t			point;		/* last page in current store op */
-	pgoff_t			last;		/* last page in batch (inclusive) */
-	unsigned		offset_first;	/* offset into first page of start of write */
-	unsigned		to_last;	/* offset into last page of end of write */
-	int			num_conflicts;	/* count of conflicting writes in list */
-	int			usage;
-	bool			conflicts;	/* T if has dependent conflicts */
-	enum {
-		AFS_WBACK_SYNCING,		/* synchronisation being performed */
-		AFS_WBACK_PENDING,		/* write pending */
-		AFS_WBACK_CONFLICTING,		/* conflicting writes posted */
-		AFS_WBACK_WRITING,		/* writing back */
-		AFS_WBACK_COMPLETE		/* the writeback record has been unlinked */
-	} state __attribute__((packed));
-};
-
 /*
  * AFS superblock private data
  * - there's one superblock per volume
@@ -460,7 +443,7 @@ struct afs_vnode {
 	struct afs_permits	*permit_cache;	/* cache of permits so far obtained */
 	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
 	struct mutex		validate_lock;	/* lock for validating this vnode */
-	spinlock_t		writeback_lock;	/* lock for writebacks */
+	spinlock_t		wb_lock;	/* lock for wb_keys */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
 #define AFS_VNODE_CB_PROMISED	0		/* Set if vnode has a callback promise */
@@ -476,7 +459,7 @@ struct afs_vnode {
 #define AFS_VNODE_AUTOCELL	10		/* set if Vnode is an auto mount point */
 #define AFS_VNODE_PSEUDODIR	11		/* set if Vnode is a pseudo directory */
 
-	struct list_head	writebacks;	/* alterations in pagecache that need writing */
+	struct list_head	wb_keys;	/* List of keys available for writeback */
 	struct list_head	pending_locks;	/* locks waiting to be granted */
 	struct list_head	granted_locks;	/* locks granted on this file */
 	struct delayed_work	lock_work;	/* work to be done in locking */
@@ -648,6 +631,8 @@ extern const struct address_space_operations afs_fs_aops;
 extern const struct inode_operations afs_file_inode_operations;
 extern const struct file_operations afs_file_operations;
 
+extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
+extern void afs_put_wb_key(struct afs_wb_key *);
 extern int afs_open(struct inode *, struct file *);
 extern int afs_release(struct inode *, struct file *);
 extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *);
@@ -678,7 +663,7 @@ extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
 			  struct afs_fid *, struct afs_file_status *);
 extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
 			 struct afs_vnode *, const char *);
-extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *,
+extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
 			     pgoff_t, pgoff_t, unsigned, unsigned);
 extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
 extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
@@ -889,7 +874,6 @@ extern int afs_check_volume_status(struct afs_volume *, struct key *);
  * write.c
  */
 extern int afs_set_page_dirty(struct page *);
-extern void afs_put_writeback(struct afs_writeback *);
 extern int afs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata);
@@ -900,9 +884,10 @@ extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
-extern int afs_writeback_all(struct afs_vnode *);
 extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
+extern void afs_prune_wb_keys(struct afs_vnode *);
+extern int afs_launder_page(struct page *);
 
 /*
  * xattr.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index af1e769aaebf..875b5eb02242 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -546,9 +546,9 @@ static void afs_i_init_once(void *_vnode)
 	inode_init_once(&vnode->vfs_inode);
 	mutex_init(&vnode->io_lock);
 	mutex_init(&vnode->validate_lock);
-	spin_lock_init(&vnode->writeback_lock);
+	spin_lock_init(&vnode->wb_lock);
 	spin_lock_init(&vnode->lock);
-	INIT_LIST_HEAD(&vnode->writebacks);
+	INIT_LIST_HEAD(&vnode->wb_keys);
 	INIT_LIST_HEAD(&vnode->pending_locks);
 	INIT_LIST_HEAD(&vnode->granted_locks);
 	INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 1cdd0e3cd531..4c131371005b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -8,6 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+
 #include <linux/backing-dev.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -16,8 +17,18 @@
 #include <linux/pagevec.h>
 #include "internal.h"
 
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
-					   struct page *page);
+/*
+ * We use page->private to hold the amount of the page that we've written to,
+ * splitting the field into two parts.  However, we need to represent a range
+ * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system.
+ */
+#if PAGE_SIZE > 32768
+#define AFS_PRIV_MAX	0xffffffff
+#define AFS_PRIV_SHIFT	32
+#else
+#define AFS_PRIV_MAX	0xffff
+#define AFS_PRIV_SHIFT	16
+#endif
 
 /*
  * mark a page as having been made dirty and thus needing writeback
@@ -28,58 +39,6 @@ int afs_set_page_dirty(struct page *page)
 	return __set_page_dirty_nobuffers(page);
 }
 
-/*
- * unlink a writeback record because its usage has reached zero
- * - must be called with the wb->vnode->writeback_lock held
- */
-static void afs_unlink_writeback(struct afs_writeback *wb)
-{
-	struct afs_writeback *front;
-	struct afs_vnode *vnode = wb->vnode;
-
-	list_del_init(&wb->link);
-	if (!list_empty(&vnode->writebacks)) {
-		/* if an fsync rises to the front of the queue then wake it
-		 * up */
-		front = list_entry(vnode->writebacks.next,
-				   struct afs_writeback, link);
-		if (front->state == AFS_WBACK_SYNCING) {
-			_debug("wake up sync");
-			front->state = AFS_WBACK_COMPLETE;
-			wake_up(&front->waitq);
-		}
-	}
-}
-
-/*
- * free a writeback record
- */
-static void afs_free_writeback(struct afs_writeback *wb)
-{
-	_enter("");
-	key_put(wb->key);
-	kfree(wb);
-}
-
-/*
- * dispose of a reference to a writeback record
- */
-void afs_put_writeback(struct afs_writeback *wb)
-{
-	struct afs_vnode *vnode = wb->vnode;
-
-	_enter("{%d}", wb->usage);
-
-	spin_lock(&vnode->writeback_lock);
-	if (--wb->usage == 0)
-		afs_unlink_writeback(wb);
-	else
-		wb = NULL;
-	spin_unlock(&vnode->writeback_lock);
-	if (wb)
-		afs_free_writeback(wb);
-}
-
 /*
  * partly or wholly fill a page that's under preparation for writing
  */
@@ -125,42 +84,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		    loff_t pos, unsigned len, unsigned flags,
 		    struct page **pagep, void **fsdata)
 {
-	struct afs_writeback *candidate, *wb;
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	struct page *page;
 	struct key *key = afs_file_key(file);
-	unsigned from = pos & (PAGE_SIZE - 1);
-	unsigned to = from + len;
+	unsigned long priv;
+	unsigned f, from = pos & (PAGE_SIZE - 1);
+	unsigned t, to = from + len;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int ret;
 
 	_enter("{%x:%u},{%lx},%u,%u",
 	       vnode->fid.vid, vnode->fid.vnode, index, from, to);
 
-	candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
-	if (!candidate)
-		return -ENOMEM;
-	candidate->vnode = vnode;
-	candidate->first = candidate->last = index;
-	candidate->offset_first = from;
-	candidate->to_last = to;
-	INIT_LIST_HEAD(&candidate->link);
-	candidate->usage = 1;
-	candidate->state = AFS_WBACK_PENDING;
-	init_waitqueue_head(&candidate->waitq);
+	/* We want to store information about how much of a page is altered in
+	 * page->private.
+	 */
+	BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
-		kfree(candidate);
+	if (!page)
 		return -ENOMEM;
-	}
 
 	if (!PageUptodate(page) && len != PAGE_SIZE) {
 		ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
 		if (ret < 0) {
 			unlock_page(page);
 			put_page(page);
-			kfree(candidate);
 			_leave(" = %d [prep]", ret);
 			return ret;
 		}
@@ -171,79 +120,52 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	*pagep = page;
 
 try_again:
-	spin_lock(&vnode->writeback_lock);
-
-	/* see if this page is already pending a writeback under a suitable key
-	 * - if so we can just join onto that one */
-	wb = (struct afs_writeback *) page_private(page);
-	if (wb) {
-		if (wb->key == key && wb->state == AFS_WBACK_PENDING)
-			goto subsume_in_current_wb;
-		goto flush_conflicting_wb;
+	/* See if this page is already partially written in a way that we can
+	 * merge the new write with.
+	 */
+	t = f = 0;
+	if (PagePrivate(page)) {
+		priv = page_private(page);
+		f = priv & AFS_PRIV_MAX;
+		t = priv >> AFS_PRIV_SHIFT;
+		ASSERTCMP(f, <=, t);
 	}
 
-	if (index > 0) {
-		/* see if we can find an already pending writeback that we can
-		 * append this page to */
-		list_for_each_entry(wb, &vnode->writebacks, link) {
-			if (wb->last == index - 1 && wb->key == key &&
-			    wb->state == AFS_WBACK_PENDING)
-				goto append_to_previous_wb;
-		}
+	if (f != t) {
+		if (to < f || from > t)
+			goto flush_conflicting_write;
+		if (from < f)
+			f = from;
+		if (to > t)
+			t = to;
+	} else {
+		f = from;
+		t = to;
 	}
 
-	list_add_tail(&candidate->link, &vnode->writebacks);
-	candidate->key = key_get(key);
-	spin_unlock(&vnode->writeback_lock);
+	priv = (unsigned long)t << AFS_PRIV_SHIFT;
+	priv |= f;
 	SetPagePrivate(page);
-	set_page_private(page, (unsigned long) candidate);
-	_leave(" = 0 [new]");
-	return 0;
-
-subsume_in_current_wb:
-	_debug("subsume");
-	ASSERTRANGE(wb->first, <=, index, <=, wb->last);
-	if (index == wb->first && from < wb->offset_first)
-		wb->offset_first = from;
-	if (index == wb->last && to > wb->to_last)
-		wb->to_last = to;
-	spin_unlock(&vnode->writeback_lock);
-	kfree(candidate);
-	_leave(" = 0 [sub]");
-	return 0;
-
-append_to_previous_wb:
-	_debug("append into %lx-%lx", wb->first, wb->last);
-	wb->usage++;
-	wb->last++;
-	wb->to_last = to;
-	spin_unlock(&vnode->writeback_lock);
-	SetPagePrivate(page);
-	set_page_private(page, (unsigned long) wb);
-	kfree(candidate);
-	_leave(" = 0 [app]");
+	set_page_private(page, priv);
+	_leave(" = 0");
 	return 0;
 
-	/* the page is currently bound to another context, so if it's dirty we
-	 * need to flush it before we can use the new context */
-flush_conflicting_wb:
+	/* The previous write and this write aren't adjacent or overlapping, so
+	 * flush the page out.
+	 */
+flush_conflicting_write:
 	_debug("flush conflict");
-	if (wb->state == AFS_WBACK_PENDING)
-		wb->state = AFS_WBACK_CONFLICTING;
-	spin_unlock(&vnode->writeback_lock);
-	if (clear_page_dirty_for_io(page)) {
-		ret = afs_write_back_from_locked_page(wb, page);
-		if (ret < 0) {
-			afs_put_writeback(candidate);
-			_leave(" = %d", ret);
-			return ret;
-		}
+	ret = write_one_page(page);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
 	}
 
-	/* the page holds a ref on the writeback record */
-	afs_put_writeback(wb);
-	set_page_private(page, 0);
-	ClearPagePrivate(page);
+	ret = lock_page_killable(page);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
 	goto try_again;
 }
 
@@ -266,11 +188,11 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 
 	i_size = i_size_read(&vnode->vfs_inode);
 	if (maybe_i_size > i_size) {
-		spin_lock(&vnode->writeback_lock);
+		spin_lock(&vnode->wb_lock);
 		i_size = i_size_read(&vnode->vfs_inode);
 		if (maybe_i_size > i_size)
 			i_size_write(&vnode->vfs_inode, maybe_i_size);
-		spin_unlock(&vnode->writeback_lock);
+		spin_unlock(&vnode->wb_lock);
 	}
 
 	if (!PageUptodate(page)) {
@@ -299,9 +221,10 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 /*
  * kill all the pages in the given range
  */
-static void afs_kill_pages(struct afs_vnode *vnode, bool error,
+static void afs_kill_pages(struct address_space *mapping,
 			   pgoff_t first, pgoff_t last)
 {
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct pagevec pv;
 	unsigned count, loop;
 
@@ -316,23 +239,62 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
 		count = last - first + 1;
 		if (count > PAGEVEC_SIZE)
 			count = PAGEVEC_SIZE;
-		pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
-					      first, count, pv.pages);
+		pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
 		ASSERTCMP(pv.nr, ==, count);
 
 		for (loop = 0; loop < count; loop++) {
 			struct page *page = pv.pages[loop];
 			ClearPageUptodate(page);
-			if (error)
-				SetPageError(page);
-			if (PageWriteback(page))
-				end_page_writeback(page);
+			SetPageError(page);
+			end_page_writeback(page);
 			if (page->index >= first)
 				first = page->index + 1;
+			lock_page(page);
+			generic_error_remove_page(mapping, page);
 		}
 
 		__pagevec_release(&pv);
-	} while (first < last);
+	} while (first <= last);
+
+	_leave("");
+}
+
+/*
+ * Redirty all the pages in a given range.
+ */
+static void afs_redirty_pages(struct writeback_control *wbc,
+			      struct address_space *mapping,
+			      pgoff_t first, pgoff_t last)
+{
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+	struct pagevec pv;
+	unsigned count, loop;
+
+	_enter("{%x:%u},%lx-%lx",
+	       vnode->fid.vid, vnode->fid.vnode, first, last);
+
+	pagevec_init(&pv, 0);
+
+	do {
+		_debug("redirty %lx-%lx", first, last);
+
+		count = last - first + 1;
+		if (count > PAGEVEC_SIZE)
+			count = PAGEVEC_SIZE;
+		pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
+		ASSERTCMP(pv.nr, ==, count);
+
+		for (loop = 0; loop < count; loop++) {
+			struct page *page = pv.pages[loop];
+
+			redirty_page_for_writepage(wbc, page);
+			end_page_writeback(page);
+			if (page->index >= first)
+				first = page->index + 1;
+		}
+
+		__pagevec_release(&pv);
+	} while (first <= last);
 
 	_leave("");
 }
@@ -340,26 +302,55 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
 /*
  * write to a file
  */
-static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
+static int afs_store_data(struct address_space *mapping,
+			  pgoff_t first, pgoff_t last,
 			  unsigned offset, unsigned to)
 {
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct afs_fs_cursor fc;
-	struct afs_vnode *vnode = wb->vnode;
-	int ret;
+	struct afs_wb_key *wbk = NULL;
+	struct list_head *p;
+	int ret = -ENOKEY, ret2;
 
-	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
+	_enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
 	       vnode->fid.unique,
-	       key_serial(wb->key),
 	       first, last, offset, to);
 
+	spin_lock(&vnode->wb_lock);
+	p = vnode->wb_keys.next;
+
+	/* Iterate through the list looking for a valid key to use. */
+try_next_key:
+	while (p != &vnode->wb_keys) {
+		wbk = list_entry(p, struct afs_wb_key, vnode_link);
+		_debug("wbk %u", key_serial(wbk->key));
+		ret2 = key_validate(wbk->key);
+		if (ret2 == 0)
+			goto found_key;
+		if (ret == -ENOKEY)
+			ret = ret2;
+		p = p->next;
+	}
+
+	spin_unlock(&vnode->wb_lock);
+	afs_put_wb_key(wbk);
+	_leave(" = %d [no keys]", ret);
+	return ret;
+
+found_key:
+	refcount_inc(&wbk->usage);
+	spin_unlock(&vnode->wb_lock);
+
+	_debug("USE WB KEY %u", key_serial(wbk->key));
+
 	ret = -ERESTARTSYS;
-	if (afs_begin_vnode_operation(&fc, vnode, wb->key)) {
+	if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
-			afs_fs_store_data(&fc, wb, first, last, offset, to);
+			afs_fs_store_data(&fc, mapping, first, last, offset, to);
 		}
 
 		afs_check_for_remote_deletion(&fc, fc.vnode);
@@ -367,20 +358,37 @@ static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
 		ret = afs_end_vnode_operation(&fc);
 	}
 
+	switch (ret) {
+	case -EACCES:
+	case -EPERM:
+	case -ENOKEY:
+	case -EKEYEXPIRED:
+	case -EKEYREJECTED:
+	case -EKEYREVOKED:
+		_debug("next");
+		spin_lock(&vnode->wb_lock);
+		p = wbk->vnode_link.next;
+		afs_put_wb_key(wbk);
+		goto try_next_key;
+	}
+
+	afs_put_wb_key(wbk);
 	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
- * synchronously write back the locked page and any subsequent non-locked dirty
- * pages also covered by the same writeback record
+ * Synchronously write back the locked page and any subsequent non-locked dirty
+ * pages.
  */
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
-					   struct page *primary_page)
+static int afs_write_back_from_locked_page(struct address_space *mapping,
+					   struct writeback_control *wbc,
+					   struct page *primary_page,
+					   pgoff_t final_page)
 {
 	struct page *pages[8], *page;
-	unsigned long count;
-	unsigned n, offset, to;
+	unsigned long count, priv;
+	unsigned n, offset, to, f, t;
 	pgoff_t start, first, last;
 	int loop, ret;
 
@@ -390,20 +398,28 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 	if (test_set_page_writeback(primary_page))
 		BUG();
 
-	/* find all consecutive lockable dirty pages, stopping when we find a
-	 * page that is not immediately lockable, is not dirty or is missing,
-	 * or we reach the end of the range */
+	/* Find all consecutive lockable dirty pages that have contiguous
+	 * written regions, stopping when we find a page that is not
+	 * immediately lockable, is not dirty or is missing, or we reach the
+	 * end of the range.
+	 */
 	start = primary_page->index;
-	if (start >= wb->last)
+	priv = page_private(primary_page);
+	offset = priv & AFS_PRIV_MAX;
+	to = priv >> AFS_PRIV_SHIFT;
+
+	WARN_ON(offset == to);
+
+	if (start >= final_page || to < PAGE_SIZE)
 		goto no_more;
+
 	start++;
 	do {
 		_debug("more %lx [%lx]", start, count);
-		n = wb->last - start + 1;
+		n = final_page - start + 1;
 		if (n > ARRAY_SIZE(pages))
 			n = ARRAY_SIZE(pages);
-		n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
-					  start, n, pages);
+		n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages);
 		_debug("fgpc %u", n);
 		if (n == 0)
 			goto no_more;
@@ -415,16 +431,27 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 		}
 
 		for (loop = 0; loop < n; loop++) {
+			if (to != PAGE_SIZE)
+				break;
 			page = pages[loop];
-			if (page->index > wb->last)
+			if (page->index > final_page)
 				break;
 			if (!trylock_page(page))
 				break;
-			if (!PageDirty(page) ||
-			    page_private(page) != (unsigned long) wb) {
+			if (!PageDirty(page) || PageWriteback(page)) {
+				unlock_page(page);
+				break;
+			}
+
+			priv = page_private(page);
+			f = priv & AFS_PRIV_MAX;
+			t = priv >> AFS_PRIV_SHIFT;
+			if (f != 0) {
 				unlock_page(page);
 				break;
 			}
+			to = t;
+
 			if (!clear_page_dirty_for_io(page))
 				BUG();
 			if (test_set_page_writeback(page))
@@ -440,50 +467,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 		}
 
 		start += loop;
-	} while (start <= wb->last && count < 65536);
+	} while (start <= final_page && count < 65536);
 
 no_more:
-	/* we now have a contiguous set of dirty pages, each with writeback set
-	 * and the dirty mark cleared; the first page is locked and must remain
-	 * so, all the rest are unlocked */
+	/* We now have a contiguous set of dirty pages, each with writeback
+	 * set; the first page is still locked at this point, but all the rest
+	 * have been unlocked.
+	 */
+	unlock_page(primary_page);
+
 	first = primary_page->index;
 	last = first + count - 1;
 
-	offset = (first == wb->first) ? wb->offset_first : 0;
-	to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
-
 	_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
 
-	ret = afs_store_data(wb, first, last, offset, to);
-	if (ret < 0) {
-		switch (ret) {
-		case -EDQUOT:
-		case -ENOSPC:
-			mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC);
-			break;
-		case -EROFS:
-		case -EIO:
-		case -EREMOTEIO:
-		case -EFBIG:
-		case -ENOENT:
-		case -ENOMEDIUM:
-		case -ENXIO:
-			afs_kill_pages(wb->vnode, true, first, last);
-			mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO);
-			break;
-		case -EACCES:
-		case -EPERM:
-		case -ENOKEY:
-		case -EKEYEXPIRED:
-		case -EKEYREJECTED:
-		case -EKEYREVOKED:
-			afs_kill_pages(wb->vnode, false, first, last);
-			break;
-		default:
-			break;
-		}
-	} else {
+	ret = afs_store_data(mapping, first, last, offset, to);
+	switch (ret) {
+	case 0:
 		ret = count;
+		break;
+
+	default:
+		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
+		/* Fall through */
+	case -EACCES:
+	case -EPERM:
+	case -ENOKEY:
+	case -EKEYEXPIRED:
+	case -EKEYREJECTED:
+	case -EKEYREVOKED:
+		afs_redirty_pages(wbc, mapping, first, last);
+		mapping_set_error(mapping, ret);
+		break;
+
+	case -EDQUOT:
+	case -ENOSPC:
+		afs_redirty_pages(wbc, mapping, first, last);
+		mapping_set_error(mapping, -ENOSPC);
+		break;
+
+	case -EROFS:
+	case -EIO:
+	case -EREMOTEIO:
+	case -EFBIG:
+	case -ENOENT:
+	case -ENOMEDIUM:
+	case -ENXIO:
+		afs_kill_pages(mapping, first, last);
+		mapping_set_error(mapping, ret);
+		break;
 	}
 
 	_leave(" = %d", ret);
@@ -496,16 +528,12 @@ no_more:
  */
 int afs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct afs_writeback *wb;
 	int ret;
 
 	_enter("{%lx},", page->index);
 
-	wb = (struct afs_writeback *) page_private(page);
-	ASSERT(wb != NULL);
-
-	ret = afs_write_back_from_locked_page(wb, page);
-	unlock_page(page);
+	ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
+					      wbc->range_end >> PAGE_SHIFT);
 	if (ret < 0) {
 		_leave(" = %d", ret);
 		return 0;
@@ -524,7 +552,6 @@ static int afs_writepages_region(struct address_space *mapping,
 				 struct writeback_control *wbc,
 				 pgoff_t index, pgoff_t end, pgoff_t *_next)
 {
-	struct afs_writeback *wb;
 	struct page *page;
 	int ret, n;
 
@@ -550,7 +577,12 @@ static int afs_writepages_region(struct address_space *mapping,
 		 * (changing page->mapping to NULL), or even swizzled back from
 		 * swapper_space to tmpfs file mapping
 		 */
-		lock_page(page);
+		ret = lock_page_killable(page);
+		if (ret < 0) {
+			put_page(page);
+			_leave(" = %d", ret);
+			return ret;
+		}
 
 		if (page->mapping != mapping || !PageDirty(page)) {
 			unlock_page(page);
@@ -566,17 +598,9 @@ static int afs_writepages_region(struct address_space *mapping,
 			continue;
 		}
 
-		wb = (struct afs_writeback *) page_private(page);
-		ASSERT(wb != NULL);
-
-		spin_lock(&wb->vnode->writeback_lock);
-		wb->state = AFS_WBACK_WRITING;
-		spin_unlock(&wb->vnode->writeback_lock);
-
 		if (!clear_page_dirty_for_io(page))
 			BUG();
-		ret = afs_write_back_from_locked_page(wb, page);
-		unlock_page(page);
+		ret = afs_write_back_from_locked_page(mapping, wbc, page, end);
 		put_page(page);
 		if (ret < 0) {
 			_leave(" = %d", ret);
@@ -632,17 +656,13 @@ int afs_writepages(struct address_space *mapping,
  */
 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 {
-	struct afs_writeback *wb = call->wb;
 	struct pagevec pv;
 	unsigned count, loop;
 	pgoff_t first = call->first, last = call->last;
-	bool free_wb;
 
 	_enter("{%x:%u},{%lx-%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, first, last);
 
-	ASSERT(wb != NULL);
-
 	pagevec_init(&pv, 0);
 
 	do {
@@ -651,35 +671,19 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 		count = last - first + 1;
 		if (count > PAGEVEC_SIZE)
 			count = PAGEVEC_SIZE;
-		pv.nr = find_get_pages_contig(call->mapping, first, count,
-					      pv.pages);
+		pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
+					      first, count, pv.pages);
 		ASSERTCMP(pv.nr, ==, count);
 
-		spin_lock(&vnode->writeback_lock);
 		for (loop = 0; loop < count; loop++) {
-			struct page *page = pv.pages[loop];
-			end_page_writeback(page);
-			if (page_private(page) == (unsigned long) wb) {
-				set_page_private(page, 0);
-				ClearPagePrivate(page);
-				wb->usage--;
-			}
+			set_page_private(pv.pages[loop], 0);
+			end_page_writeback(pv.pages[loop]);
 		}
-		free_wb = false;
-		if (wb->usage == 0) {
-			afs_unlink_writeback(wb);
-			free_wb = true;
-		}
-		spin_unlock(&vnode->writeback_lock);
 		first += count;
-		if (free_wb) {
-			afs_free_writeback(wb);
-			wb = NULL;
-		}
-
 		__pagevec_release(&pv);
 	} while (first <= last);
 
+	afs_prune_wb_keys(vnode);
 	_leave("");
 }
 
@@ -710,28 +714,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	return result;
 }
 
-/*
- * flush the vnode to the fileserver
- */
-int afs_writeback_all(struct afs_vnode *vnode)
-{
-	struct address_space *mapping = vnode->vfs_inode.i_mapping;
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_to_write	= LONG_MAX,
-		.range_cyclic	= 1,
-	};
-	int ret;
-
-	_enter("");
-
-	ret = mapping->a_ops->writepages(mapping, &wbc);
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	_leave(" = %d", ret);
-	return ret;
-}
-
 /*
  * flush any dirty pages for this process, and check for write errors.
  * - the return status from this call provides a reliable indication of
@@ -740,61 +722,13 @@ int afs_writeback_all(struct afs_vnode *vnode)
 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file_inode(file);
-	struct afs_writeback *wb, *xwb;
 	struct afs_vnode *vnode = AFS_FS_I(inode);
-	int ret;
 
 	_enter("{%x:%u},{n=%pD},%d",
 	       vnode->fid.vid, vnode->fid.vnode, file,
 	       datasync);
 
-	ret = file_write_and_wait_range(file, start, end);
-	if (ret)
-		return ret;
-	inode_lock(inode);
-
-	/* use a writeback record as a marker in the queue - when this reaches
-	 * the front of the queue, all the outstanding writes are either
-	 * completed or rejected */
-	wb = kzalloc(sizeof(*wb), GFP_KERNEL);
-	if (!wb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	wb->vnode = vnode;
-	wb->first = 0;
-	wb->last = -1;
-	wb->offset_first = 0;
-	wb->to_last = PAGE_SIZE;
-	wb->usage = 1;
-	wb->state = AFS_WBACK_SYNCING;
-	init_waitqueue_head(&wb->waitq);
-
-	spin_lock(&vnode->writeback_lock);
-	list_for_each_entry(xwb, &vnode->writebacks, link) {
-		if (xwb->state == AFS_WBACK_PENDING)
-			xwb->state = AFS_WBACK_CONFLICTING;
-	}
-	list_add_tail(&wb->link, &vnode->writebacks);
-	spin_unlock(&vnode->writeback_lock);
-
-	/* push all the outstanding writebacks to the server */
-	ret = afs_writeback_all(vnode);
-	if (ret < 0) {
-		afs_put_writeback(wb);
-		_leave(" = %d [wb]", ret);
-		goto out;
-	}
-
-	/* wait for the preceding writes to actually complete */
-	ret = wait_event_interruptible(wb->waitq,
-				       wb->state == AFS_WBACK_COMPLETE ||
-				       vnode->writebacks.next == &wb->link);
-	afs_put_writeback(wb);
-	_leave(" = %d", ret);
-out:
-	inode_unlock(inode);
-	return ret;
+	return file_write_and_wait_range(file, start, end);
 }
 
 /*
@@ -831,3 +765,68 @@ int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	_leave(" = 0");
 	return 0;
 }
+
+/*
+ * Prune the keys cached for writeback.  The caller must hold vnode->wb_lock.
+ */
+void afs_prune_wb_keys(struct afs_vnode *vnode)
+{
+	LIST_HEAD(graveyard);
+	struct afs_wb_key *wbk, *tmp;
+
+	/* Discard unused keys */
+	spin_lock(&vnode->wb_lock);
+
+	if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
+	    !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
+		list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
+			if (refcount_read(&wbk->usage) == 1)
+				list_move(&wbk->vnode_link, &graveyard);
+		}
+	}
+
+	spin_unlock(&vnode->wb_lock);
+
+	while (!list_empty(&graveyard)) {
+		wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
+		list_del(&wbk->vnode_link);
+		afs_put_wb_key(wbk);
+	}
+}
+
+/*
+ * Clean up a page during invalidation.
+ */
+int afs_launder_page(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+	unsigned long priv;
+	unsigned int f, t;
+	int ret = 0;
+
+	_enter("{%lx}", page->index);
+
+	priv = page_private(page);
+	if (clear_page_dirty_for_io(page)) {
+		f = 0;
+		t = PAGE_SIZE;
+		if (PagePrivate(page)) {
+			f = priv & AFS_PRIV_MAX;
+			t = priv >> AFS_PRIV_SHIFT;
+		}
+
+		ret = afs_store_data(mapping, page->index, page->index, t, f);
+	}
+
+	set_page_private(page, 0);
+	ClearPagePrivate(page);
+
+#ifdef CONFIG_AFS_FSCACHE
+	if (PageFsCache(page)) {
+		fscache_wait_on_page_write(vnode->cache, page);
+		fscache_uncache_page(vnode->cache, page);
+	}
+#endif
+	return ret;
+}
-- 
cgit v1.2.3


From 1cf7a1518aefa69ac6ba0c3f9206073e4221e3c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:52 +0000
Subject: afs: Implement shared-writeable mmap

Implement shared-writeable mmap for AFS.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c     | 22 +++++++++++++++++++++-
 fs/afs/internal.h |  1 +
 fs/afs/write.c    | 40 ++++++++++++++++++++++++++++++++--------
 3 files changed, 54 insertions(+), 9 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index c3a7bc1281f5..675c5c268a52 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -19,6 +19,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include "internal.h"
 
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
 static int afs_readpage(struct file *file, struct page *page);
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length);
@@ -34,7 +35,7 @@ const struct file_operations afs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= afs_file_write,
-	.mmap		= generic_file_readonly_mmap,
+	.mmap		= afs_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= afs_fsync,
 	.lock		= afs_lock,
@@ -61,6 +62,12 @@ const struct address_space_operations afs_fs_aops = {
 	.writepages	= afs_writepages,
 };
 
+static const struct vm_operations_struct afs_vm_ops = {
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+	.page_mkwrite	= afs_page_mkwrite,
+};
+
 /*
  * Discard a pin on a writeback key.
  */
@@ -629,3 +636,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 	_leave(" = T");
 	return 1;
 }
+
+/*
+ * Handle setting up a memory mapping on an AFS file.
+ */
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = generic_file_mmap(file, vma);
+	if (ret == 0)
+		vma->vm_ops = &afs_vm_ops;
+	return ret;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 688562ae3bf8..1de36e6abd5e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -886,6 +886,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
+extern int afs_page_mkwrite(struct vm_fault *);
 extern void afs_prune_wb_keys(struct afs_vnode *);
 extern int afs_launder_page(struct page *);
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 4c131371005b..6807277ef956 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -749,21 +749,45 @@ int afs_flush(struct file *file, fl_owner_t id)
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
  */
-int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int afs_page_mkwrite(struct vm_fault *vmf)
 {
-	struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
+	struct file *file = vmf->vma->vm_file;
+	struct inode *inode = file_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	unsigned long priv;
 
 	_enter("{{%x:%u}},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, page->index);
+	       vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
 
-	/* wait for the page to be written to the cache before we allow it to
-	 * be modified */
+	sb_start_pagefault(inode->i_sb);
+
+	/* Wait for the page to be written to the cache before we allow it to
+	 * be modified.  We then assume the entire page will need writing back.
+	 */
 #ifdef CONFIG_AFS_FSCACHE
-	fscache_wait_on_page_write(vnode->cache, page);
+	fscache_wait_on_page_write(vnode->cache, vmf->page);
 #endif
 
-	_leave(" = 0");
-	return 0;
+	if (PageWriteback(vmf->page) &&
+	    wait_on_page_bit_killable(vmf->page, PG_writeback) < 0)
+		return VM_FAULT_RETRY;
+
+	if (lock_page_killable(vmf->page) < 0)
+		return VM_FAULT_RETRY;
+
+	/* We mustn't change page->private until writeback is complete as that
+	 * details the portion of the page we need to write back and we might
+	 * need to redirty the page if there's a problem.
+	 */
+	wait_on_page_writeback(vmf->page);
+
+	priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
+	priv |= 0; /* From */
+	SetPagePrivate(vmf->page);
+	set_page_private(vmf->page, priv);
+
+	sb_end_pagefault(inode->i_sb);
+	return VM_FAULT_LOCKED;
 }
 
 /*
-- 
cgit v1.2.3


From 13524ab3c6f41bcd257d28644414297bea8282b7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:53 +0000
Subject: afs: Trace page dirty/clean

Add a trace event that logs the dirtying and cleaning of pages attached to
AFS inodes.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c  | 10 ++++++++++
 fs/afs/write.c | 34 +++++++++++++++++++++-------------
 2 files changed, 31 insertions(+), 13 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 675c5c268a52..a39192ced99e 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -583,6 +583,9 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length)
 {
+	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	unsigned long priv;
+
 	_enter("{%lu},%u,%u", page->index, offset, length);
 
 	BUG_ON(!PageLocked(page));
@@ -598,6 +601,9 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 #endif
 
 		if (PagePrivate(page)) {
+			priv = page_private(page);
+			trace_afs_page_dirty(vnode, tracepoint_string("inval"),
+					     page->index, priv);
 			set_page_private(page, 0);
 			ClearPagePrivate(page);
 		}
@@ -613,6 +619,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	unsigned long priv;
 
 	_enter("{{%x:%u}[%lu],%lx},%x",
 	       vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
@@ -628,6 +635,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 #endif
 
 	if (PagePrivate(page)) {
+		priv = page_private(page);
+		trace_afs_page_dirty(vnode, tracepoint_string("rel"),
+				     page->index, priv);
 		set_page_private(page, 0);
 		ClearPagePrivate(page);
 	}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 6807277ef956..4472882f06df 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -17,19 +17,6 @@
 #include <linux/pagevec.h>
 #include "internal.h"
 
-/*
- * We use page->private to hold the amount of the page that we've written to,
- * splitting the field into two parts.  However, we need to represent a range
- * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system.
- */
-#if PAGE_SIZE > 32768
-#define AFS_PRIV_MAX	0xffffffff
-#define AFS_PRIV_SHIFT	32
-#else
-#define AFS_PRIV_MAX	0xffff
-#define AFS_PRIV_SHIFT	16
-#endif
-
 /*
  * mark a page as having been made dirty and thus needing writeback
  */
@@ -145,6 +132,8 @@ try_again:
 
 	priv = (unsigned long)t << AFS_PRIV_SHIFT;
 	priv |= f;
+	trace_afs_page_dirty(vnode, tracepoint_string("begin"),
+			     page->index, priv);
 	SetPagePrivate(page);
 	set_page_private(page, priv);
 	_leave(" = 0");
@@ -386,6 +375,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 					   struct page *primary_page,
 					   pgoff_t final_page)
 {
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct page *pages[8], *page;
 	unsigned long count, priv;
 	unsigned n, offset, to, f, t;
@@ -407,8 +397,13 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 	priv = page_private(primary_page);
 	offset = priv & AFS_PRIV_MAX;
 	to = priv >> AFS_PRIV_SHIFT;
+	trace_afs_page_dirty(vnode, tracepoint_string("store"),
+			     primary_page->index, priv);
 
 	WARN_ON(offset == to);
+	if (offset == to)
+		trace_afs_page_dirty(vnode, tracepoint_string("WARN"),
+				     primary_page->index, priv);
 
 	if (start >= final_page || to < PAGE_SIZE)
 		goto no_more;
@@ -452,6 +447,9 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 			}
 			to = t;
 
+			trace_afs_page_dirty(vnode, tracepoint_string("store+"),
+					     page->index, priv);
+
 			if (!clear_page_dirty_for_io(page))
 				BUG();
 			if (test_set_page_writeback(page))
@@ -657,6 +655,7 @@ int afs_writepages(struct address_space *mapping,
 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 {
 	struct pagevec pv;
+	unsigned long priv;
 	unsigned count, loop;
 	pgoff_t first = call->first, last = call->last;
 
@@ -676,6 +675,9 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 		ASSERTCMP(pv.nr, ==, count);
 
 		for (loop = 0; loop < count; loop++) {
+			priv = page_private(pv.pages[loop]);
+			trace_afs_page_dirty(vnode, tracepoint_string("clear"),
+					     pv.pages[loop]->index, priv);
 			set_page_private(pv.pages[loop], 0);
 			end_page_writeback(pv.pages[loop]);
 		}
@@ -783,6 +785,8 @@ int afs_page_mkwrite(struct vm_fault *vmf)
 
 	priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
 	priv |= 0; /* From */
+	trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
+			     vmf->page->index, priv);
 	SetPagePrivate(vmf->page);
 	set_page_private(vmf->page, priv);
 
@@ -840,9 +844,13 @@ int afs_launder_page(struct page *page)
 			t = priv >> AFS_PRIV_SHIFT;
 		}
 
+		trace_afs_page_dirty(vnode, tracepoint_string("launder"),
+				     page->index, priv);
 		ret = afs_store_data(mapping, page->index, page->index, t, f);
 	}
 
+	trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
+			     page->index, priv);
 	set_page_private(page, 0);
 	ClearPagePrivate(page);
 
-- 
cgit v1.2.3


From 98bf40cd99fcfed0705812b6cbdbb3b441a42970 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:27:53 +0000
Subject: afs: Protect call->state changes against signals

Protect call->state changes against the call being prematurely terminated
due to a signal.

What can happen is that a signal causes afs_wait_for_call_to_complete() to
abort an afs_call because it's not yet complete whilst afs_deliver_to_call()
is delivering data to that call.

If the data delivery causes the state to change, this may overwrite the state
of the afs_call, making it not-yet-complete again - but no further
notifications will be forthcoming from AF_RXRPC as the rxrpc call has been
aborted and completed, so kAFS will just hang in various places waiting for
that call or on page bits that need clearing by that call.

A tracepoint to monitor call state changes is also provided.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c |  26 +++++++-------
 fs/afs/internal.h  |  63 ++++++++++++++++++++++++++++-----
 fs/afs/rxrpc.c     | 100 +++++++++++++++++++++++++++++------------------------
 3 files changed, 120 insertions(+), 69 deletions(-)

(limited to 'fs/afs')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 5767f540e0e1..41e277f57b20 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -188,7 +188,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -281,10 +280,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		break;
 	}
 
-	call->state = AFS_CALL_REPLYING;
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return -EIO;
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
@@ -325,9 +326,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
-
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
 	server = afs_find_server(call->net, &srx);
@@ -352,8 +350,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	_enter("");
 
-	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
 	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
@@ -397,11 +393,12 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		break;
 	}
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return -EIO;
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 	server = afs_find_server(call->net, &srx);
 	if (!server)
 		return -ENOTCONN;
@@ -436,8 +433,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return -EIO;
 
 	return afs_queue_call_work(call);
 }
@@ -519,7 +516,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		break;
 	}
 
-	call->state = AFS_CALL_REPLYING;
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return -EIO;
 
 	return afs_queue_call_work(call);
 }
@@ -600,8 +598,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return -EIO;
 
 	return afs_queue_call_work(call);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1de36e6abd5e..bd8dcee7e066 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -51,13 +51,14 @@ struct afs_iget_data {
 };
 
 enum afs_call_state {
-	AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
-	AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
-	AFS_CALL_AWAIT_OP_ID,	/* awaiting op ID on incoming call */
-	AFS_CALL_AWAIT_REQUEST,	/* awaiting request data on incoming call */
-	AFS_CALL_REPLYING,	/* replying to incoming call */
-	AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
-	AFS_CALL_COMPLETE,	/* Completed or failed */
+	AFS_CALL_CL_REQUESTING,		/* Client: Request is being sent */
+	AFS_CALL_CL_AWAIT_REPLY,	/* Client: Awaiting reply */
+	AFS_CALL_CL_PROC_REPLY,		/* Client: rxrpc call complete; processing reply */
+	AFS_CALL_SV_AWAIT_OP_ID,	/* Server: Awaiting op ID */
+	AFS_CALL_SV_AWAIT_REQUEST,	/* Server: Awaiting request data */
+	AFS_CALL_SV_REPLYING,		/* Server: Replying */
+	AFS_CALL_SV_AWAIT_ACK,		/* Server: Awaiting final ACK */
+	AFS_CALL_COMPLETE,		/* Completed or failed */
 };
 
 /*
@@ -97,6 +98,7 @@ struct afs_call {
 	size_t			offset;		/* offset into received data store */
 	atomic_t		usage;
 	enum afs_call_state	state;
+	spinlock_t		state_lock;
 	int			error;		/* error code */
 	u32			abort_code;	/* Remote abort ID or 0 */
 	unsigned		request_size;	/* size of request data */
@@ -543,6 +545,8 @@ struct afs_fs_cursor {
 #define AFS_FS_CURSOR_NO_VSLEEP	0x0020		/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
 };
 
+#include <trace/events/afs.h>
+
 /*****************************************************************************/
 /*
  * addr_list.c
@@ -788,6 +792,49 @@ static inline int afs_transfer_reply(struct afs_call *call)
 	return afs_extract_data(call, call->buffer, call->reply_max, false);
 }
 
+static inline bool afs_check_call_state(struct afs_call *call,
+					enum afs_call_state state)
+{
+	return READ_ONCE(call->state) == state;
+}
+
+static inline bool afs_set_call_state(struct afs_call *call,
+				      enum afs_call_state from,
+				      enum afs_call_state to)
+{
+	bool ok = false;
+
+	spin_lock_bh(&call->state_lock);
+	if (call->state == from) {
+		call->state = to;
+		trace_afs_call_state(call, from, to, 0, 0);
+		ok = true;
+	}
+	spin_unlock_bh(&call->state_lock);
+	return ok;
+}
+
+static inline void afs_set_call_complete(struct afs_call *call,
+					 int error, u32 remote_abort)
+{
+	enum afs_call_state state;
+	bool ok = false;
+
+	spin_lock_bh(&call->state_lock);
+	state = call->state;
+	if (state != AFS_CALL_COMPLETE) {
+		call->abort_code = remote_abort;
+		call->error = error;
+		call->state = AFS_CALL_COMPLETE;
+		trace_afs_call_state(call, state, AFS_CALL_COMPLETE,
+				     error, remote_abort);
+		ok = true;
+	}
+	spin_unlock_bh(&call->state_lock);
+	if (ok)
+		trace_afs_call_done(call);
+}
+
 /*
  * security.c
  */
@@ -932,8 +979,6 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
 /*
  * debug tracing
  */
-#include <trace/events/afs.h>
-
 extern unsigned afs_debug;
 
 #define dbgprintk(FMT,...) \
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bd44ae8b63d8..ea1460b9b71a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -134,6 +134,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
 	atomic_set(&call->usage, 1);
 	INIT_WORK(&call->async_work, afs_process_async_call);
 	init_waitqueue_head(&call->waitq);
+	spin_lock_init(&call->state_lock);
 
 	o = atomic_inc_return(&net->nr_outstanding_calls);
 	trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -288,8 +289,7 @@ static void afs_notify_end_request_tx(struct sock *sock,
 {
 	struct afs_call *call = (struct afs_call *)call_user_ID;
 
-	if (call->state == AFS_CALL_REQUESTING)
-		call->state = AFS_CALL_AWAIT_REPLY;
+	afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
 }
 
 /*
@@ -444,82 +444,87 @@ error_kill_call:
  */
 static void afs_deliver_to_call(struct afs_call *call)
 {
-	u32 abort_code;
+	enum afs_call_state state;
+	u32 abort_code, remote_abort = 0;
 	int ret;
 
 	_enter("%s", call->type->name);
 
-	while (call->state == AFS_CALL_AWAIT_REPLY ||
-	       call->state == AFS_CALL_AWAIT_OP_ID ||
-	       call->state == AFS_CALL_AWAIT_REQUEST ||
-	       call->state == AFS_CALL_AWAIT_ACK
+	while (state = READ_ONCE(call->state),
+	       state == AFS_CALL_CL_AWAIT_REPLY ||
+	       state == AFS_CALL_SV_AWAIT_OP_ID ||
+	       state == AFS_CALL_SV_AWAIT_REQUEST ||
+	       state == AFS_CALL_SV_AWAIT_ACK
 	       ) {
-		if (call->state == AFS_CALL_AWAIT_ACK) {
+		if (state == AFS_CALL_SV_AWAIT_ACK) {
 			size_t offset = 0;
 			ret = rxrpc_kernel_recv_data(call->net->socket,
 						     call->rxcall,
 						     NULL, 0, &offset, false,
-						     &call->abort_code,
+						     &remote_abort,
 						     &call->service_id);
 			trace_afs_recv_data(call, 0, offset, false, ret);
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
-			if (ret < 0)
-				call->error = ret;
-			if (ret < 0 || ret == 1)
+			if (ret < 0 || ret == 1) {
+				if (ret == 1)
+					ret = 0;
 				goto call_complete;
+			}
 			return;
 		}
 
 		ret = call->type->deliver(call);
+		state = READ_ONCE(call->state);
 		switch (ret) {
 		case 0:
-			if (call->state == AFS_CALL_AWAIT_REPLY)
+			if (state == AFS_CALL_CL_PROC_REPLY)
 				goto call_complete;
+			ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
 			goto done;
 		case -EINPROGRESS:
 		case -EAGAIN:
 			goto out;
+		case -EIO:
 		case -ECONNABORTED:
-			goto save_error;
+			ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+			goto done;
 		case -ENOTCONN:
 			abort_code = RX_CALL_DEAD;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, ret, "KNC");
-			goto save_error;
+			goto local_abort;
 		case -ENOTSUPP:
 			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, ret, "KIV");
-			goto save_error;
+			goto local_abort;
 		case -ENODATA:
 		case -EBADMSG:
 		case -EMSGSIZE:
 		default:
 			abort_code = RXGEN_CC_UNMARSHAL;
-			if (call->state != AFS_CALL_AWAIT_REPLY)
+			if (state != AFS_CALL_CL_AWAIT_REPLY)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, -EBADMSG, "KUM");
-			goto save_error;
+			goto local_abort;
 		}
 	}
 
 done:
-	if (call->state == AFS_CALL_COMPLETE && call->incoming)
+	if (state == AFS_CALL_COMPLETE && call->incoming)
 		afs_put_call(call);
 out:
 	_leave("");
 	return;
 
-save_error:
-	call->error = ret;
+local_abort:
+	abort_code = 0;
 call_complete:
-	if (call->state != AFS_CALL_COMPLETE) {
-		call->state = AFS_CALL_COMPLETE;
-		trace_afs_call_done(call);
-	}
+	afs_set_call_complete(call, ret, remote_abort);
+	state = AFS_CALL_COMPLETE;
 	goto done;
 }
 
@@ -551,14 +556,15 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
 		/* deliver any messages that are in the queue */
-		if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+		if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+		    call->need_attention) {
 			call->need_attention = false;
 			__set_current_state(TASK_RUNNING);
 			afs_deliver_to_call(call);
 			continue;
 		}
 
-		if (call->state == AFS_CALL_COMPLETE)
+		if (afs_check_call_state(call, AFS_CALL_COMPLETE))
 			break;
 
 		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
@@ -578,17 +584,17 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 	__set_current_state(TASK_RUNNING);
 
 	/* Kill off the call if it's still live. */
-	if (call->state < AFS_CALL_COMPLETE) {
+	if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
 		_debug("call interrupted");
 		if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-					    RX_USER_ABORT, -EINTR, "KWI")) {
-			call->error = -ERESTARTSYS;
-			trace_afs_call_done(call);
-		}
+					    RX_USER_ABORT, -EINTR, "KWI"))
+			afs_set_call_complete(call, -EINTR, 0);
 	}
 
+	spin_lock_bh(&call->state_lock);
 	ac->abort_code = call->abort_code;
 	ac->error = call->error;
+	spin_unlock_bh(&call->state_lock);
 
 	ret = ac->error;
 	switch (ret) {
@@ -713,7 +719,7 @@ void afs_charge_preallocation(struct work_struct *work)
 				break;
 
 			call->async = true;
-			call->state = AFS_CALL_AWAIT_OP_ID;
+			call->state = AFS_CALL_SV_AWAIT_OP_ID;
 			init_waitqueue_head(&call->waitq);
 		}
 
@@ -769,7 +775,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 		return ret;
 
 	call->operation_ID = ntohl(call->tmp);
-	call->state = AFS_CALL_AWAIT_REQUEST;
+	afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
 	call->offset = 0;
 
 	/* ask the cache manager to route the call (it'll change the call type
@@ -794,8 +800,7 @@ static void afs_notify_end_reply_tx(struct sock *sock,
 {
 	struct afs_call *call = (struct afs_call *)call_user_ID;
 
-	if (call->state == AFS_CALL_REPLYING)
-		call->state = AFS_CALL_AWAIT_ACK;
+	afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK);
 }
 
 /*
@@ -879,6 +884,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		     bool want_more)
 {
 	struct afs_net *net = call->net;
+	enum afs_call_state state;
+	u32 remote_abort;
 	int ret;
 
 	_enter("{%s,%zu},,%zu,%d",
@@ -888,29 +895,30 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
 				     buf, count, &call->offset,
-				     want_more, &call->abort_code,
+				     want_more, &remote_abort,
 				     &call->service_id);
 	trace_afs_recv_data(call, count, call->offset, want_more, ret);
 	if (ret == 0 || ret == -EAGAIN)
 		return ret;
 
+	state = READ_ONCE(call->state);
 	if (ret == 1) {
-		switch (call->state) {
-		case AFS_CALL_AWAIT_REPLY:
-			call->state = AFS_CALL_COMPLETE;
-			trace_afs_call_done(call);
+		switch (state) {
+		case AFS_CALL_CL_AWAIT_REPLY:
+			afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY);
 			break;
-		case AFS_CALL_AWAIT_REQUEST:
-			call->state = AFS_CALL_REPLYING;
+		case AFS_CALL_SV_AWAIT_REQUEST:
+			afs_set_call_state(call, state, AFS_CALL_SV_REPLYING);
 			break;
+		case AFS_CALL_COMPLETE:
+			kdebug("prem complete %d", call->error);
+			return -EIO;
 		default:
 			break;
 		}
 		return 0;
 	}
 
-	call->error = ret;
-	call->state = AFS_CALL_COMPLETE;
-	trace_afs_call_done(call);
+	afs_set_call_complete(call, ret, remote_abort);
 	return ret;
 }
-- 
cgit v1.2.3