Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/ctree.c              |   8
-rw-r--r--  fs/btrfs/extent-tree.c        |  16
-rw-r--r--  fs/btrfs/file.c               |  87
-rw-r--r--  fs/btrfs/inode.c              |   1
-rw-r--r--  fs/btrfs/ordered-data.c       |   7
-rw-r--r--  fs/btrfs/send.c               | 171
-rw-r--r--  fs/btrfs/transaction.c        |   3
-rw-r--r--  fs/btrfs/tree-log.c           |   2
-rw-r--r--  fs/btrfs/volumes.c            |   9
-rw-r--r--  fs/btrfs/xattr.c              |   8
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h |   4
-rw-r--r--  fs/ecryptfs/file.c            |  34
-rw-r--r--  fs/ecryptfs/keystore.c        |   2
-rw-r--r--  fs/ecryptfs/main.c            |   2
-rw-r--r--  fs/locks.c                    |   5
-rw-r--r--  fs/nfs/client.c               |   2
-rw-r--r--  fs/nfs/delegation.c           |  45
-rw-r--r--  fs/nfs/dir.c                  |  22
-rw-r--r--  fs/nfs/file.c                 |  11
-rw-r--r--  fs/nfs/inode.c                | 111
-rw-r--r--  fs/nfs/internal.h             |   1
-rw-r--r--  fs/nfs/nfs3proc.c             |   4
-rw-r--r--  fs/nfs/nfs3xdr.c              |   5
-rw-r--r--  fs/nfs/nfs4client.c           |   9
-rw-r--r--  fs/nfs/nfs4proc.c             |  31
-rw-r--r--  fs/nfs/nfs4session.h          |   1
-rw-r--r--  fs/nfs/nfs4state.c            |  18
-rw-r--r--  fs/nfs/proc.c                 |   6
-rw-r--r--  fs/nfs/write.c                |  30
-rw-r--r--  fs/nfsd/nfs4state.c           |   2
-rw-r--r--  fs/nilfs2/btree.c             |  47
-rw-r--r--  fs/nilfs2/segment.c           |   7
-rw-r--r--  fs/notify/fanotify/fanotify.c |   3
-rw-r--r--  fs/ocfs2/ocfs2.h              |   2
-rw-r--r--  fs/ocfs2/ocfs2_fs.h           |  15
-rw-r--r--  fs/proc/task_mmu.c            |   3
-rw-r--r--  fs/xfs/xfs_file.c             |  14
-rw-r--r--  fs/xfs/xfs_inode.c            |   4
-rw-r--r--  fs/xfs/xfs_inode.h            |   9
-rw-r--r--  fs/xfs/xfs_iops.c             |  36
-rw-r--r--  fs/xfs/xfs_pnfs.c             |   4
-rw-r--r--  fs/xfs/xfs_qm.c               |   5
42 files changed, 627 insertions(+), 179 deletions(-)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
parent_nritems = btrfs_header_nritems(parent);
blocksize = root->nodesize;
- end_slot = parent_nritems;
+ end_slot = parent_nritems - 1;
- if (parent_nritems == 1)
+ if (parent_nritems <= 1)
return 0;
btrfs_set_lock_blocking(parent);
- for (i = start_slot; i < end_slot; i++) {
+ for (i = start_slot; i <= end_slot; i++) {
int close = 1;
btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
other = btrfs_node_blockptr(parent, i - 1);
close = close_blocks(blocknr, other, blocksize);
}
- if (!close && i < end_slot - 2) {
+ if (!close && i < end_slot) {
other = btrfs_node_blockptr(parent, i + 1);
close = close_blocks(blocknr, other, blocksize);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..6f080451fcb1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
return 0;
}
+ if (trans->aborted)
+ return 0;
again:
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
+ if (ret) {
+ /*
+ * So theoretically we could recover from this, simply set the
+ * super cache generation to 0 so we know to invalidate the
+ * cache, but then we'd have to keep track of the block groups
+ * that fail this way so we know we _have_ to reset this cache
+ * before the next commit or risk reading stale cache. So to
+ * limit our exposure to horrible edge cases lets just abort the
+ * transaction, this only happens in really bad situations
+ * anyway.
+ */
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_put;
+ }
WARN_ON(ret);
if (i_size_read(inode) > 0) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..30982bbd31c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
/*
- * we want to make sure fsync finds this change
- * but we haven't joined a transaction running right now.
- *
- * Later on, someone is sure to update the inode and get the
- * real transid recorded.
- *
- * We set last_trans now to the fs_info generation + 1,
- * this will either be one more than the running transaction
- * or the generation used for the next transaction if there isn't
- * one running right now.
- *
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
if (num_written > 0) {
err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
- * check the transaction that last modified this inode
- * and see if its already been committed
- */
- if (!BTRFS_I(inode)->last_trans) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * if the last transaction that changed this file was before
- * the current transaction, we can bail out now without any
- * syncing
+ * If the last transaction that changed this file was before the current
+ * transaction and we have the full sync flag set in our inode, we can
+ * bail out now without any syncing.
+ *
+ * Note that we can't bail out if the full sync flag isn't set. This is
+ * because when the full sync flag is set we start all ordered extents
+ * and wait for them to fully complete - when they complete they update
+ * the inode's last_trans field through:
+ *
+ * btrfs_finish_ordered_io() ->
+ * btrfs_update_inode_fallback() ->
+ * btrfs_update_inode() ->
+ * btrfs_set_inode_last_trans()
+ *
+ * So we are sure that last_trans is up to date and can do this check to
+ * bail out safely. For the fast path, when the full sync flag is not
+ * set in our inode, we can not do it because we start only our ordered
+ * extents and don't wait for them to complete (that is when
+ * btrfs_finish_ordered_io runs), so here at this point their last_trans
+ * value might be less than or equals to fs_info->last_trans_committed,
+ * and setting a speculative last_trans for an inode when a buffered
+ * write is made (such as fs_info->generation + 1 for example) would not
+ * be reliable since after setting the value and before fsync is called
+ * any number of transactions can start and commit (transaction kthread
+ * commits the current transaction periodically), and a transaction
+ * commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) {
- BTRFS_I(inode)->last_trans = 0;
-
+ (full_sync && BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed)) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
bool same_page;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size;
+ bool truncated_page = false;
+ bool updated_inode = false;
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
* entire page.
*/
if (same_page && len < PAGE_CACHE_SIZE) {
- if (offset < ino_size)
+ if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, len, 0);
+ } else {
+ ret = 0;
+ }
goto out_only_mutex;
}
/* zero back part of the first page */
if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode,
tail_start + tail_len, 0, 1);
if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
if (lockend < lockstart) {
- mutex_unlock(&inode->i_mutex);
- return 0;
+ ret = 0;
+ goto out_only_mutex;
}
while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
+ updated_inode = true;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out_free:
@@ -2515,6 +2524,22 @@ out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
+ if (!updated_inode && truncated_page && !ret && !err) {
+ /*
+ * If we only end up zeroing part of a page, we still need to
+ * update the inode item, so that all the time fields are
+ * updated as well as the necessary btrfs inode in memory fields
+ * for detecting, at fsync time, if the inode isn't yet in the
+ * log tree or it's there but not up to date.
+ */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ } else {
+ err = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_end_transaction(trans, root);
+ }
+ }
mutex_unlock(&inode->i_mutex);
if (ret && !err)
err = ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..da828cf5e8f8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7285,7 +7285,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
em->block_start != EXTENT_MAP_HOLE)) {
int type;
- int ret;
u64 block_start, orig_start, orig_block_len, ram_bytes;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
continue;
if (entry_end(ordered) <= start)
break;
- if (!list_empty(&ordered->log_list))
- continue;
- if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
+ if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
continue;
list_add(&ordered->log_list, logged_list);
atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
- if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
- list_add_tail(&ordered->trans_list, &trans->ordered);
+ list_add_tail(&ordered->trans_list, &trans->ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
u64 parent_ino;
u64 ino;
u64 gen;
+ bool is_orphan;
struct list_head update_refs;
};
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
u64 ino_gen,
u64 parent_ino,
struct list_head *new_refs,
- struct list_head *deleted_refs)
+ struct list_head *deleted_refs,
+ const bool is_orphan)
{
struct rb_node **p = &sctx->pending_dir_moves.rb_node;
struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
+ pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
rmdir_ino = dm->rmdir_ino;
free_waiting_dir_move(sctx, dm);
- ret = get_first_ref(sctx->parent_root, pm->ino,
- &parent_ino, &parent_gen, name);
- if (ret < 0)
- goto out;
-
- ret = get_cur_path(sctx, parent_ino, parent_gen,
- from_path);
- if (ret < 0)
- goto out;
- ret = fs_path_add_path(from_path, name);
+ if (pm->is_orphan) {
+ ret = gen_unique_name(sctx, pm->ino,
+ pm->gen, from_path);
+ } else {
+ ret = get_first_ref(sctx->parent_root, pm->ino,
+ &parent_ino, &parent_gen, name);
+ if (ret < 0)
+ goto out;
+ ret = get_cur_path(sctx, parent_ino, parent_gen,
+ from_path);
+ if (ret < 0)
+ goto out;
+ ret = fs_path_add_path(from_path, name);
+ }
if (ret < 0)
goto out;
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
LIST_HEAD(deleted_refs);
ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
- &pm->update_refs, &deleted_refs);
+ &pm->update_refs, &deleted_refs,
+ pm->is_orphan);
if (ret < 0)
goto out;
if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
return ret;
}
+/*
+ * We might need to delay a directory rename even when no ancestor directory
+ * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
+ * renamed. This happens when we rename a directory to the old name (the name
+ * in the parent root) of some other unrelated directory that got its rename
+ * delayed due to some ancestor with higher number that got renamed.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ * . (ino 256)
+ * |---- a/ (ino 257)
+ * | |---- file (ino 260)
+ * |
+ * |---- b/ (ino 258)
+ * |---- c/ (ino 259)
+ *
+ * Send snapshot:
+ * . (ino 256)
+ * |---- a/ (ino 258)
+ * |---- x/ (ino 259)
+ * |---- y/ (ino 257)
+ * |----- file (ino 260)
+ *
+ * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
+ * from 'a' to 'x/y' happening first, which in turn depends on the rename of
+ * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
+ * must issue is:
+ *
+ * 1 - rename 259 from 'c' to 'x'
+ * 2 - rename 257 from 'a' to 'x/y'
+ * 3 - rename 258 from 'b' to 'a'
+ *
+ * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
+ * be done right away and < 0 on error.
+ */
+static int wait_for_dest_dir_move(struct send_ctx *sctx,
+ struct recorded_ref *parent_ref,
+ const bool is_orphan)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_key di_key;
+ struct btrfs_dir_item *di;
+ u64 left_gen;
+ u64 right_gen;
+ int ret = 0;
+
+ if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
+ return 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = parent_ref->dir;
+ key.type = BTRFS_DIR_ITEM_KEY;
+ key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
+
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+
+ di = btrfs_match_dir_item_name(sctx->parent_root, path,
+ parent_ref->name, parent_ref->name_len);
+ if (!di) {
+ ret = 0;
+ goto out;
+ }
+ /*
+ * di_key.objectid has the number of the inode that has a dentry in the
+ * parent directory with the same name that sctx->cur_ino is being
+ * renamed to. We need to check if that inode is in the send root as
+ * well and if it is currently marked as an inode with a pending rename,
+ * if it is, we need to delay the rename of sctx->cur_ino as well, so
+ * that it happens after that other inode is renamed.
+ */
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
+ if (di_key.type != BTRFS_INODE_ITEM_KEY) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
+ &left_gen, NULL, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
+ &right_gen, NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = 0;
+ goto out;
+ }
+
+ /* Different inode, no need to delay the rename of sctx->cur_ino */
+ if (right_gen != left_gen) {
+ ret = 0;
+ goto out;
+ }
+
+ if (is_waiting_for_move(sctx, di_key.objectid)) {
+ ret = add_pending_dir_move(sctx,
+ sctx->cur_ino,
+ sctx->cur_inode_gen,
+ di_key.objectid,
+ &sctx->new_refs,
+ &sctx->deleted_refs,
+ is_orphan);
+ if (!ret)
+ ret = 1;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static int wait_for_parent_move(struct send_ctx *sctx,
struct recorded_ref *parent_ref)
{
@@ -3349,7 +3478,8 @@ out:
sctx->cur_inode_gen,
ino,
&sctx->new_refs,
- &sctx->deleted_refs);
+ &sctx->deleted_refs,
+ false);
if (!ret)
ret = 1;
}
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
int did_overwrite = 0;
int is_orphan = 0;
u64 last_dir_ino_rm = 0;
+ bool can_rename = true;
verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
}
}
+ if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
+ ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
+ if (ret < 0)
+ goto out;
+ if (ret == 1) {
+ can_rename = false;
+ *pending_move = 1;
+ }
+ }
+
/*
* link/move the ref to the new place. If we have an orphan
* inode, move it and update valid_path. If not, link or move
* it depending on the inode mode.
*/
- if (is_orphan) {
+ if (is_orphan && can_rename) {
ret = send_rename(sctx, valid_path, cur->full_path);
if (ret < 0)
goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
ret = fs_path_copy(valid_path, cur->full_path);
if (ret < 0)
goto out;
- } else {
+ } else if (can_rename) {
if (S_ISDIR(sctx->cur_inode_mode)) {
/*
* Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..88e51aded6bd 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1052,9 +1052,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret)
return ret;
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
}
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
base = btrfs_item_ptr_offset(leaf, path->slots[0]);
while (cur_offset < item_size) {
- extref = (struct btrfs_inode_extref *)base + cur_offset;
+ extref = (struct btrfs_inode_extref *)(base + cur_offset);
victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cd4d1315aaa9..8222f6f74147 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4903,10 +4903,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
{
struct btrfs_bio *bbio = kzalloc(
+ /* the size of the btrfs_bio */
sizeof(struct btrfs_bio) +
+ /* plus the variable array for the stripes */
sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+ /* plus the variable array for the tgt dev */
sizeof(int) * (real_stripes) +
- sizeof(u64) * (real_stripes),
+ /*
+ * plus the raid_map, which includes both the tgt dev
+ * and the stripes
+ */
+ sizeof(u64) * (total_stripes),
GFP_NOFS);
if (!bbio)
return NULL;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
name, name_len, -1);
if (!di && (flags & XATTR_REPLACE))
ret = -ENODATA;
+ else if (IS_ERR(di))
+ ret = PTR_ERR(di);
else if (di)
ret = btrfs_delete_one_dir_name(trans, root, path, di);
goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
ASSERT(mutex_is_locked(&inode->i_mutex));
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
- if (!di) {
+ if (!di)
ret = -ENODATA;
+ else if (IS_ERR(di))
+ ret = PTR_ERR(di);
+ if (ret)
goto out;
- }
btrfs_release_path(path);
di = NULL;
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882b306f..5ba029e627cc 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key)
}
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
-#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31
#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
#define ECRYPTFS_SALT_BYTES 2
@@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat {
struct crypto_ablkcipher *tfm;
struct crypto_hash *hash_tfm; /* Crypto context for generating
* the initialization vectors */
- unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+ unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
struct list_head keysig_list;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b07731e68c0b..fd39bad6f1bd 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -303,9 +303,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOTTY;
- if (lower_file->f_op->unlocked_ioctl)
+ if (!lower_file->f_op->unlocked_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ case FS_IOC_GETVERSION:
+ case FS_IOC_SETVERSION:
rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#ifdef CONFIG_COMPAT
@@ -315,9 +328,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOIOCTLCMD;
- if (lower_file->f_op->compat_ioctl)
+ if (!lower_file->f_op->compat_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC32_GETFLAGS:
+ case FS_IOC32_SETFLAGS:
+ case FS_IOC32_GETVERSION:
+ case FS_IOC32_SETVERSION:
rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#endif
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 917bd5c9776a..6bd67e2011f0 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
struct blkcipher_desc desc;
char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
char iv[ECRYPTFS_MAX_IV_BYTES];
- char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+ char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
};
/**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1895d60f4122..c095d3264259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
if (!cipher_name_set) {
int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
- BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+ BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE);
strcpy(mount_crypt_stat->global_default_cipher_name,
ECRYPTFS_DEFAULT_CIPHER);
}
diff --git a/fs/locks.c b/fs/locks.c
index 365c82e1b3a9..528fedfda15e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1665,7 +1665,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
}
if (my_fl != NULL) {
- error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
+ lease = my_fl;
+ error = lease->fl_lmops->lm_change(lease, arg, &dispose);
if (error)
goto out;
goto out_setup;
@@ -1727,7 +1728,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
break;
}
}
- trace_generic_delete_lease(inode, fl);
+ trace_generic_delete_lease(inode, victim);
if (victim)
error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f9f4845db989..19874151e95c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
static bool nfs_client_init_is_complete(const struct nfs_client *clp)
{
- return clp->cl_cons_state != NFS_CS_INITING;
+ return clp->cl_cons_state <= NFS_CS_READY;
}
int nfs_wait_client_init_complete(const struct nfs_client *clp)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a1f0685b42ff..a6ad68865880 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
clear_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags);
spin_unlock(&delegation->lock);
- put_rpccred(oldcred);
rcu_read_unlock();
+ put_rpccred(oldcred);
trace_nfs4_reclaim_delegation(inode, res->delegation_type);
} else {
/* We appear to have raced with a delegation return. */
@@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi,
+ if (test_and_set_bit(NFS_DELEGATION_RETURNING,
+ &old_delegation->flags))
+ goto out;
+ freeme = nfs_detach_delegation_locked(nfsi,
old_delegation, clp);
if (freeme == NULL)
goto out;
@@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
+ if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ goto out;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
ret = true;
spin_unlock(&delegation->lock);
}
+out:
return ret;
}
@@ -471,14 +477,20 @@ restart:
super_list) {
if (!nfs_delegation_need_return(delegation))
continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
+ if (!nfs_sb_active(server->super))
continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL) {
+ rcu_read_unlock();
+ nfs_sb_deactive(server->super);
+ goto restart;
+ }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
+ nfs_sb_deactive(server->super);
if (!err)
goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
@@ -809,19 +821,30 @@ restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
+ if (test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags))
+ continue;
if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
+ if (!nfs_sb_active(server->super))
continue;
- delegation = nfs_detach_delegation(NFS_I(inode),
- delegation, server);
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL) {
+ rcu_read_unlock();
+ nfs_sb_deactive(server->super);
+ goto restart;
+ }
+ delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
-
- if (delegation != NULL)
- nfs_free_delegation(delegation);
+ if (delegation != NULL) {
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ delegation, server);
+ if (delegation != NULL)
+ nfs_free_delegation(delegation);
+ }
iput(inode);
+ nfs_sb_deactive(server->super);
goto restart;
}
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b0c55cb2a2e..c19e16f0b2d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
return 0;
}
+/* Match file and dirent using either filehandle or fileid
+ * Note: caller is responsible for checking the fsid
+ */
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
+ struct nfs_inode *nfsi;
+
if (dentry->d_inode == NULL)
goto different;
- if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
- goto different;
- return 1;
+
+ nfsi = NFS_I(dentry->d_inode);
+ if (entry->fattr->fileid == nfsi->fileid)
+ return 1;
+ if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
+ return 1;
different:
return 0;
}
@@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
struct inode *inode;
int status;
+ if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
+ return;
+ if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
+ return;
if (filename.name[0] == '.') {
if (filename.len == 1)
return;
@@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
dentry = d_lookup(parent, &filename);
if (dentry != NULL) {
+ /* Is there a mountpoint here? If so, just exit */
+ if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
+ &entry->fattr->fsid))
+ goto out;
if (nfs_same_file(dentry, entry)) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc781fa..e679d24c39d3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+ result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
if (result > 0)
@@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
filp, (unsigned long) count, (unsigned long long) *ppos);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
if (!res) {
res = generic_file_splice_read(filp, ppos, pipe, count, flags);
if (res > 0)
@@ -372,6 +372,10 @@ start:
nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
+ /*
+ * Wait for O_DIRECT to complete
+ */
+ nfs_inode_dio_wait(mapping->host);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
@@ -619,6 +623,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/* make sure the cache has finished storing the page */
nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+ wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+
lock_page(page);
mapping = page_file_mapping(page);
if (mapping != inode->i_mapping)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..d42dff6d5e98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
* This is a copy of the common vmtruncate, but with the locking
* corrected to take into account the fact that NFS requires
* inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
if (err)
goto out;
- spin_lock(&inode->i_lock);
i_size_write(inode, offset);
/* Optimisation */
if (offset == 0)
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
+ spin_lock(&inode->i_lock);
out:
return err;
}
@@ -585,10 +586,15 @@ out:
* Note: we do this in the *proc.c in order to ensure that
* it works for things like exclusive creates too.
*/
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+ struct nfs_fattr *fattr)
{
+ /* Barrier: bump the attribute generation count. */
+ nfs_fattr_set_barrier(fattr);
+
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
- spin_lock(&inode->i_lock);
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode->i_gid = attr->ia_gid;
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL);
- spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
+ nfs_update_inode(inode, fattr);
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (mapping->nrpages != 0) {
if (S_ISREG(inode->i_mode)) {
+ unmap_mapping_range(mapping, 0, 0, 0);
ret = nfs_sync_mapping(mapping);
if (ret < 0)
return ret;
@@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
}
/**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
*/
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+ struct address_space *mapping,
+ bool may_lock)
{
struct nfs_inode *nfsi = NFS_I(inode);
unsigned long *bitlock = &nfsi->flags;
@@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
- ret = nfs_invalidate_mapping(inode, mapping);
+ if (may_lock) {
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_invalidate_mapping(inode, mapping);
+ mutex_unlock(&inode->i_mutex);
+ } else
+ ret = nfs_invalidate_mapping(inode, mapping);
trace_nfs_invalidate_mapping_exit(inode, ret);
clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1123,6 +1139,29 @@ out:
return ret;
}
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ return __nfs_revalidate_mapping(inode, mapping, false);
+}
+
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+ return __nfs_revalidate_mapping(inode, mapping, true);
+}
+
static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat
return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
}
-static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
-{
- if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
- return 0;
- return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
-}
-
static atomic_long_t nfs_attr_generation_counter;
static unsigned long nfs_read_attr_generation_counter(void)
@@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void)
{
return atomic_long_inc_return(&nfs_attr_generation_counter);
}
+EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter);
void nfs_fattr_init(struct nfs_fattr *fattr)
{
@@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+ fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
struct nfs_fattr *nfs_alloc_fattr(void)
{
struct nfs_fattr *fattr;
@@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
nfs_ctime_need_update(inode, fattr) ||
- nfs_size_need_update(inode, fattr) ||
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
@@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
int status;
spin_lock(&inode->i_lock);
+ nfs_fattr_set_barrier(fattr);
status = nfs_post_op_update_inode_locked(inode, fattr);
spin_unlock(&inode->i_lock);
@@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
/**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
*
@@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
*
* This function is mainly designed to be used by the ->write_done() functions.
*/
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
{
int status;
- spin_lock(&inode->i_lock);
/* Don't do a WCC update if these attributes are already stale */
if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
!nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
}
out_noforce:
status = nfs_post_op_update_inode_locked(inode, fattr);
+ return status;
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+ int status;
+
+ spin_lock(&inode->i_lock);
+ nfs_fattr_set_barrier(fattr);
+ status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
spin_unlock(&inode->i_lock);
return status;
}
@@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
+ /* Set barrier to be more recent than all outstanding updates */
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
} else {
if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
@@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
}
+ /* Set the barrier to be more recent than this fattr */
+ if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ nfsi->attr_gencount = fattr->gencount;
}
invalid &= ~NFS_INO_INVALID_ATTR;
/* Don't invalidate the data if we were to blame */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+ nfs_writeback_update_inode(hdr);
return 0;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 2a932fdc57cb..53852a4bd88b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+ if (entry->fattr->fileid != entry->ino) {
+ entry->fattr->mounted_on_fileid = entry->ino;
+ entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
+ }
+
/* In fact, a post_op_fh3: */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8646af9b11d2..86d6214ea022 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+ if (pos == new)
+ goto found;
+
if (pos->rpc_ops != new->rpc_ops)
continue;
@@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
- nfs4_schedule_lease_recovery(pos);
- status = nfs4_wait_clnt_recover(pos);
- }
spin_lock(&nn->nfs_client_lock);
if (status < 0)
break;
@@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
*/
if (!nfs4_match_client_owner_id(pos, new))
continue;
-
+found:
atomic_inc(&pos->cl_count);
*result = pos;
status = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88180ac5ea0e..627f37c44456 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
if (!cinfo->atomic || cinfo->before != dir->i_version)
nfs_force_lookup_revalidate(dir);
dir->i_version = cinfo->after;
+ nfsi->attr_gencount = nfs_inc_attr_generation_counter();
nfs_fscache_invalidate(dir);
spin_unlock(&dir->i_lock);
}
@@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode;
+ opendata->o_arg.share_access = nfs4_map_atomic_open_share(
+ NFS_SB(opendata->dentry->d_sb),
+ fmode, 0);
memset(&opendata->o_res, 0, sizeof(opendata->o_res));
memset(&opendata->c_res, 0, sizeof(opendata->c_res));
nfs4_init_opendata_res(opendata);
@@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir,
opendata->o_res.f_attr, sattr,
state, label, olabel);
if (status == 0) {
- nfs_setattr_update_inode(state->inode, sattr);
- nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+ nfs_setattr_update_inode(state->inode, sattr,
+ opendata->o_res.f_attr);
nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
}
}
@@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_EXPIRED:
if (!nfs4_stateid_match(&calldata->arg.stateid,
- &state->stateid)) {
+ &state->open_stateid)) {
rpc_restart_call_prepare(task);
goto out_release;
}
@@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
- nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
+ nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
/* Calculate the change in open mode */
calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
@@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
if (status == 0) {
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
nfs_setsecurity(inode, fattr, label);
}
nfs4_label_free(label);
@@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), hdr->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+ nfs_writeback_update_inode(hdr);
}
return 0;
}
@@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (status == 0) {
clp->cl_clientid = res.clientid;
- clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
- if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+ clp->cl_exchange_flags = res.flags;
+ /* Client ID is not confirmed */
+ if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+ clear_bit(NFS4_SESSION_ESTABLISHED,
+ &clp->cl_session->session_state);
clp->cl_seqid = res.seqid;
+ }
kfree(clp->cl_serverowner);
clp->cl_serverowner = res.server_owner;
@@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session,
struct nfs41_create_session_res *res)
{
nfs4_copy_sessionid(&session->sess_id, &res->sessionid);
+ /* Mark client id and session as being confirmed */
+ session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+ set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state);
session->flags = res->flags;
memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs));
if (res->flags & SESSION4_BACK_CHAN)
@@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
dprintk("--> nfs4_proc_destroy_session\n");
/* session is still being setup */
- if (session->clp->cl_cons_state != NFS_CS_READY)
- return status;
+ if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
+ return 0;
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
trace_nfs4_destroy_session(session->clp, status);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index fc46c7455898..e3ea2c5324d6 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -70,6 +70,7 @@ struct nfs4_session {
enum nfs4_session_state {
NFS4_SESSION_INITING,
+ NFS4_SESSION_ESTABLISHED,
};
extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ad908e9ce9c..f95e3b58bbc3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
status = nfs4_proc_exchange_id(clp, cred);
if (status != NFS4_OK)
return status;
- set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- return nfs41_walk_client_list(clp, result, cred);
+ status = nfs41_walk_client_list(clp, result, cred);
+ if (status < 0)
+ return status;
+ if (clp != *result)
+ return 0;
+
+ /* Purge state if the client id was established in a prior instance */
+ if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
+ set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ else
+ set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ nfs4_schedule_state_manager(clp);
+ status = nfs_wait_client_init_complete(clp);
+ if (status < 0)
+ nfs_put_client(clp);
+ return status;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = hdr->inode;
-
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+ nfs_writeback_update_inode(hdr);
return 0;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..849ed784d6ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
return 0;
}
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+ struct nfs_fattr *fattr)
+{
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
+
+ if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+ return;
+ if (argp->offset + resp->count != fattr->size)
+ return;
+ if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+ return;
+ /* Set attribute barrier */
+ nfs_fattr_set_barrier(fattr);
+}
+
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+ struct nfs_fattr *fattr = hdr->res.fattr;
+ struct inode *inode = hdr->inode;
+
+ if (fattr == NULL)
+ return;
+ spin_lock(&inode->i_lock);
+ nfs_writeback_check_extend(hdr, fattr);
+ nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
+
/*
* This function is called when the WRITE call is complete.
*/
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6b2a09f793f..d2f2c37dc2db 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp)
nfs4_put_stid(&dp->dl_stid);
}
while (!list_empty(&clp->cl_revoked)) {
- dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+ dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
nfs4_put_stid(&dp->dl_stid);
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..ecdbae19a766 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,6 +31,8 @@
#include "alloc.h"
#include "dat.h"
+static void __nilfs_btree_init(struct nilfs_bmap *bmap);
+
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
struct nilfs_btree_path *path;
@@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
return ret;
}
+/**
+ * nilfs_btree_root_broken - verify consistency of btree root node
+ * @node: btree root node to be examined
+ * @ino: inode number
+ *
+ * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ */
+static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
+ unsigned long ino)
+{
+ int level, flags, nchildren;
+ int ret = 0;
+
+ level = nilfs_btree_node_get_level(node);
+ flags = nilfs_btree_node_get_flags(node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+
+ if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
+ level > NILFS_BTREE_LEVEL_MAX ||
+ nchildren < 0 ||
+ nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
+ pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
+ ino, level, flags, nchildren);
+ ret = 1;
+ }
+ return ret;
+}
+
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
int ret;
@@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
/* convert and insert */
dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
- nilfs_btree_init(btree);
+ __nilfs_btree_init(btree);
if (nreq != NULL) {
nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_gather_data = NULL,
};
-int nilfs_btree_init(struct nilfs_bmap *bmap)
+static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
bmap->b_ops = &nilfs_btree_ops;
bmap->b_nchildren_per_block =
NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
- return 0;
+}
+
+int nilfs_btree_init(struct nilfs_bmap *bmap)
+{
+ int ret = 0;
+
+ __nilfs_btree_init(bmap);
+
+ if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
+ bmap->b_inode->i_ino))
+ ret = -EIO;
+ return ret;
}
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..0c3f303baf32 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
struct nilfs_inode_info *ii, *n;
+ int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
int defer_iput = false;
spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
brelse(ii->i_bh);
ii->i_bh = NULL;
list_del_init(&ii->i_dirty);
- if (!ii->vfs_inode.i_nlink) {
+ if (!ii->vfs_inode.i_nlink || during_mount) {
/*
- * Defer calling iput() to avoid a deadlock
- * over I_SYNC flag for inodes with i_nlink == 0
+ * Defer calling iput() to avoid deadlocks if
+ * i_nlink == 0 or mount is not yet finished.
*/
list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
defer_iput = true;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return false;
- if (event_mask & marks_mask & ~marks_ignored_mask)
+ if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
+ ~marks_ignored_mask)
return true;
return false;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
{
- if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+ if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
return 1;
return 0;
}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
| OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
- | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
+ | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
+ | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
- | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
- | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
/*
* Heartbeat-only devices are missing journals and other files. The
@@ -179,6 +179,11 @@
#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
/*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000
+
+/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
*/
@@ -200,10 +205,6 @@
#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
-/*
- * Append Direct IO support
- */
-#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
static int pagemap_open(struct inode *inode, struct file *file)
{
+ /* do not disclose physical addresses: attack vector */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
"to stop being page-shift some time soon. See the "
"linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..a2e1cb8a568b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -397,7 +397,8 @@ STATIC int /* error (positive) */
xfs_zero_last_block(
struct xfs_inode *ip,
xfs_fsize_t offset,
- xfs_fsize_t isize)
+ xfs_fsize_t isize,
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
+ *did_zeroing = true;
return xfs_iozero(ip, isize, zero_len);
}
@@ -443,7 +445,8 @@ int /* error (positive) */
xfs_zero_eof(
struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
+ xfs_fsize_t isize, /* current inode size */
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
* We only zero a part of that block so it is handled specially.
*/
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize);
+ error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
if (error)
return error;
}
@@ -525,6 +528,7 @@ xfs_zero_eof(
if (error)
return error;
+ *did_zeroing = true;
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
}
@@ -567,13 +571,15 @@ restart:
* having to redo all checks before.
*/
if (*pos > i_size_read(inode)) {
+ bool zero = false;
+
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
if (error)
return error;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index daafa1f6d260..6163767aa856 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2867,6 +2867,10 @@ xfs_rename(
* Handle RENAME_EXCHANGE flags
*/
if (flags & RENAME_EXCHANGE) {
+ if (target_ip == NULL) {
+ error = -EINVAL;
+ goto error_return;
+ }
error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
target_dp, target_name, target_ip,
&free_list, &first_block, spaceres);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 86cd6b39bed7..a1cd55f3f351 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -384,10 +384,11 @@ enum xfs_prealloc_flags {
XFS_PREALLOC_INVISIBLE = (1 << 4),
};
-int xfs_update_prealloc_flags(struct xfs_inode *,
- enum xfs_prealloc_flags);
-int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-int xfs_iozero(struct xfs_inode *, loff_t, size_t);
+int xfs_update_prealloc_flags(struct xfs_inode *ip,
+ enum xfs_prealloc_flags flags);
+int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_fsize_t isize, bool *did_zeroing);
+int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
#define IHOLD(ip) \
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d919ad7b16bf..e53a90331422 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -751,6 +751,7 @@ xfs_setattr_size(
int error;
uint lock_flags = 0;
uint commit_flags = 0;
+ bool did_zeroing = false;
trace_xfs_setattr(ip);
@@ -794,20 +795,16 @@ xfs_setattr_size(
return error;
/*
- * Now we can make the changes. Before we join the inode to the
- * transaction, take care of the part of the truncation that must be
- * done without the inode lock. This needs to be done before joining
- * the inode to the transaction, because the inode cannot be unlocked
- * once it is a part of the transaction.
+ * File data changes must be complete before we start the transaction to
+ * modify the inode. This needs to be done before joining the inode to
+ * the transaction because the inode cannot be unlocked once it is a
+ * part of the transaction.
+ *
+ * Start with zeroing any data block beyond EOF that we may expose on
+ * file extension.
*/
if (newsize > oldsize) {
- /*
- * Do the first part of growing a file: zero any data in the
- * last block that is beyond the old EOF. We need to do this
- * before the inode is joined to the transaction to modify
- * i_size.
- */
- error = xfs_zero_eof(ip, newsize, oldsize);
+ error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
if (error)
return error;
}
@@ -817,23 +814,18 @@ xfs_setattr_size(
* any previous writes that are beyond the on disk EOF and the new
* EOF that have not been written out need to be written here. If we
* do not write the data out, we expose ourselves to the null files
- * problem.
- *
- * Only flush from the on disk size to the smaller of the in memory
- * file size or the new size as that's the range we really care about
- * here and prevents waiting for other data not within the range we
- * care about here.
+ * problem. Note that this includes any block zeroing we did above;
+ * otherwise those blocks may not be zeroed after a crash.
*/
- if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+ if (newsize > ip->i_d.di_size &&
+ (oldsize != ip->i_d.di_size || did_zeroing)) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
}
- /*
- * Wait for all direct I/O to complete.
- */
+ /* Now wait for all direct I/O to complete. */
inode_dio_wait(inode);
/*
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 4b33ef112400..365dd57ea760 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -300,8 +300,10 @@ xfs_fs_commit_blocks(
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error)
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_drop_iolock;
+ }
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 53cc2aaf8d2b..fbbb9e62e274 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -836,6 +836,11 @@ xfs_qm_reset_dqcounts(
*/
xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
"xfs_quotacheck");
+ /*
+ * Reset type in case we are reusing group quota file for
+ * project quotas or vice versa
+ */
+ ddq->d_flags = type;
ddq->d_bcount = 0;
ddq->d_icount = 0;
ddq->d_rtbcount = 0;