summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/flock.c2
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/internal.h3
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/rxrpc.c7
-rw-r--r--fs/afs/yfsclient.c3
-rw-r--r--fs/btrfs/block-group.c47
-rw-r--r--fs/btrfs/block-group.h4
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c30
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/space-info.c2
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/btrfs/xattr.c3
-rw-r--r--fs/btrfs/zoned.c99
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/ondemand.c22
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifsglob.h7
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/smb2ops.c136
-rw-r--r--fs/cifs/smb2pdu.c28
-rw-r--r--fs/cifs/transport.c27
-rw-r--r--fs/debugfs/inode.c22
-rw-r--r--fs/erofs/fscache.c8
-rw-r--r--fs/erofs/internal.h29
-rw-r--r--fs/erofs/zmap.c16
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/inode.c14
-rw-r--r--fs/locks.c1
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/internal.h25
-rw-r--r--fs/nfs/nfs42proc.c9
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/pnfs.c1
-rw-r--r--fs/nfs/super.c27
-rw-r--r--fs/nfs/write.c31
-rw-r--r--fs/nfsd/vfs.c31
-rw-r--r--fs/ntfs3/xattr.c16
-rw-r--r--fs/ocfs2/dlmglue.c8
-rw-r--r--fs/ocfs2/super.c3
-rw-r--r--fs/overlayfs/inode.c11
-rw-r--r--fs/posix_acl.c15
-rw-r--r--fs/proc/task_mmu.c7
-rw-r--r--fs/squashfs/file.c2
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/page_actor.c34
-rw-r--r--fs/squashfs/page_actor.h5
-rw-r--r--fs/tracefs/inode.c31
-rw-r--r--fs/userfaultfd.c4
61 files changed, 528 insertions, 427 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index c4210a3964d8..bbcc5afd1576 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call)
if (call->error == 0) {
spin_lock(&vnode->lock);
trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
- vnode->locked_at = call->reply_time;
+ vnode->locked_at = call->issue_time;
afs_schedule_lock_extension(vnode);
spin_unlock(&vnode->lock);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4943413d9c5f..7d37f63ef0f0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -131,7 +131,7 @@ bad:
static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
{
- return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
+ return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry;
}
static void xdr_decode_AFSCallBack(const __be32 **_bp,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 64ad55494349..723d162078a3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -137,7 +137,6 @@ struct afs_call {
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
- bool have_reply_time; /* T if have got reply_time */
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
u16 service_id; /* Actual service ID (after upgrade) */
@@ -151,7 +150,7 @@ struct afs_call {
} __attribute__((packed));
__be64 tmp64;
};
- ktime_t reply_time; /* Time of first reply packet */
+ ktime_t issue_time; /* Time of issue of operation */
};
struct afs_call_type {
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 933e67fcdab1..805328ca5428 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -69,6 +69,7 @@ int afs_abort_to_error(u32 abort_code)
/* Unified AFS error table */
case UAEPERM: return -EPERM;
case UAENOENT: return -ENOENT;
+ case UAEAGAIN: return -EAGAIN;
case UAEACCES: return -EACCES;
case UAEBUSY: return -EBUSY;
case UAEEXIST: return -EEXIST;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5c4785c862d..eccc3cd0cb70 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -351,6 +351,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
if (call->max_lifespan)
rxrpc_kernel_set_max_life(call->net->socket, rxcall,
call->max_lifespan);
+ call->issue_time = ktime_get_real();
/* send the request */
iov[0].iov_base = call->request;
@@ -501,12 +502,6 @@ static void afs_deliver_to_call(struct afs_call *call)
return;
}
- if (!call->have_reply_time &&
- rxrpc_kernel_get_reply_time(call->net->socket,
- call->rxcall,
- &call->reply_time))
- call->have_reply_time = true;
-
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
if (ret == 0 && call->unmarshalling_error)
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index fdc7d675b4b0..11571cca86c1 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -232,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp,
struct afs_callback *cb = &scb->callback;
ktime_t cb_expiry;
- cb_expiry = call->reply_time;
- cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
+ cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100);
cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
scb->have_cb = true;
*_bp += xdr_size(x);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 993aca2f1e18..e0375ba9d0fe 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
btrfs_put_caching_control(caching_ctl);
}
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
+ struct btrfs_caching_control *caching_ctl)
+{
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
+}
+
+static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
{
struct btrfs_caching_control *caching_ctl;
- int ret = 0;
+ int ret;
caching_ctl = btrfs_get_caching_control(cache);
if (!caching_ctl)
return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
-
- wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
- if (cache->cached == BTRFS_CACHE_ERROR)
- ret = -EIO;
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
btrfs_put_caching_control(caching_ctl);
return ret;
}
-static bool space_cache_v1_done(struct btrfs_block_group *cache)
-{
- bool ret;
-
- spin_lock(&cache->lock);
- ret = cache->cached != BTRFS_CACHE_FAST;
- spin_unlock(&cache->lock);
-
- return ret;
-}
-
-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
- struct btrfs_caching_control *caching_ctl)
-{
- wait_event(caching_ctl->wait, space_cache_v1_done(cache));
-}
-
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group *block_group)
{
@@ -750,9 +737,8 @@ done:
btrfs_put_block_group(block_group);
}
-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
{
- DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl = NULL;
int ret = 0;
@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
- if (btrfs_test_opt(fs_info, SPACE_CACHE))
- cache->cached = BTRFS_CACHE_FAST;
- else
- cache->cached = BTRFS_CACHE_STARTED;
+ cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- if (load_cache_only && caching_ctl)
- btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+ if (wait && caching_ctl)
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
@@ -3312,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* space back to the block group, otherwise we will leak space.
*/
if (!alloc && !btrfs_block_group_done(cache))
- btrfs_cache_block_group(cache, 1);
+ btrfs_cache_block_group(cache, true);
byte_in_group = bytenr - cache->start;
WARN_ON(byte_in_group > cache->length);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 35e0e860cc0b..6b3cdc4cbc41 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
-int btrfs_cache_block_group(struct btrfs_block_group *cache,
- int load_cache_only);
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4edb4bfb2166..df8c99c99df9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -505,7 +505,6 @@ struct btrfs_free_cluster {
enum btrfs_caching_type {
BTRFS_CACHE_NO,
BTRFS_CACHE_STARTED,
- BTRFS_CACHE_FAST,
BTRFS_CACHE_FINISHED,
BTRFS_CACHE_ERROR,
};
@@ -1089,8 +1088,6 @@ struct btrfs_fs_info {
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
- /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
- wait_queue_head_t zone_finish_wait;
/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f43196a893ca..41cddd3ff059 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found:
*/
if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
- "replace devid present without an active replace item");
+"replace without active item, run 'device scan --forget' on the target device");
ret = -EUCLEAN;
} else {
dev_replace->srcdev = NULL;
@@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
up_write(&dev_replace->rwsem);
/* Scrub for replace must not be running in suspended state */
- ret = btrfs_scrub_cancel(fs_info);
- ASSERT(ret != -ENOTCONN);
+ btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 820b1f1e6b67..1af28b066b42 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
- init_waitqueue_head(&fs_info->zone_finish_wait);
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ab944d1f94ef..6914cd8024ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
return -EINVAL;
/*
- * pull in the free space cache (if any) so that our pin
- * removes the free space from the cache. We have load_only set
- * to one because the slow code to read in the free extents does check
- * the pinned extents.
+ * Fully cache the free space first so that our pin removes the free space
+ * from the cache.
*/
- btrfs_cache_block_group(cache, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret)
goto out;
@@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!block_group)
return -EINVAL;
- btrfs_cache_block_group(block_group, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(block_group);
+ ret = btrfs_cache_block_group(block_group, true);
if (ret)
goto out;
@@ -4399,7 +4387,7 @@ have_block_group:
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
- ret = btrfs_cache_block_group(block_group, 0);
+ ret = btrfs_cache_block_group(block_group, false);
/*
* If we get ENOMEM here or something else we want to
@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
if (end - start >= range->minlen) {
if (!btrfs_block_group_done(cache)) {
- ret = btrfs_cache_block_group(cache, 0);
- if (ret) {
- bg_failed++;
- bg_ret = ret;
- continue;
- }
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret) {
bg_failed++;
bg_ret = ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eed81a7e36a4..cf4f19e80e2f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
u32 bio_size = bio->bi_iter.bi_size;
u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
- bool contig;
+ bool contig = false;
int ret;
ASSERT(bio);
@@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (bio_ctrl->compress_type != compress_type)
return 0;
- if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
+
+ if (bio->bi_iter.bi_size == 0) {
+ /* We can always add a page into an empty bio. */
+ contig = true;
+ } else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) {
+ struct bio_vec *bvec = bio_last_bvec_all(bio);
+
+ /*
+ * The contig check requires the following conditions to be met:
+ * 1) The pages belong to the same inode
+ * This is implied by the call chain.
+ *
+ * 2) The range has adjacent logical bytenr
+ *
+ * 3) The range has adjacent file offset
+ * This is required for the usage of btrfs_bio->file_offset.
+ */
+ if (bio_end_sector(bio) == sector &&
+ page_offset(bvec->bv_page) + bvec->bv_offset +
+ bvec->bv_len == page_offset(page) + pg_offset)
+ contig = true;
+ } else {
+ /*
+ * For compression, all IO should have its logical bytenr
+ * set to the starting bytenr of the compressed extent.
+ */
contig = bio->bi_iter.bi_sector == sector;
- else
- contig = bio_end_sector(bio) == sector;
+ }
+
if (!contig)
return 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66c822182ecc..5a3f6e0d9688 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2482,6 +2482,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
@@ -2498,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f0c97d25b4a0..1372210869b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
done_offset = end;
if (done_offset == start) {
- struct btrfs_fs_info *info = inode->root->fs_info;
-
- wait_var_event(&info->zone_finish_wait,
- !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+ wait_on_bit_io(&inode->root->fs_info->flags,
+ BTRFS_FS_NEED_ZONE_FINISH,
+ TASK_UNINTERRUPTIBLE);
continue;
}
@@ -7694,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
bool unlock_extents = false;
/*
+ * We could potentially fault if we have a buffer > PAGE_SIZE, and if
+ * we're NOWAIT we may submit a bio for a partial range and return
+ * EIOCBQUEUED, which would result in an errant short read.
+ *
+ * The best way to handle this would be to allow for partial completions
+ * of iocb's, so we could submit the partial bio, return and fault in
+ * the rest of the pages, and then submit the io for the rest of the
+ * range. However we don't have that currently, so simply return
+ * -EAGAIN at this point so that the normal path is used.
+ */
+ if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE)
+ return -EAGAIN;
+
+ /*
* Cap the size of reads to that usually seen in buffered I/O as we need
* to allocate a contiguous array for the checksums.
*/
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index a64b26b16904..d647cb2938c0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- if (ret < 0)
+ if (ret < 0) {
+ err = ret;
goto out;
- if (ret == 0) {
+ } else if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d0cbeb7ae81c..435559ba94fa 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
if (flags & BTRFS_BLOCK_GROUP_DATA)
- return SZ_1G;
+ return BTRFS_MAX_DATA_CHUNK_SIZE;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return SZ_32M;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 272901514b0c..f63ff91e2883 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
&bdev, &disk_super);
- if (ret)
+ if (ret) {
+ btrfs_put_dev_args_from_path(args);
return ret;
+ }
+
args->devid = btrfs_stack_device_id(&disk_super->dev_item);
memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
@@ -5264,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
ctl->stripe_size);
}
+ /* Stripe size should not go beyond 1G. */
+ ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
+
/* Align to BTRFS_STRIPE_LEN */
ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
ctl->chunk_size = ctl->stripe_size * data_stripes;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 7421abcf325a..5bb8d8c86311 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
const char *name, const void *buffer,
size_t size, int flags)
{
+ if (btrfs_root_readonly(BTRFS_I(inode)->root))
+ return -EROFS;
+
name = xattr_full_name(handler, name);
return btrfs_setxattr_trans(inode, name, buffer, size, flags);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b150b07ba1a7..62e7007a7e46 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
* since btrfs adds the pages one by one to a bio, and btrfs cannot
* increase the metadata reservation even if it increases the number of
* extents, it is safe to stick with the limit.
+ *
+ * With zoned emulation, we can have a non-zoned device in zoned
+ * mode. In this case, we don't have a valid max zone append size, so
+ * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
*/
- zone_info->max_zone_append_size =
- min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+ if (bdev_is_zoned(bdev)) {
+ zone_info->max_zone_append_size = min_t(u64,
+ (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
+ (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+ } else {
+ zone_info->max_zone_append_size =
+ (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
+ }
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
* offset.
*/
static int calculate_alloc_pointer(struct btrfs_block_group *cache,
- u64 *offset_ret)
+ u64 *offset_ret, bool new)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_root *root;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
int ret;
u64 length;
+ /*
+ * Avoid tree lookups for a new block group, there's no use for it.
+ * It must always be 0.
+ *
+ * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+ * For a new block group, this function is called from
+ * btrfs_make_block_group() which is already taking the chunk mutex.
+ * Thus, to avoid a deadlock, we must not do the tree lookup below,
+ * which takes extent buffer locks.
+ */
+ if (new) {
+ *offset_ret = 0;
+ return 0;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
else
num_conventional++;
+ /*
+ * Consider a zone as active if we can allow any number of
+ * active zones.
+ */
+ if (!device->zone_info->max_active_zones)
+ __set_bit(i, active);
+
if (!is_sequential) {
alloc_offsets[i] = WP_CONVENTIONAL;
continue;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
__set_bit(i, active);
break;
}
-
- /*
- * Consider a zone as active if we can allow any number of
- * active zones.
- */
- if (!device->zone_info->max_active_zones)
- __set_bit(i, active);
}
if (num_sequential > 0)
cache->seq_zone = true;
if (num_conventional > 0) {
- /*
- * Avoid calling calculate_alloc_pointer() for new BG. It
- * is no use for new BG. It must be always 0.
- *
- * Also, we have a lock chain of extent buffer lock ->
- * chunk mutex. For new BG, this function is called from
- * btrfs_make_block_group() which is already taking the
- * chunk mutex. Thus, we cannot call
- * calculate_alloc_pointer() which takes extent buffer
- * locks to avoid deadlock.
- */
-
/* Zone capacity is always zone size in emulation */
cache->zone_capacity = cache->length;
- if (new) {
- cache->alloc_offset = 0;
- goto out;
- }
- ret = calculate_alloc_pointer(cache, &last_alloc);
- if (ret || map->num_stripes == num_conventional) {
- if (!ret)
- cache->alloc_offset = last_alloc;
- else
- btrfs_err(fs_info,
+ ret = calculate_alloc_pointer(cache, &last_alloc, new);
+ if (ret) {
+ btrfs_err(fs_info,
"zoned: failed to determine allocation offset of bg %llu",
- cache->start);
+ cache->start);
+ goto out;
+ } else if (map->num_stripes == num_conventional) {
+ cache->alloc_offset = last_alloc;
+ cache->zone_is_active = 1;
goto out;
}
}
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
- if (cache->zone_is_active) {
- btrfs_get_block_group(cache);
- spin_lock(&fs_info->zone_active_bgs_lock);
- list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
- spin_unlock(&fs_info->zone_active_bgs_lock);
- }
-
out:
if (cache->alloc_offset > fs_info->zone_size) {
btrfs_err(fs_info,
@@ -1526,10 +1528,16 @@ out:
ret = -EIO;
}
- if (!ret)
+ if (!ret) {
cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
- if (ret) {
+ if (cache->zone_is_active) {
+ btrfs_get_block_group(cache);
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_add_tail(&cache->active_bg_list,
+ &fs_info->zone_active_bgs);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ }
+ } else {
kfree(cache->physical_map);
cache->physical_map = NULL;
}
@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* For active_bg_list */
btrfs_put_block_group(block_group);
- clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
- wake_up_all(&fs_info->zone_finish_wait);
+ clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
return 0;
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6cba2c6de2f9..2ad58c465208 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -111,6 +111,7 @@ struct cachefiles_cache {
char *tag; /* cache binding tag */
refcount_t unbind_pincount;/* refcount to do daemon unbind */
struct xarray reqs; /* xarray of pending on-demand requests */
+ unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
};
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index 1fee702d5529..0254ed39f68c 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -158,9 +158,13 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
/* fail OPEN request if daemon reports an error */
if (size < 0) {
- if (!IS_ERR_VALUE(size))
- size = -EINVAL;
- req->error = size;
+ if (!IS_ERR_VALUE(size)) {
+ req->error = -EINVAL;
+ ret = -EINVAL;
+ } else {
+ req->error = size;
+ ret = 0;
+ }
goto out;
}
@@ -238,14 +242,19 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
unsigned long id = 0;
size_t n;
int ret = 0;
- XA_STATE(xas, &cache->reqs, 0);
+ XA_STATE(xas, &cache->reqs, cache->req_id_next);
/*
- * Search for a request that has not ever been processed, to prevent
- * requests from being processed repeatedly.
+ * Cyclically search for a request that has not ever been processed,
+ * to prevent requests from being processed repeatedly, and make
+ * request distribution fair.
*/
xa_lock(&cache->reqs);
req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW);
+ if (!req && cache->req_id_next > 0) {
+ xas_set(&xas, 0);
+ req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW);
+ }
if (!req) {
xa_unlock(&cache->reqs);
return 0;
@@ -260,6 +269,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
}
xas_clear_mark(&xas, CACHEFILES_REQ_NEW);
+ cache->req_id_next = xas.xa_index + 1;
xa_unlock(&cache->reqs);
id = xas.xa_index;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 8f7835ccbca1..46f5718754f9 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -32,10 +32,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
int rc;
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
- int is_smb2 = server->vals->header_preamble_size == 0;
/* iov[0] is actual data and not the rfc1002 length for SMB2+ */
- if (is_smb2) {
+ if (!is_smb1(server)) {
if (iov[0].iov_len <= 4)
return -EIO;
i = 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f54d8bf2732a..8042d7280dec 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1248,6 +1248,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
lock_two_nondirectories(target_inode, src_inode);
cifs_dbg(FYI, "about to flush pages\n");
+
+ rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
+ off + len - 1);
+ if (rc)
+ goto out;
+
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 81f4c15936d0..5b4a7a32bdc5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 38
-#define CIFS_VERSION "2.38"
+#define SMB3_PRODUCT_BUILD 39
+#define CIFS_VERSION "2.39"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f15d7b0c123d..ae7f571a7dba 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -557,6 +557,8 @@ struct smb_version_values {
#define HEADER_SIZE(server) (server->vals->header_size)
#define MAX_HEADER_SIZE(server) (server->vals->max_header_size)
+#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size)
+#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server))
/**
* CIFS superblock mount flags (mnt_cifs_flags) to consider when
@@ -750,6 +752,11 @@ struct TCP_Server_Info {
#endif
};
+static inline bool is_smb1(struct TCP_Server_Info *server)
+{
+ return HEADER_PREAMBLE_SIZE(server) != 0;
+}
+
static inline void cifs_server_lock(struct TCP_Server_Info *server)
{
unsigned int nofs_flag = memalloc_nofs_save();
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3da5da9f16b0..7ae6f2c08153 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -702,9 +702,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
int length = 0;
int total_read;
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
-
for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
try_to_freeze();
@@ -760,7 +757,7 @@ int
cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct kvec iov = {.iov_base = buf, .iov_len = to_read};
iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
@@ -770,15 +767,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
ssize_t
cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
/*
* iov_iter_discard already sets smb_msg.type and count and iov_offset
* and cifs_readv_from_socket sets msg_control and msg_controllen
* so little to initialize in struct msghdr
*/
- smb_msg.msg_name = NULL;
- smb_msg.msg_namelen = 0;
iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
return cifs_readv_from_socket(server, &smb_msg);
@@ -788,7 +783,7 @@ int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
@@ -871,7 +866,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
/*
* SMB1 does not use credits.
*/
- if (server->vals->header_preamble_size)
+ if (is_smb1(server))
return 0;
return le16_to_cpu(shdr->CreditRequest);
@@ -1050,7 +1045,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* make sure this will fit in a large buffer */
if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) -
- server->vals->header_preamble_size) {
+ HEADER_PREAMBLE_SIZE(server)) {
cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
cifs_reconnect(server, true);
return -ECONNABORTED;
@@ -1065,8 +1060,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* now read the rest */
length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
- pdu_length - HEADER_SIZE(server) + 1
- + server->vals->header_preamble_size);
+ pdu_length - MID_HEADER_SIZE(server));
if (length < 0)
return length;
@@ -1122,7 +1116,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
/*
* SMB1 does not use credits.
*/
- if (server->vals->header_preamble_size)
+ if (is_smb1(server))
return;
if (shdr->CreditRequest) {
@@ -1180,10 +1174,10 @@ cifs_demultiplex_thread(void *p)
if (length < 0)
continue;
- if (server->vals->header_preamble_size == 0)
- server->total_read = 0;
- else
+ if (is_smb1(server))
server->total_read = length;
+ else
+ server->total_read = 0;
/*
* The right amount was read from socket - 4 bytes,
@@ -1198,8 +1192,7 @@ next_pdu:
server->pdu_size = pdu_length;
/* make sure we have enough to get to the MID */
- if (server->pdu_size < HEADER_SIZE(server) - 1 -
- server->vals->header_preamble_size) {
+ if (server->pdu_size < MID_HEADER_SIZE(server)) {
cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n",
server->pdu_size);
cifs_reconnect(server, true);
@@ -1208,9 +1201,8 @@ next_pdu:
/* read down to the MID */
length = cifs_read_from_socket(server,
- buf + server->vals->header_preamble_size,
- HEADER_SIZE(server) - 1
- - server->vals->header_preamble_size);
+ buf + HEADER_PREAMBLE_SIZE(server),
+ MID_HEADER_SIZE(server));
if (length < 0)
continue;
server->total_read += length;
@@ -2353,7 +2345,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
ses = tcon->ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
+ spin_lock(&tcon->tc_lock);
if (--tcon->tc_count > 0) {
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return;
}
@@ -2362,6 +2356,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
WARN_ON(tcon->tc_count < 0);
list_del_init(&tcon->tcon_list);
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
/* cancel polling of interfaces */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fa738adc031f..6f38b134a346 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3575,6 +3575,9 @@ static ssize_t __cifs_writev(
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
+ struct file *file = iocb->ki_filp;
+
+ cifs_revalidate_mapping(file->f_inode);
return __cifs_writev(iocb, from, true);
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96f3b0573606..421be43af425 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1600,17 +1600,8 @@ smb2_copychunk_range(const unsigned int xid,
int chunks_copied = 0;
bool chunk_sizes_updated = false;
ssize_t bytes_written, total_bytes_written = 0;
- struct inode *inode;
pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
-
- /*
- * We need to flush all unwritten data before we can send the
- * copychunk ioctl to the server.
- */
- inode = d_inode(trgtfile->dentry);
- filemap_write_and_wait(inode->i_mapping);
-
if (pcchunk == NULL)
return -ENOMEM;
@@ -3307,26 +3298,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb,
return pntsd;
}
+static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len, unsigned int xid)
+{
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+
+ cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
+
+ fsctl_buf.FileOffset = cpu_to_le64(offset);
+ fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+ return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+ (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information),
+ 0, NULL, NULL);
+}
+
static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len, bool keep_size)
{
struct cifs_ses *ses = tcon->ses;
- struct inode *inode;
- struct cifsInodeInfo *cifsi;
+ struct inode *inode = file_inode(file);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
- struct file_zero_data_information fsctl_buf;
long rc;
unsigned int xid;
__le64 eof;
xid = get_xid();
- inode = d_inode(cfile->dentry);
- cifsi = CIFS_I(inode);
-
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+
/*
* We zero the range through ioctl, so we need remove the page caches
* first, otherwise the data may be inconsistent with the server.
@@ -3334,26 +3342,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
- if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false) {
- rc = -EOPNOTSUPP;
- trace_smb3_zero_err(xid, cfile->fid.persistent_fid,
- tcon->tid, ses->Suid, offset, len, rc);
- free_xid(xid);
- return rc;
- }
-
- cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
-
- fsctl_buf.FileOffset = cpu_to_le64(offset);
- fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+ rc = -EOPNOTSUPP;
+ if (keep_size == false && !CIFS_CACHE_READ(cifsi))
+ goto zero_range_exit;
- rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- (char *)&fsctl_buf,
- sizeof(struct file_zero_data_information),
- 0, NULL, NULL);
- if (rc)
+ rc = smb3_zero_data(file, tcon, offset, len, xid);
+ if (rc < 0)
goto zero_range_exit;
/*
@@ -3366,6 +3360,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
}
zero_range_exit:
+ filemap_invalidate_unlock(inode->i_mapping);
+ inode_unlock(inode);
free_xid(xid);
if (rc)
trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid,
@@ -3379,7 +3375,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len)
{
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
long rc;
@@ -3388,14 +3384,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
xid = get_xid();
- inode = d_inode(cfile->dentry);
-
+ inode_lock(inode);
/* Need to make file sparse, if not already, before freeing range. */
/* Consider adding equivalent for compressed since it could also work */
if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
rc = -EOPNOTSUPP;
- free_xid(xid);
- return rc;
+ goto out;
}
filemap_invalidate_lock(inode->i_mapping);
@@ -3415,8 +3409,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
(char *)&fsctl_buf,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
- free_xid(xid);
filemap_invalidate_unlock(inode->i_mapping);
+out:
+ inode_unlock(inode);
+ free_xid(xid);
return rc;
}
@@ -3673,39 +3669,50 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
{
int rc;
unsigned int xid;
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
- struct cifsInodeInfo *cifsi;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
__le64 eof;
+ loff_t old_eof;
xid = get_xid();
- inode = d_inode(cfile->dentry);
- cifsi = CIFS_I(inode);
+ inode_lock(inode);
- if (off >= i_size_read(inode) ||
- off + len >= i_size_read(inode)) {
+ old_eof = i_size_read(inode);
+ if ((off >= old_eof) ||
+ off + len >= old_eof) {
rc = -EINVAL;
goto out;
}
+ filemap_invalidate_lock(inode->i_mapping);
+ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof - 1);
+ if (rc < 0)
+ goto out_2;
+
+ truncate_pagecache_range(inode, off, old_eof);
+
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
- i_size_read(inode) - off - len, off);
+ old_eof - off - len, off);
if (rc < 0)
- goto out;
+ goto out_2;
- eof = cpu_to_le64(i_size_read(inode) - len);
+ eof = cpu_to_le64(old_eof - len);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
if (rc < 0)
- goto out;
+ goto out_2;
rc = 0;
cifsi->server_eof = i_size_read(inode) - len;
truncate_setsize(inode, cifsi->server_eof);
fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof);
+out_2:
+ filemap_invalidate_unlock(inode->i_mapping);
out:
+ inode_unlock(inode);
free_xid(xid);
return rc;
}
@@ -3716,34 +3723,47 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
int rc;
unsigned int xid;
struct cifsFileInfo *cfile = file->private_data;
+ struct inode *inode = file_inode(file);
__le64 eof;
- __u64 count;
+ __u64 count, old_eof;
xid = get_xid();
- if (off >= i_size_read(file->f_inode)) {
+ inode_lock(inode);
+
+ old_eof = i_size_read(inode);
+ if (off >= old_eof) {
rc = -EINVAL;
goto out;
}
- count = i_size_read(file->f_inode) - off;
- eof = cpu_to_le64(i_size_read(file->f_inode) + len);
+ count = old_eof - off;
+ eof = cpu_to_le64(old_eof + len);
+
+ filemap_invalidate_lock(inode->i_mapping);
+ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof + len - 1);
+ if (rc < 0)
+ goto out_2;
+ truncate_pagecache_range(inode, off, old_eof);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
if (rc < 0)
- goto out;
+ goto out_2;
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
if (rc < 0)
- goto out;
+ goto out_2;
- rc = smb3_zero_range(file, tcon, off, len, 1);
+ rc = smb3_zero_data(file, tcon, off, len, xid);
if (rc < 0)
- goto out;
+ goto out_2;
rc = 0;
+out_2:
+ filemap_invalidate_unlock(inode->i_mapping);
out:
+ inode_unlock(inode);
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 91cfc5b47ac7..6352ab32c7e7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -965,16 +965,17 @@ SMB2_negotiate(const unsigned int xid,
} else if (rc != 0)
goto neg_exit;
+ rc = -EIO;
if (strcmp(server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2.1 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
server->ops = &smb311_operations;
@@ -985,7 +986,7 @@ SMB2_negotiate(const unsigned int xid,
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
server->ops = &smb21_operations;
@@ -999,7 +1000,7 @@ SMB2_negotiate(const unsigned int xid,
/* if requested single dialect ensure returned dialect matched */
cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n",
le16_to_cpu(rsp->DialectRevision));
- return -EIO;
+ goto neg_exit;
}
cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
@@ -1017,9 +1018,10 @@ SMB2_negotiate(const unsigned int xid,
else {
cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n",
le16_to_cpu(rsp->DialectRevision));
- rc = -EIO;
goto neg_exit;
}
+
+ rc = 0;
server->dialect = le16_to_cpu(rsp->DialectRevision);
/*
@@ -2572,19 +2574,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
- /*
- * make room for one path separator between the treename and
- * path
- */
- *out_len = treename_len + 1 + path_len;
+ /* make room for one path separator only if @path isn't empty */
+ *out_len = treename_len + (path[0] ? 1 : 0) + path_len;
/*
- * final path needs to be null-terminated UTF16 with a
- * size aligned to 8
+ * final path needs to be 8-byte aligned as specified in
+ * MS-SMB2 2.2.13 SMB2 CREATE Request.
*/
-
- *out_size = roundup((*out_len+1)*2, 8);
- *out_path = kzalloc(*out_size, GFP_KERNEL);
+ *out_size = roundup(*out_len * sizeof(__le16), 8);
+ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
if (!*out_path)
return -ENOMEM;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index de7aeced7e16..9a2753e21170 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -194,10 +194,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
*sent = 0;
- smb_msg->msg_name = (struct sockaddr *) &server->dstaddr;
- smb_msg->msg_namelen = sizeof(struct sockaddr);
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
if (server->noblocksnd)
smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
else
@@ -261,8 +257,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
int nvec;
unsigned long buflen = 0;
- if (server->vals->header_preamble_size == 0 &&
- rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) {
+ if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
+ rqst->rq_iov[0].iov_len == 4) {
iov = &rqst->rq_iov[1];
nvec = rqst->rq_nvec - 1;
} else {
@@ -309,7 +305,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
sigset_t mask, oldmask;
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
__be32 rfc1002_marker;
if (cifs_rdma_enabled(server)) {
@@ -346,7 +342,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
sigprocmask(SIG_BLOCK, &mask, &oldmask);
/* Generate a rfc1002 marker for SMB2+ */
- if (server->vals->header_preamble_size == 0) {
+ if (!is_smb1(server)) {
struct kvec hiov = {
.iov_base = &rfc1002_marker,
.iov_len = 4
@@ -1238,7 +1234,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
buf = (char *)midQ[i]->resp_buf;
resp_iov[i].iov_base = buf;
resp_iov[i].iov_len = midQ[i]->resp_buf_size +
- server->vals->header_preamble_size;
+ HEADER_PREAMBLE_SIZE(server);
if (midQ[i]->large_buf)
resp_buf_type[i] = CIFS_LARGE_BUFFER;
@@ -1643,7 +1639,7 @@ int
cifs_discard_remaining_data(struct TCP_Server_Info *server)
{
unsigned int rfclen = server->pdu_size;
- int remaining = rfclen + server->vals->header_preamble_size -
+ int remaining = rfclen + HEADER_PREAMBLE_SIZE(server) -
server->total_read;
while (remaining > 0) {
@@ -1689,8 +1685,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
unsigned int data_offset, data_len;
struct cifs_readdata *rdata = mid->callback_data;
char *buf = server->smallbuf;
- unsigned int buflen = server->pdu_size +
- server->vals->header_preamble_size;
+ unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server);
bool use_rdma_mr = false;
cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
@@ -1724,10 +1719,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* set up first two iov for signature check and to get credits */
rdata->iov[0].iov_base = buf;
- rdata->iov[0].iov_len = server->vals->header_preamble_size;
- rdata->iov[1].iov_base = buf + server->vals->header_preamble_size;
+ rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server);
+ rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server);
rdata->iov[1].iov_len =
- server->total_read - server->vals->header_preamble_size;
+ server->total_read - HEADER_PREAMBLE_SIZE(server);
cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
rdata->iov[0].iov_base, rdata->iov[0].iov_len);
cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
@@ -1752,7 +1747,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
}
data_offset = server->ops->read_data_offset(buf) +
- server->vals->header_preamble_size;
+ HEADER_PREAMBLE_SIZE(server);
if (data_offset < server->total_read) {
/*
* win2k8 sometimes sends an offset of 0 when the read
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3dcf0b8b4e93..232cfdf095ae 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -745,6 +745,28 @@ void debugfs_remove(struct dentry *dentry)
EXPORT_SYMBOL_GPL(debugfs_remove);
/**
+ * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it
+ * @name: a pointer to a string containing the name of the item to look up.
+ * @parent: a pointer to the parent dentry of the item.
+ *
+ * This is the equivalent of doing something like
+ * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting
+ * handled for the directory being looked up.
+ */
+void debugfs_lookup_and_remove(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+
+ dentry = debugfs_lookup(name, parent);
+ if (!dentry)
+ return;
+
+ debugfs_remove(dentry);
+ dput(dentry);
+}
+EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove);
+
+/**
* debugfs_rename - rename a file/directory in the debugfs filesystem
* @old_dir: a pointer to the parent dentry for the renamed object. This
* should be a directory dentry.
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 8e01d89c3319..b5fd9d71e67f 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -222,8 +222,10 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
rreq = erofs_fscache_alloc_request(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
- if (IS_ERR(rreq))
+ if (IS_ERR(rreq)) {
+ ret = PTR_ERR(rreq);
goto out;
+ }
return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
rreq, mdev.m_pa);
@@ -301,8 +303,10 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
rreq = erofs_fscache_alloc_request(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
- if (IS_ERR(rreq))
+ if (IS_ERR(rreq)) {
+ ret = PTR_ERR(rreq);
goto out_unlock;
+ }
pstart = mdev.m_pa + (pos - map.m_la);
return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index cfee49d33b95..a01cc82795a2 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -195,7 +195,6 @@ struct erofs_workgroup {
atomic_t refcount;
};
-#if defined(CONFIG_SMP)
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
@@ -224,34 +223,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
-#else
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
- int val)
-{
- preempt_disable();
- /* no need to spin on UP platforms, let's just disable preemption. */
- if (val != atomic_read(&grp->refcount)) {
- preempt_enable();
- return false;
- }
- return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
- int orig_val)
-{
- preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
- int v = atomic_read(&grp->refcount);
-
- /* workgroup is never freezed on uniprocessor systems */
- DBG_BUGON(v == EROFS_LOCKED_MAGIC);
- return v;
-}
-#endif /* !CONFIG_SMP */
#endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 572f0b8151ba..d58549ca1df9 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -141,7 +141,7 @@ struct z_erofs_maprecorder {
u8 type, headtype;
u16 clusterofs;
u16 delta[2];
- erofs_blk_t pblk, compressedlcs;
+ erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
};
@@ -192,7 +192,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = m->delta[0] &
+ m->compressedblks = m->delta[0] &
~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
}
@@ -293,7 +293,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->compressedblks = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
return 0;
} else if (i + 1 != (int)vcnt) {
@@ -497,7 +497,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return 0;
}
lcn = m->lcn + 1;
- if (m->compressedlcs)
+ if (m->compressedblks)
goto out;
err = z_erofs_load_cluster_from_disk(m, lcn, false);
@@ -506,7 +506,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
/*
* If the 1st NONHEAD lcluster has already been handled initially w/o
- * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+ * valid compressedblks, which means at least it mustn't be CBLKCNT, or
* an internal implemenatation error is detected.
*
* The following code can also handle it properly anyway, but let's
@@ -523,12 +523,12 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
*/
- m->compressedlcs = 1;
+ m->compressedblks = 1 << (lclusterbits - LOG_BLOCK_SIZE);
break;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
- if (m->compressedlcs)
+ if (m->compressedblks)
break;
fallthrough;
default:
@@ -539,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = (u64)m->compressedlcs << lclusterbits;
+ map->m_plen = (u64)m->compressedblks << LOG_BLOCK_SIZE;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05221366a16d..08a1993ab7fd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode,
static void wb_wakeup(struct bdi_writeback *wb)
{
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void finish_writeback_work(struct bdi_writeback *wb,
@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
if (work->done)
atomic_inc(&work->done->cnt);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state)) {
list_add_tail(&work->list, &wb->work_list);
@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
} else
finish_writeback_work(wb, work);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
/**
@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
{
struct wb_writeback_work *work = NULL;
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!list_empty(&wb->work_list)) {
work = list_entry(wb->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return work;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6462276dfdf0..ba1de23c13c1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2018,23 +2018,25 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = file_inode(file);
- int error;
+ int error = 0;
int kill;
if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
- if (kill <= 0)
+ if (kill < 0)
return kill;
- if (flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (kill) {
+ if (flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
+ }
- error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
if (!error)
inode_has_no_xattr(inode);
-
return error;
}
diff --git a/fs/locks.c b/fs/locks.c
index c266cfdc3291..607f94a0e789 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2129,6 +2129,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
else
error = locks_lock_file_wait(f.file, &fl);
+ locks_release_private(&fl);
out_putf:
fdput(f);
diff --git a/fs/namespace.c b/fs/namespace.c
index 68789f896f08..df137ba19d37 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
err = -EPERM;
goto out_fput;
}
+
+ /* We're not controlling the target namespace. */
+ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_fput;
+ }
+
kattr->mnt_userns = get_user_ns(mnt_userns);
out_fput:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dbab3caa15ed..5d6c2ddc7ea6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
{
switch (error) {
case -ENOENT:
- d_delete(dentry);
+ if (d_really_is_positive(dentry))
+ d_delete(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case 0:
@@ -2484,8 +2485,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
*/
error = -ETXTBSY;
if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
- WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+ WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
+ spin_unlock(&dentry->d_lock);
goto out;
+ }
if (dentry->d_fsdata)
/* old devname */
kfree(dentry->d_fsdata);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d2bcd4834c0e..e032fe201a36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ if (nredirtied == save_nredirtied)
break;
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
+ save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..bea7c005119c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 27c720d71b4e..898dd95bc7a7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -606,6 +606,31 @@ static inline gfp_t nfs_io_gfp_mask(void)
return GFP_KERNEL;
}
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static inline int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
+
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 068c45b3bc1a..6dab9e408372 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -78,10 +78,15 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ if (nfs_should_remove_suid(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ spin_unlock(&inode->i_lock);
+ }
status = nfs_post_op_update_inode_force_wcc(inode,
res.falloc_fattr);
-
+ }
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE])
trace_nfs4_fallocate(inode, &args, status);
else
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e88f6b18445e..9eb181287879 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
}
+ if (!S_ISREG(fattr->mode)) {
+ res = ERR_PTR(-EBADF);
+ goto out;
+ }
+
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
read_name = kzalloc(len, GFP_KERNEL);
@@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
+ iput(r_ino);
goto out_free_name;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 41a9b6b58fb9..2613b7e36eb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 82944e14fcea..ee66ffdb985e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1051,22 +1051,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
if (ctx->bsize)
sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ sb->s_time_gran = 1000;
+ sb->s_time_min = 0;
+ sb->s_time_max = U32_MAX;
+ break;
+ case 3:
+ /*
+ * The VFS shouldn't apply the umask to mode bits.
+ * We will do so ourselves when necessary.
*/
sb->s_flags |= SB_POSIXACL;
sb->s_time_gran = 1;
- sb->s_export_op = &nfs_export_ops;
- } else
- sb->s_time_gran = 1000;
-
- if (server->nfs_client->rpc_ops->version != 4) {
sb->s_time_min = 0;
sb->s_time_max = U32_MAX;
- } else {
+ sb->s_export_op = &nfs_export_ops;
+ break;
+ case 4:
+ sb->s_flags |= SB_POSIXACL;
+ sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ sb->s_export_op = &nfs_export_ops;
+ break;
}
sb->s_magic = NFS_SUPER_MAGIC;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 51a7e202d6e5..f41d24b54fd1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1494,31 +1496,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Special version of should_remove_suid() that ignores capabilities.
- */
-static int nfs_should_remove_suid(const struct inode *inode)
-{
- umode_t mode = inode->i_mode;
- int kill = 0;
-
- /* suid always must be killed */
- if (unlikely(mode & S_ISUID))
- kill = ATTR_KILL_SUID;
-
- /*
- * sgid without any exec bits is just a mandatory locking mark; leave
- * it alone. If some exec bits are set, it's a real sgid; kill it.
- */
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
- kill |= ATTR_KILL_SGID;
-
- if (unlikely(kill && S_ISREG(mode)))
- return kill;
-
- return 0;
-}
-
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
struct nfs_fattr *fattr)
{
@@ -1904,7 +1881,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f486b788ed0..fc17b0ac8729 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -300,6 +300,10 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
+ /* Ignore mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
@@ -353,7 +357,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
@@ -391,13 +395,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- return 0;
-
nfsd_sanitize_attrs(inode, iap);
if (check_guard && guardtime != inode->i_ctime.tv_sec)
@@ -448,8 +445,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_unlock;
}
- iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ if (iap->ia_valid) {
+ iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ }
out_unlock:
if (attr->na_seclabel && attr->na_seclabel->len)
@@ -846,10 +845,14 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
-
- svc_rqst_replace_page(rqstp, buf->page);
- if (rqstp->rq_res.page_len == 0)
- rqstp->rq_res.page_base = buf->offset;
+ struct page *page = buf->page; // may be a compound one
+ unsigned offset = buf->offset;
+
+ page += offset / PAGE_SIZE;
+ for (int i = sd->len; i > 0; i -= PAGE_SIZE)
+ svc_rqst_replace_page(rqstp, page++);
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
return sd->len;
}
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 5bdff12a1232..6ae1f56b7358 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -483,8 +483,7 @@ out:
}
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
- struct inode *inode, int type,
+static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type,
int locked)
{
struct ntfs_inode *ni = ntfs_i(inode);
@@ -519,7 +518,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
/* Translate extended attribute to acl. */
if (err >= 0) {
- acl = posix_acl_from_xattr(mnt_userns, buf, err);
+ acl = posix_acl_from_xattr(&init_user_ns, buf, err);
} else if (err == -ENODATA) {
acl = NULL;
} else {
@@ -542,8 +541,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
if (rcu)
return ERR_PTR(-ECHILD);
- /* TODO: init_user_ns? */
- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+ return ntfs_get_acl_ex(inode, type, 0);
}
static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
@@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
value = kmalloc(size, GFP_NOFS);
if (!value)
return -ENOMEM;
- err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (err < 0)
goto out;
flags = 0;
@@ -646,7 +644,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
if (!acl)
return -ENODATA;
- err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
posix_acl_release(acl);
return err;
@@ -670,12 +668,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
if (!value) {
acl = NULL;
} else {
- acl = posix_acl_from_xattr(mnt_userns, value, size);
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- err = posix_acl_valid(mnt_userns, acl);
+ err = posix_acl_valid(&init_user_ns, acl);
if (err)
goto release_and_out;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 801e60bab955..c28bc983a7b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
- ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
- osb->cconn = NULL;
+ if (osb->cconn) {
+ ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+ osb->cconn = NULL;
- ocfs2_dlm_shutdown_debug(osb);
+ ocfs2_dlm_shutdown_debug(osb);
+ }
}
static int ocfs2_drop_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 013a727bd7c8..e2cc9eec287c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
!ocfs2_is_hard_readonly(osb))
hangup_needed = 1;
- if (osb->cconn)
- ocfs2_dlm_shutdown(osb, hangup_needed);
+ ocfs2_dlm_shutdown(osb, hangup_needed);
ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
debugfs_remove_recursive(osb->osb_debug_root);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b45fea69fff3..0fbcb590af84 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
* of the POSIX ACLs retrieved from the lower layer to this function to not
* alter the POSIX ACLs for the underlying filesystem.
*/
-static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
+static void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
struct posix_acl *acl)
{
+ struct user_namespace *fs_userns = i_user_ns(realinode);
+
for (unsigned int i = 0; i < acl->a_count; i++) {
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
struct posix_acl_entry *e = &acl->a_entries[i];
switch (e->e_tag) {
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
e->e_uid = vfsuid_into_kuid(vfsuid);
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
e->e_gid = vfsgid_into_kgid(vfsgid);
break;
}
@@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
if (!clone)
clone = ERR_PTR(-ENOMEM);
else
- ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone);
+ ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone);
/*
* Since we're not in RCU path walk we always need to release the
* original ACLs.
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 1d17d7b13dcd..5af33800743e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
const struct posix_acl *acl, int want)
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int found = 0;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
goto check_perm;
break;
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns,
+ vfsuid = make_vfsuid(mnt_userns, fs_userns,
pa->e_uid);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
@@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
}
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns,
+ vfsgid = make_vfsgid(mnt_userns, fs_userns,
pa->e_gid);
if (vfsgid_in_group_p(vfsgid)) {
found = 1;
@@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
switch (le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, uid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns,
vfsuid_into_kuid(vfsuid)));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, gid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns,
vfsgid_into_kgid(vfsgid)));
break;
@@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsuid = VFSUIDT_INIT(uid);
- uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid);
+ uid = from_vfsuid(mnt_userns, fs_userns, vfsuid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsgid = VFSGIDT_INIT(gid);
- gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid);
+ gid = from_vfsgid(mnt_userns, fs_userns, vfsgid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
break;
default:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a3398d0f1927..4e0023643f8b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -527,10 +527,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false;
+ bool migration = false, young = false, dirty = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
+ young = pte_young(*pte);
+ dirty = pte_dirty(*pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
@@ -560,8 +562,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
- locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 98e64fec75b7..e56510964b22 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -593,7 +593,7 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- kfree(actor);
+ squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index be4b12d31e0c..f1ccad519e28 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -74,7 +74,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
/* Decompress directly into the page cache buffers */
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- kfree(actor);
+ squashfs_page_actor_free(actor);
if (res < 0)
goto mark_errored;
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index b23b780d8f42..54b93bf4a25c 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -52,6 +52,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
actor->buffer = buffer;
actor->pages = pages;
actor->next_page = 0;
+ actor->tmp_buffer = NULL;
actor->squashfs_first_page = cache_first_page;
actor->squashfs_next_page = cache_next_page;
actor->squashfs_finish_page = cache_finish_page;
@@ -68,20 +69,9 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
if ((actor->next_page == actor->pages) ||
(actor->next_index != actor->page[actor->next_page]->index)) {
- if (actor->alloc_buffer) {
- void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (tmp_buffer) {
- actor->tmp_buffer = tmp_buffer;
- actor->next_index++;
- actor->returned_pages++;
- return tmp_buffer;
- }
- }
-
actor->next_index++;
actor->returned_pages++;
- return ERR_PTR(-ENOMEM);
+ return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
@@ -96,11 +86,10 @@ static void *direct_first_page(struct squashfs_page_actor *actor)
static void *direct_next_page(struct squashfs_page_actor *actor)
{
- if (actor->pageaddr)
+ if (actor->pageaddr) {
kunmap_local(actor->pageaddr);
-
- kfree(actor->tmp_buffer);
- actor->pageaddr = actor->tmp_buffer = NULL;
+ actor->pageaddr = NULL;
+ }
return handle_next_page(actor);
}
@@ -109,8 +98,6 @@ static void direct_finish_page(struct squashfs_page_actor *actor)
{
if (actor->pageaddr)
kunmap_local(actor->pageaddr);
-
- kfree(actor->tmp_buffer);
}
struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk,
@@ -121,6 +108,16 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
if (actor == NULL)
return NULL;
+ if (msblk->decompressor->alloc_buffer) {
+ actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ if (actor->tmp_buffer == NULL) {
+ kfree(actor);
+ return NULL;
+ }
+ } else
+ actor->tmp_buffer = NULL;
+
actor->length = length ? : pages * PAGE_SIZE;
actor->page = page;
actor->pages = pages;
@@ -128,7 +125,6 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
- actor->tmp_buffer = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 24841d28bc0f..95ffbb543d91 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -29,6 +29,11 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
+static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
+{
+ kfree(actor->tmp_buffer);
+ kfree(actor);
+}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
return actor->squashfs_first_page(actor);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 81d26abf486f..da85b3979195 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -141,6 +141,8 @@ struct tracefs_mount_opts {
kuid_t uid;
kgid_t gid;
umode_t mode;
+ /* Opt_* bitfield. */
+ unsigned int opts;
};
enum {
@@ -241,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
kgid_t gid;
char *p;
+ opts->opts = 0;
opts->mode = TRACEFS_DEFAULT_MODE;
while ((p = strsep(&data, ",")) != NULL) {
@@ -275,24 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
* but traditionally tracefs has ignored all mount options
*/
}
+
+ opts->opts |= BIT(token);
}
return 0;
}
-static int tracefs_apply_options(struct super_block *sb)
+static int tracefs_apply_options(struct super_block *sb, bool remount)
{
struct tracefs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
- inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= opts->mode;
+ /*
+ * On remount, only reset mode/uid/gid if they were provided as mount
+ * options.
+ */
+
+ if (!remount || opts->opts & BIT(Opt_mode)) {
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+ }
- inode->i_uid = opts->uid;
+ if (!remount || opts->opts & BIT(Opt_uid))
+ inode->i_uid = opts->uid;
- /* Set all the group ids to the mount option */
- set_gid(sb->s_root, opts->gid);
+ if (!remount || opts->opts & BIT(Opt_gid)) {
+ /* Set all the group ids to the mount option */
+ set_gid(sb->s_root, opts->gid);
+ }
return 0;
}
@@ -307,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto fail;
- tracefs_apply_options(sb);
+ tracefs_apply_options(sb, true);
fail:
return err;
@@ -359,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &tracefs_super_operations;
- tracefs_apply_options(sb);
+ tracefs_apply_options(sb, false);
return 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1c44bf75f916..175de70e3adf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(mm, vma, start, vma_end - start, false);
+
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,