summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-12 14:49:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-12 14:49:50 -0700
commit5166701b368caea89d57b14bf41cf39e819dad51 (patch)
treec73b9d4860809e3afa9359be9d03ba2d8d98a18e /fs
parent0a7418f5f569512e98789c439198eed4b507cce3 (diff)
parenta786c06d9f2719203c00b3d97b21f9a96980d0b5 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro: "The first vfs pile, with deep apologies for being very late in this window. Assorted cleanups and fixes, plus a large preparatory part of iov_iter work. There's a lot more of that, but it'll probably go into the next merge window - it *does* shape up nicely, removes a lot of boilerplate, gets rid of locking inconsistencies between aio_write and splice_write and I hope to get Kent's direct-io rewrite merged into the same queue, but some of the stuff after this point is having (mostly trivial) conflicts with the things already merged into mainline and with some I want more testing. This one passes LTP and xfstests without regressions, in addition to usual beating. BTW, readahead02 in ltp syscalls testsuite has started giving failures since "mm/readahead.c: fix readahead failure for memoryless NUMA nodes and limit readahead pages" - might be a false positive, might be a real regression..." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits) missing bits of "splice: fix racy pipe->buffers uses" cifs: fix the race in cifs_writev() ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure kill generic_file_buffered_write() ocfs2_file_aio_write(): switch to generic_perform_write() ceph_aio_write(): switch to generic_perform_write() xfs_file_buffered_aio_write(): switch to generic_perform_write() export generic_perform_write(), start getting rid of generic_file_buffer_write() generic_file_direct_write(): get rid of ppos argument btrfs_file_aio_write(): get rid of ppos kill the 5th argument of generic_file_buffered_write() kill the 4th argument of __generic_file_aio_write() lustre: don't open-code kernel_recvmsg() ocfs2: don't open-code kernel_recvmsg() drbd: don't open-code kernel_recvmsg() constify blk_rq_map_user_iov() and friends lustre: switch to kernel_sendmsg() ocfs2: don't open-code kernel_sendmsg() take iov_iter stuff to mm/iov_iter.c process_vm_access: tidy up a bit ...
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c10
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/btrfs/file.c16
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/cachefiles/bind.c1
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/ceph/file.c12
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/file.c128
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/file.c11
-rw-r--r--fs/file_table.c43
-rw-r--r--fs/fuse/dev.c14
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/mount.h5
-rw-r--r--fs/namei.c67
-rw-r--r--fs/namespace.c56
-rw-r--r--fs/ncpfs/inode.c50
-rw-r--r--fs/ncpfs/ncp_fs_sb.h2
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c49
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/open.c68
-rw-r--r--fs/pipe.c133
-rw-r--r--fs/pnode.c198
-rw-r--r--fs/pnode.h3
-rw-r--r--fs/proc/namespaces.c14
-rw-r--r--fs/proc/self.c2
-rw-r--r--fs/proc_namespace.c1
-rw-r--r--fs/splice.c126
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_ioctl.c28
34 files changed, 385 insertions, 699 deletions
diff --git a/fs/bio.c b/fs/bio.c
index b1bc722b89aa..6f0362b77806 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1002,7 +1002,7 @@ struct bio_map_data {
};
static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- struct sg_iovec *iov, int iov_count,
+ const struct sg_iovec *iov, int iov_count,
int is_our_pages)
{
memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
@@ -1022,7 +1022,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs,
sizeof(struct sg_iovec) * iov_count, gfp_mask);
}
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
int to_user, int from_user, int do_free_page)
{
int ret = 0, i;
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL(bio_uncopy_user);
*/
struct bio *bio_copy_user_iov(struct request_queue *q,
struct rq_map_data *map_data,
- struct sg_iovec *iov, int iov_count,
+ const struct sg_iovec *iov, int iov_count,
int write_to_vm, gfp_t gfp_mask)
{
struct bio_map_data *bmd;
@@ -1259,7 +1259,7 @@ EXPORT_SYMBOL(bio_copy_user);
static struct bio *__bio_map_user_iov(struct request_queue *q,
struct block_device *bdev,
- struct sg_iovec *iov, int iov_count,
+ const struct sg_iovec *iov, int iov_count,
int write_to_vm, gfp_t gfp_mask)
{
int i, j;
@@ -1407,7 +1407,7 @@ EXPORT_SYMBOL(bio_map_user);
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
- struct sg_iovec *iov, int iov_count,
+ const struct sg_iovec *iov, int iov_count,
int write_to_vm, gfp_t gfp_mask)
{
struct bio *bio;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba0d2b05bb78..552a8d13bc32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
blk_start_plug(&plug);
- ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ ret = __generic_file_aio_write(iocb, iov, nr_segs);
if (ret > 0) {
ssize_t err;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c5998477fe60..eb742c07e7a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page *page = prepared_pages[pg];
/*
* Copy data from userspace to the current page
- *
- * Disable pagefault to avoid recursive lock since
- * the pages are already locked
*/
- pagefault_disable();
copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
- pagefault_enable();
/* Flush processor's dcache for this page */
flush_dcache_page(page);
@@ -1665,7 +1660,7 @@ again:
static ssize_t __btrfs_direct_write(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
- loff_t *ppos, size_t count, size_t ocount)
+ size_t count, size_t ocount)
{
struct file *file = iocb->ki_filp;
struct iov_iter i;
@@ -1674,7 +1669,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
loff_t endbyte;
int err;
- written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+ written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
count, ocount);
if (written < 0 || written == count)
@@ -1693,7 +1688,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
if (err)
goto out;
written += written_buffered;
- *ppos = pos + written_buffered;
+ iocb->ki_pos = pos + written_buffered;
invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
endbyte >> PAGE_CACHE_SHIFT);
out:
@@ -1725,7 +1720,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- loff_t *ppos = &iocb->ki_pos;
u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
@@ -1796,7 +1790,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (unlikely(file->f_flags & O_DIRECT)) {
num_written = __btrfs_direct_write(iocb, iov, nr_segs,
- pos, ppos, count, ocount);
+ pos, count, ocount);
} else {
struct iov_iter i;
@@ -1804,7 +1798,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
num_written = __btrfs_buffered_write(file, &i, pos);
if (num_written > 0)
- *ppos = pos + num_written;
+ iocb->ki_pos = pos + num_written;
}
mutex_unlock(&inode->i_mutex);
diff --git a/fs/buffer.c b/fs/buffer.c
index 8c53a2b15ecb..9ddb9fc7d923 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end);
* Returns true if all buffers which correspond to a file portion
* we want to read are uptodate.
*/
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
- unsigned long from)
+int block_is_partially_uptodate(struct page *page, unsigned long from,
+ unsigned long count)
{
unsigned block_start, block_end, blocksize;
unsigned to;
@@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
head = page_buffers(page);
blocksize = head->b_size;
- to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+ to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
to = from + to;
if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
return 0;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 622f4696e484..5b99bafc31d1 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
/* check parameters */
ret = -EOPNOTSUPP;
if (!root->d_inode ||
- !root->d_inode->i_op ||
!root->d_inode->i_op->lookup ||
!root->d_inode->i_op->mkdir ||
!root->d_inode->i_op->setxattr ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 6494d9f673aa..c0a681705104 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
}
ret = -EPERM;
- if (!subdir->d_inode->i_op ||
- !subdir->d_inode->i_op->setxattr ||
+ if (!subdir->d_inode->i_op->setxattr ||
!subdir->d_inode->i_op->getxattr ||
!subdir->d_inode->i_op->lookup ||
!subdir->d_inode->i_op->mkdir ||
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 66075a4ad979..39da1c2efa50 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -601,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
- goto out;
+ break;
}
num_pages = calc_pages_for(page_align, len);
@@ -719,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
- goto out;
+ break;
}
/*
@@ -972,6 +972,7 @@ retry_snap:
}
} else {
loff_t old_size = inode->i_size;
+ struct iov_iter from;
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -979,9 +980,10 @@ retry_snap:
* are pending vmtruncate. So write and vmtruncate
* can not run at the same time
*/
- written = generic_file_buffered_write(iocb, iov, nr_segs,
- pos, &iocb->ki_pos,
- count, 0);
+ iov_iter_init(&from, iov, nr_segs, count, 0);
+ written = generic_perform_write(file, &from, pos);
+ if (likely(written >= 0))
+ iocb->ki_pos = pos + written;
if (inode->i_size > old_size)
ceph_fscache_update_objectsize(inode);
mutex_unlock(&inode->i_mutex);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c70cbe35d39..df9c9141c099 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -850,7 +850,6 @@ const struct inode_operations cifs_file_inode_ops = {
/* revalidate:cifs_revalidate, */
.setattr = cifs_setattr,
.getattr = cifs_getattr, /* do we need this anymore? */
- .rename = cifs_rename,
.permission = cifs_permission,
#ifdef CONFIG_CIFS_XATTR
.setxattr = cifs_setxattr,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 216d7e99f921..8807442c94dd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2579,19 +2579,32 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
ssize_t rc = -EACCES;
- loff_t lock_pos = pos;
+ loff_t lock_pos = iocb->ki_pos;
- if (file->f_flags & O_APPEND)
- lock_pos = i_size_read(inode);
/*
* We need to hold the sem to be sure nobody modifies lock list
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
+ mutex_lock(&inode->i_mutex);
+ if (file->f_flags & O_APPEND)
+ lock_pos = i_size_read(inode);
if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP))
- rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ CIFS_WRITE_OP)) {
+ rc = __generic_file_aio_write(iocb, iov, nr_segs);
+ mutex_unlock(&inode->i_mutex);
+
+ if (rc > 0) {
+ ssize_t err;
+
+ err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+ if (rc < 0)
+ rc = err;
+ }
+ } else {
+ mutex_unlock(&inode->i_mutex);
+ }
up_read(&cinode->lock_sem);
return rc;
}
@@ -2727,56 +2740,27 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
/**
* cifs_readdata_to_iov - copy data from pages in response to an iovec
* @rdata: the readdata response with list of pages holding data
- * @iov: vector in which we should copy the data
- * @nr_segs: number of segments in vector
- * @offset: offset into file of the first iovec
- * @copied: used to return the amount of data copied to the iov
+ * @iter: destination for our data
*
* This function copies data from a list of pages in a readdata response into
* an array of iovecs. It will first calculate where the data should go
* based on the info in the readdata and then copy the data into that spot.
*/
-static ssize_t
-cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
- unsigned long nr_segs, loff_t offset, ssize_t *copied)
+static int
+cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
- int rc = 0;
- struct iov_iter ii;
- size_t pos = rdata->offset - offset;
- ssize_t remaining = rdata->bytes;
- unsigned char *pdata;
+ size_t remaining = rdata->bytes;
unsigned int i;
- /* set up iov_iter and advance to the correct offset */
- iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
- iov_iter_advance(&ii, pos);
-
- *copied = 0;
for (i = 0; i < rdata->nr_pages; i++) {
- ssize_t copy;
struct page *page = rdata->pages[i];
-
- /* copy a whole page or whatever's left */
- copy = min_t(ssize_t, remaining, PAGE_SIZE);
-
- /* ...but limit it to whatever space is left in the iov */
- copy = min_t(ssize_t, copy, iov_iter_count(&ii));
-
- /* go while there's data to be copied and no errors */
- if (copy && !rc) {
- pdata = kmap(page);
- rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
- (int)copy);
- kunmap(page);
- if (!rc) {
- *copied += copy;
- remaining -= copy;
- iov_iter_advance(&ii, copy);
- }
- }
+ size_t copy = min(remaining, PAGE_SIZE);
+ size_t written = copy_page_to_iter(page, 0, copy, iter);
+ remaining -= written;
+ if (written < copy && iov_iter_count(iter) > 0)
+ break;
}
-
- return rc;
+ return remaining ? -EFAULT : 0;
}
static void
@@ -2837,20 +2821,21 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
return total_read > 0 ? total_read : result;
}
-static ssize_t
-cifs_iovec_read(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *poffset)
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
ssize_t rc;
size_t len, cur_len;
ssize_t total_read = 0;
- loff_t offset = *poffset;
+ loff_t offset = pos;
unsigned int npages;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
struct cifsFileInfo *open_file;
struct cifs_readdata *rdata, *tmp;
struct list_head rdata_list;
+ struct iov_iter to;
pid_t pid;
if (!nr_segs)
@@ -2860,6 +2845,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
if (!len)
return 0;
+ iov_iter_init(&to, iov, nr_segs, len, 0);
+
INIT_LIST_HEAD(&rdata_list);
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
open_file = file->private_data;
@@ -2917,55 +2904,44 @@ error:
if (!list_empty(&rdata_list))
rc = 0;
+ len = iov_iter_count(&to);
/* the loop below should proceed in the order of increasing offsets */
-restart_loop:
list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+ again:
if (!rc) {
- ssize_t copied;
-
/* FIXME: freezable sleep too? */
rc = wait_for_completion_killable(&rdata->done);
if (rc)
rc = -EINTR;
- else if (rdata->result)
+ else if (rdata->result) {
rc = rdata->result;
- else {
- rc = cifs_readdata_to_iov(rdata, iov,
- nr_segs, *poffset,
- &copied);
- total_read += copied;
+ /* resend call if it's a retryable error */
+ if (rc == -EAGAIN) {
+ rc = cifs_retry_async_readv(rdata);
+ goto again;
+ }
+ } else {
+ rc = cifs_readdata_to_iov(rdata, &to);
}
- /* resend call if it's a retryable error */
- if (rc == -EAGAIN) {
- rc = cifs_retry_async_readv(rdata);
- goto restart_loop;
- }
}
list_del_init(&rdata->list);
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
+ total_read = len - iov_iter_count(&to);
+
cifs_stats_bytes_read(tcon, total_read);
- *poffset += total_read;
/* mask nodata case */
if (rc == -ENODATA)
rc = 0;
- return total_read ? total_read : rc;
-}
-
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- ssize_t read;
-
- read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
- if (read > 0)
- iocb->ki_pos = pos;
-
- return read;
+ if (total_read) {
+ iocb->ki_pos = pos + total_read;
+ return total_read;
+ }
+ return rc;
}
ssize_t
diff --git a/fs/exec.c b/fs/exec.c
index 9e81c630dfa7..476f3ebf437e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -813,7 +813,7 @@ EXPORT_SYMBOL(kernel_read);
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
- ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+ ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
if (res > 0)
flush_icache_range(addr, addr + len);
return res;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4e508fc83dcf..ca7502d89fde 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
overwrite = 1;
}
- ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ ret = __generic_file_aio_write(iocb, iov, nr_segs);
mutex_unlock(&inode->i_mutex);
if (ret > 0) {
diff --git a/fs/file.c b/fs/file.c
index b61293badfb1..8f294cfac697 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -25,7 +25,10 @@
int sysctl_nr_open __read_mostly = 1024*1024;
int sysctl_nr_open_min = BITS_PER_LONG;
-int sysctl_nr_open_max = 1024 * 1024; /* raised later */
+/* our max() is unusable in constant expressions ;-/ */
+#define __const_max(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
+ -BITS_PER_LONG;
static void *alloc_fdmem(size_t size)
{
@@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk)
}
}
-void __init files_defer_init(void)
-{
- sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
- -BITS_PER_LONG;
-}
-
struct files_struct init_files = {
.count = ATOMIC_INIT(1),
.fdt = &init_files.fdtab,
diff --git a/fs/file_table.c b/fs/file_table.c
index 01071c4d752e..a374f5033e97 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
percpu_counter_dec(&nr_files);
- file_check_state(f);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
@@ -178,47 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_mode = mode;
file->f_op = fop;
-
- /*
- * These mounts don't really matter in practice
- * for r/o bind mounts. They aren't userspace-
- * visible. We do this for consistency, and so
- * that we can do debugging checks at __fput()
- */
- if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
- file_take_write(file);
- WARN_ON(mnt_clone_write(path->mnt));
- }
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path->dentry->d_inode);
return file;
}
EXPORT_SYMBOL(alloc_file);
-/**
- * drop_file_write_access - give up ability to write to a file
- * @file: the file to which we will stop writing
- *
- * This is a central place which will give up the ability
- * to write to @file, along with access to write through
- * its vfsmount.
- */
-static void drop_file_write_access(struct file *file)
-{
- struct vfsmount *mnt = file->f_path.mnt;
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
-
- put_write_access(inode);
-
- if (special_file(inode->i_mode))
- return;
- if (file_check_writeable(file) != 0)
- return;
- __mnt_drop_write(mnt);
- file_release_write(file);
-}
-
/* the real guts of fput() - releasing the last reference to file
*/
static void __fput(struct file *file)
@@ -253,8 +217,10 @@ static void __fput(struct file *file)
put_pid(file->f_owner.pid);
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
- if (file->f_mode & FMODE_WRITE)
- drop_file_write_access(file);
+ if (file->f_mode & FMODE_WRITER) {
+ put_write_access(inode);
+ __mnt_drop_write(mnt);
+ }
file->f_path.dentry = NULL;
file->f_path.mnt = NULL;
file->f_inode = NULL;
@@ -359,6 +325,5 @@ void __init files_init(unsigned long mempages)
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
- files_defer_init();
percpu_counter_init(&nr_files, 0);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0a648bb455ae..aac71ce373e4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -667,15 +667,15 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->currbuf;
if (!cs->write) {
- buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+ kunmap_atomic(cs->mapaddr);
} else {
- kunmap(buf->page);
+ kunmap_atomic(cs->mapaddr);
buf->len = PAGE_SIZE - cs->len;
}
cs->currbuf = NULL;
cs->mapaddr = NULL;
} else if (cs->mapaddr) {
- kunmap(cs->pg);
+ kunmap_atomic(cs->mapaddr);
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
@@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+ cs->mapaddr = kmap_atomic(buf->page);
cs->len = buf->len;
cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
@@ -726,7 +726,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap(page);
+ cs->mapaddr = kmap_atomic(page);
cs->buf = cs->mapaddr;
cs->len = PAGE_SIZE;
cs->pipebufs++;
@@ -745,7 +745,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
return err;
BUG_ON(err != 1);
offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap(cs->pg);
+ cs->mapaddr = kmap_atomic(cs->pg);
cs->buf = cs->mapaddr + offset;
cs->len = min(PAGE_SIZE - offset, cs->seglen);
cs->seglen -= cs->len;
@@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
out_fallback_unlock:
unlock_page(newpage);
out_fallback:
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->mapaddr = kmap_atomic(buf->page);
cs->buf = cs->mapaddr + buf->offset;
err = lock_request(cs->fc, cs->req);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 48992cac714b..13f8bdec5110 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1086,9 +1086,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- pagefault_disable();
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
- pagefault_enable();
flush_dcache_page(page);
mark_page_accessed(page);
@@ -1237,8 +1235,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
goto out;
if (file->f_flags & O_DIRECT) {
- written = generic_file_direct_write(iocb, iov, &nr_segs,
- pos, &iocb->ki_pos,
+ written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
count, ocount);
if (written < 0 || written == count)
goto out;
diff --git a/fs/mount.h b/fs/mount.h
index b29e42f05f34..d55297f2fa05 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,7 +10,7 @@ struct mnt_namespace {
struct user_namespace *user_ns;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
- int event;
+ u64 event;
};
struct mnt_pcp {
@@ -104,6 +104,9 @@ struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
+ void *cached_mount;
+ u64 cached_event;
+ loff_t cached_index;
};
#define proc_mounts(p) (container_of((p), struct proc_mounts, m))
diff --git a/fs/namei.c b/fs/namei.c
index 88339f59efb5..c6157c894fce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -358,6 +358,7 @@ int generic_permission(struct inode *inode, int mask)
return -EACCES;
}
+EXPORT_SYMBOL(generic_permission);
/*
* We _really_ want to just do "generic_permission()" without
@@ -455,6 +456,7 @@ int inode_permission(struct inode *inode, int mask)
return retval;
return __inode_permission(inode, mask);
}
+EXPORT_SYMBOL(inode_permission);
/**
* path_get - get a reference to a path
@@ -924,6 +926,7 @@ int follow_up(struct path *path)
path->mnt = &parent->mnt;
return 1;
}
+EXPORT_SYMBOL(follow_up);
/*
* Perform an automount
@@ -1085,6 +1088,7 @@ int follow_down_one(struct path *path)
}
return 0;
}
+EXPORT_SYMBOL(follow_down_one);
static inline bool managed_dentry_might_block(struct dentry *dentry)
{
@@ -1223,6 +1227,7 @@ int follow_down(struct path *path)
}
return 0;
}
+EXPORT_SYMBOL(follow_down);
/*
* Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
@@ -2025,6 +2030,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
*path = nd.path;
return res;
}
+EXPORT_SYMBOL(kern_path);
/**
* vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
@@ -2049,6 +2055,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
*path = nd.path;
return err;
}
+EXPORT_SYMBOL(vfs_path_lookup);
/*
* Restricted form of lookup. Doesn't follow links, single-component only,
@@ -2111,6 +2118,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return __lookup_hash(&this, base, 0);
}
+EXPORT_SYMBOL(lookup_one_len);
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
@@ -2135,6 +2143,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
{
return user_path_at_empty(dfd, name, flags, path, NULL);
}
+EXPORT_SYMBOL(user_path_at);
/*
* NB: most callers don't do anything directly with the reference to the
@@ -2477,6 +2486,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
return NULL;
}
+EXPORT_SYMBOL(lock_rename);
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
@@ -2486,6 +2496,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
}
}
+EXPORT_SYMBOL(unlock_rename);
int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool want_excl)
@@ -2506,6 +2517,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_create);
static int may_open(struct path *path, int acc_mode, int flag)
{
@@ -3375,6 +3387,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_mknod);
static int may_mknod(umode_t mode)
{
@@ -3464,6 +3477,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
fsnotify_mkdir(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_mkdir);
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
@@ -3518,6 +3532,7 @@ void dentry_unhash(struct dentry *dentry)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
}
+EXPORT_SYMBOL(dentry_unhash);
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
{
@@ -3555,6 +3570,7 @@ out:
d_delete(dentry);
return error;
}
+EXPORT_SYMBOL(vfs_rmdir);
static long do_rmdir(int dfd, const char __user *pathname)
{
@@ -3672,6 +3688,7 @@ out:
return error;
}
+EXPORT_SYMBOL(vfs_unlink);
/*
* Make sure that the actual truncation of the file will occur outside its
@@ -3785,6 +3802,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
fsnotify_create(dir, dentry);
return error;
}
+EXPORT_SYMBOL(vfs_symlink);
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
int, newdfd, const char __user *, newname)
@@ -3893,6 +3911,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
fsnotify_link(dir, inode, new_dentry);
return error;
}
+EXPORT_SYMBOL(vfs_link);
/*
* Hardlinks are often used in delicate situations. We avoid
@@ -4152,6 +4171,7 @@ out:
return error;
}
+EXPORT_SYMBOL(vfs_rename);
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, unsigned int, flags)
@@ -4304,11 +4324,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
-int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
+int readlink_copy(char __user *buffer, int buflen, const char *link)
{
- int len;
-
- len = PTR_ERR(link);
+ int len = PTR_ERR(link);
if (IS_ERR(link))
goto out;
@@ -4320,6 +4338,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c
out:
return len;
}
+EXPORT_SYMBOL(readlink_copy);
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that
@@ -4337,11 +4356,12 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
if (IS_ERR(cookie))
return PTR_ERR(cookie);
- res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+ res = readlink_copy(buffer, buflen, nd_get_link(&nd));
if (dentry->d_inode->i_op->put_link)
dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
return res;
}
+EXPORT_SYMBOL(generic_readlink);
/* get the link contents into pagecache */
static char *page_getlink(struct dentry * dentry, struct page **ppage)
@@ -4361,14 +4381,14 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
struct page *page = NULL;
- char *s = page_getlink(dentry, &page);
- int res = vfs_readlink(dentry,buffer,buflen,s);
+ int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
if (page) {
kunmap(page);
page_cache_release(page);
}
return res;
}
+EXPORT_SYMBOL(page_readlink);
void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
{
@@ -4376,6 +4396,7 @@ void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
nd_set_link(nd, page_getlink(dentry, &page));
return page;
}
+EXPORT_SYMBOL(page_follow_link_light);
void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
@@ -4386,6 +4407,7 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
page_cache_release(page);
}
}
+EXPORT_SYMBOL(page_put_link);
/*
* The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4423,45 +4445,18 @@ retry:
fail:
return err;
}
+EXPORT_SYMBOL(__page_symlink);
int page_symlink(struct inode *inode, const char *symname, int len)
{
return __page_symlink(inode, symname, len,
!(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
}
+EXPORT_SYMBOL(page_symlink);
const struct inode_operations page_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
};
-
-EXPORT_SYMBOL(user_path_at);
-EXPORT_SYMBOL(follow_down_one);
-EXPORT_SYMBOL(follow_down);
-EXPORT_SYMBOL(follow_up);
-EXPORT_SYMBOL(get_write_access); /* nfsd */
-EXPORT_SYMBOL(lock_rename);
-EXPORT_SYMBOL(lookup_one_len);
-EXPORT_SYMBOL(page_follow_link_light);
-EXPORT_SYMBOL(page_put_link);
-EXPORT_SYMBOL(page_readlink);
-EXPORT_SYMBOL(__page_symlink);
-EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(kern_path);
-EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(unlock_rename);
-EXPORT_SYMBOL(vfs_create);
-EXPORT_SYMBOL(vfs_link);
-EXPORT_SYMBOL(vfs_mkdir);
-EXPORT_SYMBOL(vfs_mknod);
-EXPORT_SYMBOL(generic_permission);
-EXPORT_SYMBOL(vfs_readlink);
-EXPORT_SYMBOL(vfs_rename);
-EXPORT_SYMBOL(vfs_rmdir);
-EXPORT_SYMBOL(vfs_symlink);
-EXPORT_SYMBOL(vfs_unlink);
-EXPORT_SYMBOL(dentry_unhash);
-EXPORT_SYMBOL(generic_readlink);
diff --git a/fs/namespace.c b/fs/namespace.c
index 2ffc5a2905d4..182bc41cd887 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -52,7 +52,7 @@ static int __init set_mphash_entries(char *str)
}
__setup("mphash_entries=", set_mphash_entries);
-static int event;
+static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
@@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
*/
int __mnt_want_write_file(struct file *file)
{
- struct inode *inode = file_inode(file);
-
- if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
+ if (!(file->f_mode & FMODE_WRITER))
return __mnt_want_write(file->f_path.mnt);
else
return mnt_clone_write(file->f_path.mnt);
@@ -570,13 +568,17 @@ int sb_prepare_remount_readonly(struct super_block *sb)
static void free_vfsmnt(struct mount *mnt)
{
kfree(mnt->mnt_devname);
- mnt_free_id(mnt);
#ifdef CONFIG_SMP
free_percpu(mnt->mnt_pcp);
#endif
kmem_cache_free(mnt_cache, mnt);
}
+static void delayed_free_vfsmnt(struct rcu_head *head)
+{
+ free_vfsmnt(container_of(head, struct mount, mnt_rcu));
+}
+
/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
@@ -848,6 +850,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
root = mount_fs(type, flags, name, data);
if (IS_ERR(root)) {
+ mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_CAST(root);
}
@@ -885,7 +888,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
goto out_free;
}
- mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+ mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
/* Don't allow unprivileged users to change mount flags */
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
@@ -928,20 +931,11 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
return mnt;
out_free:
+ mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_PTR(err);
}
-static void delayed_free(struct rcu_head *head)
-{
- struct mount *mnt = container_of(head, struct mount, mnt_rcu);
- kfree(mnt->mnt_devname);
-#ifdef CONFIG_SMP
- free_percpu(mnt->mnt_pcp);
-#endif
- kmem_cache_free(mnt_cache, mnt);
-}
-
static void mntput_no_expire(struct mount *mnt)
{
put_again:
@@ -991,7 +985,7 @@ put_again:
dput(mnt->mnt.mnt_root);
deactivate_super(mnt->mnt.mnt_sb);
mnt_free_id(mnt);
- call_rcu(&mnt->mnt_rcu, delayed_free);
+ call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}
void mntput(struct vfsmount *mnt)
@@ -1100,14 +1094,29 @@ static void *m_start(struct seq_file *m, loff_t *pos)
struct proc_mounts *p = proc_mounts(m);
down_read(&namespace_sem);
- return seq_list_start(&p->ns->list, *pos);
+ if (p->cached_event == p->ns->event) {
+ void *v = p->cached_mount;
+ if (*pos == p->cached_index)
+ return v;
+ if (*pos == p->cached_index + 1) {
+ v = seq_list_next(v, &p->ns->list, &p->cached_index);
+ return p->cached_mount = v;
+ }
+ }
+
+ p->cached_event = p->ns->event;
+ p->cached_mount = seq_list_start(&p->ns->list, *pos);
+ p->cached_index = *pos;
+ return p->cached_mount;
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
struct proc_mounts *p = proc_mounts(m);
- return seq_list_next(v, &p->ns->list, pos);
+ p->cached_mount = seq_list_next(v, &p->ns->list, pos);
+ p->cached_index = *pos;
+ return p->cached_mount;
}
static void m_stop(struct seq_file *m, void *v)
@@ -1661,9 +1670,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
if (err)
goto out;
err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+ lock_mount_hash();
if (err)
goto out_cleanup_ids;
- lock_mount_hash();
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
} else {
@@ -1690,6 +1699,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
return 0;
out_cleanup_ids:
+ while (!hlist_empty(&tree_list)) {
+ child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+ umount_tree(child, 0);
+ }
+ unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
out:
return err;
@@ -2044,7 +2058,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
struct mount *parent;
int err;
- mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
+ mnt_flags &= ~MNT_INTERNAL_FLAGS;
mp = lock_mount(path);
if (IS_ERR(mp))
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 81b4f643ecef..e31e589369a4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -470,9 +470,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
{
struct ncp_mount_data_kernel data;
struct ncp_server *server;
- struct file *ncp_filp;
struct inode *root_inode;
- struct inode *sock_inode;
struct socket *sock;
int error;
int default_bufsize;
@@ -541,18 +539,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
!gid_valid(data.gid))
goto out;
- error = -EBADF;
- ncp_filp = fget(data.ncp_fd);
- if (!ncp_filp)
- goto out;
- error = -ENOTSOCK;
- sock_inode = file_inode(ncp_filp);
- if (!S_ISSOCK(sock_inode->i_mode))
- goto out_fput;
- sock = SOCKET_I(sock_inode);
+ sock = sockfd_lookup(data.ncp_fd, &error);
if (!sock)
- goto out_fput;
-
+ goto out;
+
if (sock->type == SOCK_STREAM)
default_bufsize = 0xF000;
else
@@ -574,27 +564,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
if (error)
goto out_fput;
- server->ncp_filp = ncp_filp;
server->ncp_sock = sock;
if (data.info_fd != -1) {
- struct socket *info_sock;
-
- error = -EBADF;
- server->info_filp = fget(data.info_fd);
- if (!server->info_filp)
- goto out_bdi;
- error = -ENOTSOCK;
- sock_inode = file_inode(server->info_filp);
- if (!S_ISSOCK(sock_inode->i_mode))
- goto out_fput2;
- info_sock = SOCKET_I(sock_inode);
+ struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
if (!info_sock)
- goto out_fput2;
+ goto out_bdi;
+ server->info_sock = info_sock;
error = -EBADFD;
if (info_sock->type != SOCK_STREAM)
goto out_fput2;
- server->info_sock = info_sock;
}
/* server->lock = 0; */
@@ -766,17 +745,12 @@ out_nls:
mutex_destroy(&server->root_setup_lock);
mutex_destroy(&server->mutex);
out_fput2:
- if (server->info_filp)
- fput(server->info_filp);
+ if (server->info_sock)
+ sockfd_put(server->info_sock);
out_bdi:
bdi_destroy(&server->bdi);
out_fput:
- /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
- *
- * The previously used put_filp(ncp_filp); was bogus, since
- * it doesn't perform proper unlocking.
- */
- fput(ncp_filp);
+ sockfd_put(sock);
out:
put_pid(data.wdog_pid);
sb->s_fs_info = NULL;
@@ -809,9 +783,9 @@ static void ncp_put_super(struct super_block *sb)
mutex_destroy(&server->root_setup_lock);
mutex_destroy(&server->mutex);
- if (server->info_filp)
- fput(server->info_filp);
- fput(server->ncp_filp);
+ if (server->info_sock)
+ sockfd_put(server->info_sock);
+ sockfd_put(server->ncp_sock);
kill_pid(server->m.wdog_pid, SIGTERM, 1);
put_pid(server->m.wdog_pid);
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index b81e97adc5a9..7fa17e459366 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -45,9 +45,7 @@ struct ncp_server {
__u8 name_space[NCP_NUMBER_OF_VOLUMES + 2];
- struct file *ncp_filp; /* File pointer to ncp socket */
struct socket *ncp_sock;/* ncp socket */
- struct file *info_filp;
struct socket *info_sock;
u8 sequence;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9d8153ebacfb..f47af5e6e230 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1704,8 +1704,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
iput(bvi);
skip_large_index_stuff:
/* Setup the operations for this index inode. */
- vi->i_op = NULL;
- vi->i_fop = NULL;
vi->i_mapping->a_ops = &ntfs_mst_aops;
vi->i_blocks = ni->allocated_size >> 9;
/*
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eb649d23a4de..dfda2ffdb16c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
{
- int ret;
- mm_segment_t oldfs;
- struct kvec vec = {
- .iov_len = len,
- .iov_base = data,
- };
- struct msghdr msg = {
- .msg_iovlen = 1,
- .msg_iov = (struct iovec *)&vec,
- .msg_flags = MSG_DONTWAIT,
- };
-
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
- set_fs(oldfs);
-
- return ret;
+ struct kvec vec = { .iov_len = len, .iov_base = data, };
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
+ return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
}
static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
size_t veclen, size_t total)
{
int ret;
- mm_segment_t oldfs;
- struct msghdr msg = {
- .msg_iov = (struct iovec *)vec,
- .msg_iovlen = veclen,
- };
+ struct msghdr msg;
if (sock == NULL) {
ret = -EINVAL;
goto out;
}
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_sendmsg(sock, &msg, total);
- set_fs(oldfs);
- if (ret != total) {
- mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret,
- total);
- if (ret >= 0)
- ret = -EPIPE; /* should be smarter, I bet */
- goto out;
- }
-
- ret = 0;
+ ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
+ if (likely(ret == total))
+ return 0;
+ mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
+ if (ret >= 0)
+ ret = -EPIPE; /* should be smarter, I bet */
out:
- if (ret < 0)
- mlog(0, "returning error: %d\n", ret);
+ mlog(0, "returning error: %d\n", ret);
return ret;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ff33c5ef87f2..8970dcf74de5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2367,15 +2367,18 @@ relock:
if (direct_io) {
written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
- ppos, count, ocount);
+ count, ocount);
if (written < 0) {
ret = written;
goto out_dio;
}
} else {
+ struct iov_iter from;
+ iov_iter_init(&from, iov, nr_segs, count, 0);
current->backing_dev_info = file->f_mapping->backing_dev_info;
- written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
- ppos, count, 0);
+ written = generic_perform_write(file, &from, *ppos);
+ if (likely(written >= 0))
+ iocb->ki_pos = *ppos + written;
current->backing_dev_info = NULL;
}
diff --git a/fs/open.c b/fs/open.c
index 631aea815def..3d30eb1fc95e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -655,35 +655,6 @@ out:
return error;
}
-/*
- * You have to be very careful that these write
- * counts get cleaned up in error cases and
- * upon __fput(). This should probably never
- * be called outside of __dentry_open().
- */
-static inline int __get_file_write_access(struct inode *inode,
- struct vfsmount *mnt)
-{
- int error;
- error = get_write_access(inode);
- if (error)
- return error;
- /*
- * Do not take mount writer counts on
- * special files since no writes to
- * the mount itself will occur.
- */
- if (!special_file(inode->i_mode)) {
- /*
- * Balanced in __fput()
- */
- error = __mnt_want_write(mnt);
- if (error)
- put_write_access(inode);
- }
- return error;
-}
-
int open_check_o_direct(struct file *f)
{
/* NB: we're sure to have correct a_ops only after f_op->open */
@@ -708,26 +679,28 @@ static int do_dentry_open(struct file *f,
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
- if (unlikely(f->f_flags & O_PATH))
- f->f_mode = FMODE_PATH;
-
path_get(&f->f_path);
inode = f->f_inode = f->f_path.dentry->d_inode;
- if (f->f_mode & FMODE_WRITE) {
- error = __get_file_write_access(inode, f->f_path.mnt);
- if (error)
- goto cleanup_file;
- if (!special_file(inode->i_mode))
- file_take_write(f);
- }
-
f->f_mapping = inode->i_mapping;
- if (unlikely(f->f_mode & FMODE_PATH)) {
+ if (unlikely(f->f_flags & O_PATH)) {
+ f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
return 0;
}
+ if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+ error = get_write_access(inode);
+ if (unlikely(error))
+ goto cleanup_file;
+ error = __mnt_want_write(f->f_path.mnt);
+ if (unlikely(error)) {
+ put_write_access(inode);
+ goto cleanup_file;
+ }
+ f->f_mode |= FMODE_WRITER;
+ }
+
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
if (S_ISREG(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
@@ -764,18 +737,9 @@ static int do_dentry_open(struct file *f,
cleanup_all:
fops_put(f->f_op);
- if (f->f_mode & FMODE_WRITE) {
+ if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
- if (!special_file(inode->i_mode)) {
- /*
- * We don't consider this a real
- * mnt_want/drop_write() pair
- * because it all happenend right
- * here, so just reset the state.
- */
- file_reset_write(f);
- __mnt_drop_write(f->f_path.mnt);
- }
+ __mnt_drop_write(f->f_path.mnt);
}
cleanup_file:
path_put(&f->f_path);
diff --git a/fs/pipe.c b/fs/pipe.c
index 78fd0d0788db..034bffac3f97 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -142,55 +142,6 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
return 0;
}
-static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
- int atomic)
-{
- unsigned long copy;
-
- while (len > 0) {
- while (!iov->iov_len)
- iov++;
- copy = min_t(unsigned long, len, iov->iov_len);
-
- if (atomic) {
- if (__copy_to_user_inatomic(iov->iov_base, from, copy))
- return -EFAULT;
- } else {
- if (copy_to_user(iov->iov_base, from, copy))
- return -EFAULT;
- }
- from += copy;
- len -= copy;
- iov->iov_base += copy;
- iov->iov_len -= copy;
- }
- return 0;
-}
-
-/*
- * Attempt to pre-fault in the user memory, so we can use atomic copies.
- * Returns the number of bytes not faulted in.
- */
-static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
-{
- while (!iov->iov_len)
- iov++;
-
- while (len > 0) {
- unsigned long this_len;
-
- this_len = min_t(unsigned long, len, iov->iov_len);
- if (fault_in_pages_writeable(iov->iov_base, this_len))
- break;
-
- len -= this_len;
- iov++;
- }
-
- return len;
-}
-
/*
* Pre-fault in the user memory, so we can use atomic copies.
*/
@@ -226,52 +177,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
}
/**
- * generic_pipe_buf_map - virtually map a pipe buffer
- * @pipe: the pipe that the buffer belongs to
- * @buf: the buffer that should be mapped
- * @atomic: whether to use an atomic map
- *
- * Description:
- * This function returns a kernel virtual address mapping for the
- * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
- * and the caller has to be careful not to fault before calling
- * the unmap function.
- *
- * Note that this function calls kmap_atomic() if @atomic != 0.
- */
-void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf, int atomic)
-{
- if (atomic) {
- buf->flags |= PIPE_BUF_FLAG_ATOMIC;
- return kmap_atomic(buf->page);
- }
-
- return kmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_map);
-
-/**
- * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
- * @pipe: the pipe that the buffer belongs to
- * @buf: the buffer that should be unmapped
- * @map_data: the data that the mapping function returned
- *
- * Description:
- * This function undoes the mapping that ->map() provided.
- */
-void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf, void *map_data)
-{
- if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
- buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
- kunmap_atomic(map_data);
- } else
- kunmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_unmap);
-
-/**
* generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to attempt to steal
@@ -351,8 +256,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release);
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release,
.steal = generic_pipe_buf_steal,
@@ -361,8 +264,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
static const struct pipe_buf_operations packet_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release,
.steal = generic_pipe_buf_steal,
@@ -379,12 +280,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
ssize_t ret;
struct iovec *iov = (struct iovec *)_iov;
size_t total_len;
+ struct iov_iter iter;
total_len = iov_length(iov, nr_segs);
/* Null read succeeds. */
if (unlikely(total_len == 0))
return 0;
+ iov_iter_init(&iter, iov, nr_segs, total_len, 0);
+
do_wakeup = 0;
ret = 0;
__pipe_lock(pipe);
@@ -394,9 +298,9 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
int curbuf = pipe->curbuf;
struct pipe_buffer *buf = pipe->bufs + curbuf;
const struct pipe_buf_operations *ops = buf->ops;
- void *addr;
size_t chars = buf->len;
- int error, atomic;
+ size_t written;
+ int error;
if (chars > total_len)
chars = total_len;
@@ -408,21 +312,10 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
break;
}
- atomic = !iov_fault_in_pages_write(iov, chars);
-redo:
- addr = ops->map(pipe, buf, atomic);
- error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
- ops->unmap(pipe, buf, addr);
- if (unlikely(error)) {
- /*
- * Just retry with the slow path if we failed.
- */
- if (atomic) {
- atomic = 0;
- goto redo;
- }
+ written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
+ if (unlikely(written < chars)) {
if (!ret)
- ret = error;
+ ret = -EFAULT;
break;
}
ret += chars;
@@ -538,10 +431,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
iov_fault_in_pages_read(iov, chars);
redo1:
- addr = ops->map(pipe, buf, atomic);
+ if (atomic)
+ addr = kmap_atomic(buf->page);
+ else
+ addr = kmap(buf->page);
error = pipe_iov_copy_from_user(offset + addr, iov,
chars, atomic);
- ops->unmap(pipe, buf, addr);
+ if (atomic)
+ kunmap_atomic(addr);
+ else
+ kunmap(buf->page);
ret = error;
do_wakeup = 1;
if (error) {
diff --git a/fs/pnode.c b/fs/pnode.c
index 88396df725b4..302bf22c4a30 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -164,46 +164,94 @@ static struct mount *propagation_next(struct mount *m,
}
}
-/*
- * return the source mount to be used for cloning
- *
- * @dest the current destination mount
- * @last_dest the last seen destination mount
- * @last_src the last seen source mount
- * @type return CL_SLAVE if the new mount has to be
- * cloned as a slave.
- */
-static struct mount *get_source(struct mount *dest,
- struct mount *last_dest,
- struct mount *last_src,
- int *type)
+static struct mount *next_group(struct mount *m, struct mount *origin)
{
- struct mount *p_last_src = NULL;
- struct mount *p_last_dest = NULL;
-
- while (last_dest != dest->mnt_master) {
- p_last_dest = last_dest;
- p_last_src = last_src;
- last_dest = last_dest->mnt_master;
- last_src = last_src->mnt_master;
+ while (1) {
+ while (1) {
+ struct mount *next;
+ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ return first_slave(m);
+ next = next_peer(m);
+ if (m->mnt_group_id == origin->mnt_group_id) {
+ if (next == origin)
+ return NULL;
+ } else if (m->mnt_slave.next != &next->mnt_slave)
+ break;
+ m = next;
+ }
+ /* m is the last peer */
+ while (1) {
+ struct mount *master = m->mnt_master;
+ if (m->mnt_slave.next != &master->mnt_slave_list)
+ return next_slave(m);
+ m = next_peer(master);
+ if (master->mnt_group_id == origin->mnt_group_id)
+ break;
+ if (master->mnt_slave.next == &m->mnt_slave)
+ break;
+ m = master;
+ }
+ if (m == origin)
+ return NULL;
}
+}
- if (p_last_dest) {
- do {
- p_last_dest = next_peer(p_last_dest);
- } while (IS_MNT_NEW(p_last_dest));
- /* is that a peer of the earlier? */
- if (dest == p_last_dest) {
- *type = CL_MAKE_SHARED;
- return p_last_src;
+/* all accesses are serialized by namespace_sem */
+static struct user_namespace *user_ns;
+static struct mount *last_dest, *last_source, *dest_master;
+static struct mountpoint *mp;
+static struct hlist_head *list;
+
+static int propagate_one(struct mount *m)
+{
+ struct mount *child;
+ int type;
+ /* skip ones added by this propagate_mnt() */
+ if (IS_MNT_NEW(m))
+ return 0;
+ /* skip if mountpoint isn't covered by it */
+ if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
+ return 0;
+ if (m->mnt_group_id == last_dest->mnt_group_id) {
+ type = CL_MAKE_SHARED;
+ } else {
+ struct mount *n, *p;
+ for (n = m; ; n = p) {
+ p = n->mnt_master;
+ if (p == dest_master || IS_MNT_MARKED(p)) {
+ while (last_dest->mnt_master != p) {
+ last_source = last_source->mnt_master;
+ last_dest = last_source->mnt_parent;
+ }
+ if (n->mnt_group_id != last_dest->mnt_group_id) {
+ last_source = last_source->mnt_master;
+ last_dest = last_source->mnt_parent;
+ }
+ break;
+ }
}
+ type = CL_SLAVE;
+ /* beginning of peer group among the slaves? */
+ if (IS_MNT_SHARED(m))
+ type |= CL_MAKE_SHARED;
}
- /* slave of the earlier, then */
- *type = CL_SLAVE;
- /* beginning of peer group among the slaves? */
- if (IS_MNT_SHARED(dest))
- *type |= CL_MAKE_SHARED;
- return last_src;
+
+ /* Notice when we are propagating across user namespaces */
+ if (m->mnt_ns->user_ns != user_ns)
+ type |= CL_UNPRIVILEGED;
+ child = copy_tree(last_source, last_source->mnt.mnt_root, type);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+ mnt_set_mountpoint(m, mp, child);
+ last_dest = m;
+ last_source = child;
+ if (m->mnt_master != dest_master) {
+ read_seqlock_excl(&mount_lock);
+ SET_MNT_MARK(m->mnt_master);
+ read_sequnlock_excl(&mount_lock);
+ }
+ hlist_add_head(&child->mnt_hash, list);
+ return 0;
}
/*
@@ -222,56 +270,48 @@ static struct mount *get_source(struct mount *dest,
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
struct mount *source_mnt, struct hlist_head *tree_list)
{
- struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
- struct mount *m, *child;
+ struct mount *m, *n;
int ret = 0;
- struct mount *prev_dest_mnt = dest_mnt;
- struct mount *prev_src_mnt = source_mnt;
- HLIST_HEAD(tmp_list);
-
- for (m = propagation_next(dest_mnt, dest_mnt); m;
- m = propagation_next(m, dest_mnt)) {
- int type;
- struct mount *source;
-
- if (IS_MNT_NEW(m))
- continue;
-
- source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
-
- /* Notice when we are propagating across user namespaces */
- if (m->mnt_ns->user_ns != user_ns)
- type |= CL_UNPRIVILEGED;
-
- child = copy_tree(source, source->mnt.mnt_root, type);
- if (IS_ERR(child)) {
- ret = PTR_ERR(child);
- tmp_list = *tree_list;
- tmp_list.first->pprev = &tmp_list.first;
- INIT_HLIST_HEAD(tree_list);
+
+ /*
+ * we don't want to bother passing tons of arguments to
+ * propagate_one(); everything is serialized by namespace_sem,
+ * so globals will do just fine.
+ */
+ user_ns = current->nsproxy->mnt_ns->user_ns;
+ last_dest = dest_mnt;
+ last_source = source_mnt;
+ mp = dest_mp;
+ list = tree_list;
+ dest_master = dest_mnt->mnt_master;
+
+ /* all peers of dest_mnt, except dest_mnt itself */
+ for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
+ ret = propagate_one(n);
+ if (ret)
goto out;
- }
+ }
- if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
- mnt_set_mountpoint(m, dest_mp, child);
- hlist_add_head(&child->mnt_hash, tree_list);
- } else {
- /*
- * This can happen if the parent mount was bind mounted
- * on some subdirectory of a shared/slave mount.
- */
- hlist_add_head(&child->mnt_hash, &tmp_list);
- }
- prev_dest_mnt = m;
- prev_src_mnt = child;
+ /* all slave groups */
+ for (m = next_group(dest_mnt, dest_mnt); m;
+ m = next_group(m, dest_mnt)) {
+ /* everything in that slave group */
+ n = m;
+ do {
+ ret = propagate_one(n);
+ if (ret)
+ goto out;
+ n = next_peer(n);
+ } while (n != m);
}
out:
- lock_mount_hash();
- while (!hlist_empty(&tmp_list)) {
- child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
- umount_tree(child, 0);
+ read_seqlock_excl(&mount_lock);
+ hlist_for_each_entry(n, tree_list, mnt_hash) {
+ m = n->mnt_parent;
+ if (m->mnt_master != dest_mnt->mnt_master)
+ CLEAR_MNT_MARK(m->mnt_master);
}
- unlock_mount_hash();
+ read_sequnlock_excl(&mount_lock);
return ret;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index fc28a27fa892..4a246358b031 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -16,6 +16,9 @@
#define IS_MNT_NEW(m) (!(m)->mnt_ns)
#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
#define CL_EXPIRE 0x01
#define CL_SLAVE 0x02
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 9ae46b87470d..89026095f2b5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
struct task_struct *task;
void *ns;
char name[50];
- int len = -EACCES;
+ int res = -EACCES;
task = get_proc_task(inode);
if (!task)
@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
- len = -ENOENT;
+ res = -ENOENT;
ns = ns_ops->get(task);
if (!ns)
goto out_put_task;
snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
- len = strlen(name);
-
- if (len > buflen)
- len = buflen;
- if (copy_to_user(buffer, name, len))
- len = -EFAULT;
-
+ res = readlink_copy(buffer, buflen, name);
ns_ops->put(ns);
out_put_task:
put_task_struct(task);
out:
- return len;
+ return res;
}
static const struct inode_operations proc_ns_link_inode_operations = {
diff --git a/fs/proc/self.c b/fs/proc/self.c
index ffeb202ec942..4348bb8907c2 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
if (!tgid)
return -ENOENT;
sprintf(tmp, "%d", tgid);
- return vfs_readlink(dentry,buffer,buflen,tmp);
+ return readlink_copy(buffer, buflen, tmp);
}
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 7be26f03a3f5..1a81373947f3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -267,6 +267,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->root = root;
p->m.poll_event = ns->event;
p->show = show;
+ p->cached_event = ~0ULL;
return 0;
diff --git a/fs/splice.c b/fs/splice.c
index 12028fa41def..9bc07d2b53cf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -136,8 +136,6 @@ error:
const struct pipe_buf_operations page_cache_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = page_cache_pipe_buf_confirm,
.release = page_cache_pipe_buf_release,
.steal = page_cache_pipe_buf_steal,
@@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
static const struct pipe_buf_operations user_page_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = page_cache_pipe_buf_release,
.steal = user_page_pipe_buf_steal,
@@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read);
static const struct pipe_buf_operations default_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release,
.steal = generic_pipe_buf_steal,
@@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
/* Pipe buffer operations for a socket and similar. */
const struct pipe_buf_operations nosteal_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release,
.steal = generic_pipe_buf_nosteal,
@@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
goto out;
if (buf->page != page) {
- char *src = buf->ops->map(pipe, buf, 1);
+ char *src = kmap_atomic(buf->page);
char *dst = kmap_atomic(page);
memcpy(dst + offset, src + buf->offset, this_len);
flush_dcache_page(page);
kunmap_atomic(dst);
- buf->ops->unmap(pipe, buf, src);
+ kunmap_atomic(src);
}
ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
page, fsdata);
@@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
void *data;
loff_t tmp = sd->pos;
- data = buf->ops->map(pipe, buf, 0);
+ data = kmap(buf->page);
ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
- buf->ops->unmap(pipe, buf, data);
+ kunmap(buf->page);
return ret;
}
@@ -1528,116 +1520,48 @@ static int get_iovec_page_array(const struct iovec __user *iov,
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
- char *src;
- int ret;
-
- /*
- * See if we can use the atomic maps, by prefaulting in the
- * pages and doing an atomic copy
- */
- if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
- src = buf->ops->map(pipe, buf, 1);
- ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
- sd->len);
- buf->ops->unmap(pipe, buf, src);
- if (!ret) {
- ret = sd->len;
- goto out;
- }
- }
-
- /*
- * No dice, use slow non-atomic map and copy
- */
- src = buf->ops->map(pipe, buf, 0);
-
- ret = sd->len;
- if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
- ret = -EFAULT;
-
- buf->ops->unmap(pipe, buf, src);
-out:
- if (ret > 0)
- sd->u.userptr += ret;
- return ret;
+ int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
+ return n == sd->len ? n : -EFAULT;
}
/*
* For lack of a better implementation, implement vmsplice() to userspace
* as a simple copy of the pipes pages to the user iov.
*/
-static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
struct splice_desc sd;
- ssize_t size;
- int error;
long ret;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t count = 0;
pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
- pipe_lock(pipe);
-
- error = ret = 0;
- while (nr_segs) {
- void __user *base;
- size_t len;
-
- /*
- * Get user address base and length for this iovec.
- */
- error = get_user(base, &iov->iov_base);
- if (unlikely(error))
- break;
- error = get_user(len, &iov->iov_len);
- if (unlikely(error))
- break;
-
- /*
- * Sanity check this iovec. 0 read succeeds.
- */
- if (unlikely(!len))
- break;
- if (unlikely(!base)) {
- error = -EFAULT;
- break;
- }
-
- if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
- error = -EFAULT;
- break;
- }
-
- sd.len = 0;
- sd.total_len = len;
- sd.flags = flags;
- sd.u.userptr = base;
- sd.pos = 0;
-
- size = __splice_from_pipe(pipe, &sd, pipe_to_user);
- if (size < 0) {
- if (!ret)
- ret = size;
-
- break;
- }
-
- ret += size;
+ ret = rw_copy_check_uvector(READ, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), iovstack, &iov);
+ if (ret <= 0)
+ return ret;
- if (size < len)
- break;
+ iov_iter_init(&iter, iov, nr_segs, count, 0);
- nr_segs--;
- iov++;
- }
+ sd.len = 0;
+ sd.total_len = count;
+ sd.flags = flags;
+ sd.u.data = &iter;
+ sd.pos = 0;
+ pipe_lock(pipe);
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
pipe_unlock(pipe);
- if (!ret)
- ret = error;
+ if (iov != iovstack)
+ kfree(iov);
return ret;
}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1037637957c7..d2c170f8b035 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
} else
up_write(&iinfo->i_data_sem);
- retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ retval = __generic_file_aio_write(iocb, iov, nr_segs);
mutex_unlock(&inode->i_mutex);
if (retval > 0) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 003c0051b62f..79e96ce98733 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -699,7 +699,7 @@ xfs_file_dio_aio_write(
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
+ &nr_segs, pos, count, ocount);
out:
xfs_rw_iunlock(ip, iolock);
@@ -715,7 +715,7 @@ xfs_file_buffered_aio_write(
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
- size_t ocount)
+ size_t count)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -724,7 +724,7 @@ xfs_file_buffered_aio_write(
ssize_t ret;
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- size_t count = ocount;
+ struct iov_iter from;
xfs_rw_ilock(ip, iolock);
@@ -732,14 +732,15 @@ xfs_file_buffered_aio_write(
if (ret)
goto out;
+ iov_iter_init(&from, iovp, nr_segs, count, 0);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_buffered_write(iocb, iovp, nr_segs,
- pos, &iocb->ki_pos, count, 0);
-
+ ret = generic_perform_write(file, &from, pos);
+ if (likely(ret >= 0))
+ iocb->ki_pos = pos + ret;
/*
* If we just got an ENOSPC, try to write back all dirty inodes to
* convert delalloc space to free up some of the excess reserved
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bcfe61202115..0b18776b075e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -271,32 +271,6 @@ xfs_open_by_handle(
return error;
}
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
- char __user *buffer,
- int buflen,
- const char *link)
-{
- int len;
-
- len = PTR_ERR(link);
- if (IS_ERR(link))
- goto out;
-
- len = strlen(link);
- if (len > (unsigned) buflen)
- len = buflen;
- if (copy_to_user(buffer, link, len))
- len = -EFAULT;
- out:
- return len;
-}
-
-
int
xfs_readlink_by_handle(
struct file *parfilp,
@@ -334,7 +308,7 @@ xfs_readlink_by_handle(
error = -xfs_readlink(XFS_I(dentry->d_inode), link);
if (error)
goto out_kfree;
- error = do_readlink(hreq->ohandle, olen, link);
+ error = readlink_copy(hreq->ohandle, olen, link);
if (error)
goto out_kfree;