author     Linus Torvalds <torvalds@linux-foundation.org>   2022-08-03 13:50:22 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-08-03 13:50:22 -0700
commit     5264406cdb66c7003eb3edf53c9773b1b20611b9 (patch)
tree       e94f76f64a0b3b45dcb9f9bec85cce2ba78e1221 /lib
parent     200e340f2196d7fd427a5810d06e893b932f145a (diff)
parent     dd45ab9dd28c82fc495d98cd9788666fd8d76b99 (diff)
Merge tag 'pull-work.iov_iter-base' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs iov_iter updates from Al Viro:
 "Part 1 - isolated cleanups and optimizations.

  One of the goals is to reduce the overhead of using ->read_iter() and
  ->write_iter() instead of ->read()/->write().

  new_sync_{read,write}() has a surprising amount of overhead, in
  particular inside iocb_flags(). That's why the beginning of the series
  is in this pile; it's not directly iov_iter-related, but it's a part
  of the same work..."

* tag 'pull-work.iov_iter-base' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  first_iovec_segment(): just return address
  iov_iter: massage calling conventions for first_{iovec,bvec}_segment()
  iov_iter: first_{iovec,bvec}_segment() - simplify a bit
  iov_iter: lift dealing with maxpages out of first_{iovec,bvec}_segment()
  iov_iter_get_pages{,_alloc}(): cap the maxsize with MAX_RW_COUNT
  iov_iter_bvec_advance(): don't bother with bvec_iter
  copy_page_{to,from}_iter(): switch iovec variants to generic
  keep iocb_flags() result cached in struct file
  iocb: delay evaluation of IS_SYNC(...) until we want to check IOCB_DSYNC
  struct file: use anonymous union member for rcuhead and llist
  btrfs: use IOMAP_DIO_NOSYNC
  teach iomap_dio_rw() to suppress dsync
  No need of likely/unlikely on calls of check_copy_size()
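[Editor's note] For orientation, the heart of the iov_iter_bvec_advance() hunk further down is the replacement of the bvec_iter machinery with a plain linear walk over the segment array. Below is a minimal, self-contained user-space sketch of that loop; the minivec/mini_iter types and all names are hypothetical stand-ins for the kernel's bio_vec and iov_iter, kept only to show the shape of the walk, not the kernel API itself.

    /* Minimal user-space model of the open-coded bvec advance. */
    #include <stdio.h>
    #include <stddef.h>

    struct minivec {
            size_t bv_len;                  /* length of this segment */
    };

    struct mini_iter {
            const struct minivec *bvec;     /* current segment array */
            size_t nr_segs;                 /* segments remaining */
            size_t iov_offset;              /* offset into first segment */
            size_t count;                   /* bytes left in the iterator */
    };

    /* Walk forward 'size' bytes, dropping fully consumed segments. */
    static void mini_bvec_advance(struct mini_iter *i, size_t size)
    {
            const struct minivec *bvec, *end;

            if (!i->count)
                    return;
            i->count -= size;

            /* fold the intra-segment offset into the distance to walk */
            size += i->iov_offset;

            for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) {
                    if (size < bvec->bv_len)
                            break;
                    size -= bvec->bv_len;
            }
            i->iov_offset = size;           /* offset into new first segment */
            i->nr_segs -= bvec - i->bvec;
            i->bvec = bvec;
    }

    int main(void)
    {
            static const struct minivec segs[] = { {512}, {1024}, {256} };
            struct mini_iter it = { segs, 3, 0, 512 + 1024 + 256 };

            mini_bvec_advance(&it, 700);    /* eat seg 0 plus 188 bytes */
            printf("segs left %zu, offset %zu, count %zu\n",
                   it.nr_segs, it.iov_offset, it.count);
            return 0;
    }

Compiled with any C compiler, this prints "segs left 2, offset 188, count 1092": the first 512-byte segment is consumed entirely and the iterator lands 188 bytes into the second, mirroring what the kernel hunk does without building a throwaway bvec_iter.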
Diffstat (limited to 'lib')
-rw-r--r--  lib/iov_iter.c | 283
1 file changed, 52 insertions(+), 231 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 507e732ef7cf..58648fcd9a88 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -168,174 +168,6 @@ static int copyin(void *to, const void __user *from, size_t n)
return n;
}
-static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- size_t skip, copy, left, wanted;
- const struct iovec *iov;
- char __user *buf;
- void *kaddr, *from;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- might_fault();
- wanted = bytes;
- iov = i->iov;
- skip = i->iov_offset;
- buf = iov->iov_base + skip;
- copy = min(bytes, iov->iov_len - skip);
-
- if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
- kaddr = kmap_atomic(page);
- from = kaddr + offset;
-
- /* first chunk, usually the only one */
- left = copyout(buf, from, copy);
- copy -= left;
- skip += copy;
- from += copy;
- bytes -= copy;
-
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = copyout(buf, from, copy);
- copy -= left;
- skip = copy;
- from += copy;
- bytes -= copy;
- }
- if (likely(!bytes)) {
- kunmap_atomic(kaddr);
- goto done;
- }
- offset = from - kaddr;
- buf += copy;
- kunmap_atomic(kaddr);
- copy = min(bytes, iov->iov_len - skip);
- }
- /* Too bad - revert to non-atomic kmap */
-
- kaddr = kmap(page);
- from = kaddr + offset;
- left = copyout(buf, from, copy);
- copy -= left;
- skip += copy;
- from += copy;
- bytes -= copy;
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = copyout(buf, from, copy);
- copy -= left;
- skip = copy;
- from += copy;
- bytes -= copy;
- }
- kunmap(page);
-
-done:
- if (skip == iov->iov_len) {
- iov++;
- skip = 0;
- }
- i->count -= wanted - bytes;
- i->nr_segs -= iov - i->iov;
- i->iov = iov;
- i->iov_offset = skip;
- return wanted - bytes;
-}
-
-static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- size_t skip, copy, left, wanted;
- const struct iovec *iov;
- char __user *buf;
- void *kaddr, *to;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- might_fault();
- wanted = bytes;
- iov = i->iov;
- skip = i->iov_offset;
- buf = iov->iov_base + skip;
- copy = min(bytes, iov->iov_len - skip);
-
- if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
- kaddr = kmap_atomic(page);
- to = kaddr + offset;
-
- /* first chunk, usually the only one */
- left = copyin(to, buf, copy);
- copy -= left;
- skip += copy;
- to += copy;
- bytes -= copy;
-
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = copyin(to, buf, copy);
- copy -= left;
- skip = copy;
- to += copy;
- bytes -= copy;
- }
- if (likely(!bytes)) {
- kunmap_atomic(kaddr);
- goto done;
- }
- offset = to - kaddr;
- buf += copy;
- kunmap_atomic(kaddr);
- copy = min(bytes, iov->iov_len - skip);
- }
- /* Too bad - revert to non-atomic kmap */
-
- kaddr = kmap(page);
- to = kaddr + offset;
- left = copyin(to, buf, copy);
- copy -= left;
- skip += copy;
- to += copy;
- bytes -= copy;
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = copyin(to, buf, copy);
- copy -= left;
- skip = copy;
- to += copy;
- bytes -= copy;
- }
- kunmap(page);
-
-done:
- if (skip == iov->iov_len) {
- iov++;
- skip = 0;
- }
- i->count -= wanted - bytes;
- i->nr_segs -= iov - i->iov;
- i->iov = iov;
- i->iov_offset = skip;
- return wanted - bytes;
-}
-
#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
@@ -848,24 +680,14 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
- if (likely(iter_is_iovec(i)))
- return copy_page_to_iter_iovec(page, offset, bytes, i);
- if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
+ return copy_page_to_iter_pipe(page, offset, bytes, i);
+ } else {
void *kaddr = kmap_local_page(page);
size_t wanted = _copy_to_iter(kaddr + offset, bytes, i);
kunmap_local(kaddr);
return wanted;
}
- if (iov_iter_is_pipe(i))
- return copy_page_to_iter_pipe(page, offset, bytes, i);
- if (unlikely(iov_iter_is_discard(i))) {
- if (unlikely(i->count < bytes))
- bytes = i->count;
- i->count -= bytes;
- return bytes;
- }
- WARN_ON(1);
- return 0;
}
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
@@ -896,17 +718,12 @@ EXPORT_SYMBOL(copy_page_to_iter);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
- if (unlikely(!page_copy_sane(page, offset, bytes)))
- return 0;
- if (likely(iter_is_iovec(i)))
- return copy_page_from_iter_iovec(page, offset, bytes, i);
- if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
+ if (page_copy_sane(page, offset, bytes)) {
void *kaddr = kmap_local_page(page);
size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
kunmap_local(kaddr);
return wanted;
}
- WARN_ON(1);
return 0;
}
EXPORT_SYMBOL(copy_page_from_iter);
@@ -1029,17 +846,22 @@ static void pipe_advance(struct iov_iter *i, size_t size)
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
- struct bvec_iter bi;
+ const struct bio_vec *bvec, *end;
- bi.bi_size = i->count;
- bi.bi_bvec_done = i->iov_offset;
- bi.bi_idx = 0;
- bvec_iter_advance(i->bvec, &bi, size);
+ if (!i->count)
+ return;
+ i->count -= size;
+
+ size += i->iov_offset;
- i->bvec += bi.bi_idx;
- i->nr_segs -= bi.bi_idx;
- i->count = bi.bi_size;
- i->iov_offset = bi.bi_bvec_done;
+ for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) {
+ if (likely(size < bvec->bv_len))
+ break;
+ size -= bvec->bv_len;
+ }
+ i->iov_offset = size;
+ i->nr_segs -= bvec - i->bvec;
+ i->bvec = bvec;
}
static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
@@ -1557,47 +1379,36 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
}
/* must be done on non-empty ITER_IOVEC one */
-static unsigned long first_iovec_segment(const struct iov_iter *i,
- size_t *size, size_t *start,
- size_t maxsize, unsigned maxpages)
+static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
{
size_t skip;
long k;
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
- unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
size_t len = i->iov[k].iov_len - skip;
if (unlikely(!len))
continue;
- if (len > maxsize)
- len = maxsize;
- len += (*start = addr % PAGE_SIZE);
- if (len > maxpages * PAGE_SIZE)
- len = maxpages * PAGE_SIZE;
- *size = len;
- return addr & PAGE_MASK;
+ if (*size > len)
+ *size = len;
+ return (unsigned long)i->iov[k].iov_base + skip;
}
BUG(); // if it had been empty, we wouldn't get called
}
/* must be done on non-empty ITER_BVEC one */
static struct page *first_bvec_segment(const struct iov_iter *i,
- size_t *size, size_t *start,
- size_t maxsize, unsigned maxpages)
+ size_t *size, size_t *start)
{
struct page *page;
size_t skip = i->iov_offset, len;
len = i->bvec->bv_len - skip;
- if (len > maxsize)
- len = maxsize;
+ if (*size > len)
+ *size = len;
skip += i->bvec->bv_offset;
page = i->bvec->bv_page + skip / PAGE_SIZE;
- len += (*start = skip % PAGE_SIZE);
- if (len > maxpages * PAGE_SIZE)
- len = maxpages * PAGE_SIZE;
- *size = len;
+ *start = skip % PAGE_SIZE;
return page;
}
@@ -1605,13 +1416,14 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
- size_t len;
int n, res;
if (maxsize > i->count)
maxsize = i->count;
if (!maxsize)
return 0;
+ if (maxsize > MAX_RW_COUNT)
+ maxsize = MAX_RW_COUNT;
if (likely(iter_is_iovec(i))) {
unsigned int gup_flags = 0;
@@ -1622,21 +1434,27 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (i->nofault)
gup_flags |= FOLL_NOFAULT;
- addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
+ addr = first_iovec_segment(i, &maxsize);
+ *start = addr % PAGE_SIZE;
+ addr &= PAGE_MASK;
+ n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
+ if (n > maxpages)
+ n = maxpages;
res = get_user_pages_fast(addr, n, gup_flags, pages);
if (unlikely(res <= 0))
return res;
- return (res == n ? len : res * PAGE_SIZE) - *start;
+ return min_t(size_t, maxsize, res * PAGE_SIZE - *start);
}
if (iov_iter_is_bvec(i)) {
struct page *page;
- page = first_bvec_segment(i, &len, start, maxsize, maxpages);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
- while (n--)
+ page = first_bvec_segment(i, &maxsize, start);
+ n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
+ if (n > maxpages)
+ n = maxpages;
+ for (int k = 0; k < n; k++)
get_page(*pages++ = page++);
- return len - *start;
+ return min_t(size_t, maxsize, n * PAGE_SIZE - *start);
}
if (iov_iter_is_pipe(i))
return pipe_get_pages(i, pages, maxsize, maxpages, start);
@@ -1725,13 +1543,14 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
size_t *start)
{
struct page **p;
- size_t len;
int n, res;
if (maxsize > i->count)
maxsize = i->count;
if (!maxsize)
return 0;
+ if (maxsize > MAX_RW_COUNT)
+ maxsize = MAX_RW_COUNT;
if (likely(iter_is_iovec(i))) {
unsigned int gup_flags = 0;
@@ -1742,8 +1561,10 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
if (i->nofault)
gup_flags |= FOLL_NOFAULT;
- addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
+ addr = first_iovec_segment(i, &maxsize);
+ *start = addr % PAGE_SIZE;
+ addr &= PAGE_MASK;
+ n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
p = get_pages_array(n);
if (!p)
return -ENOMEM;
@@ -1754,19 +1575,19 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
return res;
}
*pages = p;
- return (res == n ? len : res * PAGE_SIZE) - *start;
+ return min_t(size_t, maxsize, res * PAGE_SIZE - *start);
}
if (iov_iter_is_bvec(i)) {
struct page *page;
- page = first_bvec_segment(i, &len, start, maxsize, ~0U);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
+ page = first_bvec_segment(i, &maxsize, start);
+ n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
*pages = p = get_pages_array(n);
if (!p)
return -ENOMEM;
- while (n--)
+ for (int k = 0; k < n; k++)
get_page(*p++ = page++);
- return len - *start;
+ return min_t(size_t, maxsize, n * PAGE_SIZE - *start);
}
if (iov_iter_is_pipe(i))
return pipe_get_pages_alloc(i, pages, maxsize, start);