summaryrefslogtreecommitdiff
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 16:03:12 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 16:03:12 -0800
commit57666509b70030a9483d13222bfec8eec5db07df (patch)
tree1e0021c2aabc2ce8832e8c816e2aa94b0b77a323 /fs/ceph/caps.c
parent87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (diff)
parent0aeff37abada9f8c08d2b10481a43d3ae406c823 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil: "The big item here is support for inline data for CephFS and for message signatures from Zheng. There are also several bug fixes, including interrupted flock request handling, 0-length xattrs, mksnap, cached readdir results, and a message version compat field. Finally there are several cleanups from Ilya, Dan, and Markus. Note that there is another series coming soon that fixes some bugs in the RBD 'lingering' requests, but it isn't quite ready yet" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (27 commits) ceph: fix setting empty extended attribute ceph: fix mksnap crash ceph: do_sync is never initialized libceph: fixup includes in pagelist.h ceph: support inline data feature ceph: flush inline version ceph: convert inline data to normal data before data write ceph: sync read inline data ceph: fetch inline data when getting Fcr cap refs ceph: use getattr request to fetch inline data ceph: add inline data to pagecache ceph: parse inline data in MClientReply and MClientCaps libceph: specify position of extent operation libceph: add CREATE osd operation support libceph: add SETXATTR/CMPXATTR osd operations support rbd: don't treat CEPH_OSD_OP_DELETE as extent op ceph: remove unused stringification macros libceph: require cephx message signature by default ceph: introduce global empty snap context ceph: message versioning fixes ...
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c132
1 files changed, 102 insertions, 30 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca661464b..b93c631c6c87 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
- u64 follows)
+ u64 follows, bool inline_data)
{
struct ceph_mds_caps *fc;
struct ceph_msg *msg;
+ void *p;
+ size_t extra_len;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
" seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ /* flock buffer size + inline version + inline data size */
+ extra_len = 4 + 8 + 4;
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
+ GFP_NOFS, false);
if (!msg)
return -ENOMEM;
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
fc->mode = cpu_to_le32(mode);
+ p = fc + 1;
+ /* flock buffer size */
+ ceph_encode_32(&p, 0);
+ /* inline version */
+ ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
+ /* inline data size */
+ ceph_encode_32(&p, 0);
+
fc->xattr_version = cpu_to_le64(xattr_version);
if (xattrs_buf) {
msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
u64 flush_tid = 0;
int i;
int ret;
+ bool inline_data;
held = cap->issued | cap->implemented;
revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
xattr_version = ci->i_xattrs.version;
}
+ inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
spin_unlock(&ci->i_ceph_lock);
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
uid, gid, mode, xattr_version, xattr_blob,
- follows);
+ follows, inline_data);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
capsnap->xattr_version, capsnap->xattr_blob,
- capsnap->follows);
+ capsnap->follows, capsnap->inline_data);
next_follows = capsnap->follows + 1;
ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
* requested from the MDS.
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff, int *check_max, int *err)
+ loff_t endoff, int *got, struct page **pinned_page,
+ int *check_max, int *err)
{
struct inode *inode = &ci->vfs_inode;
int ret = 0;
- int have, implemented;
+ int have, implemented, _got = 0;
int file_wanted;
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
+again:
spin_lock(&ci->i_ceph_lock);
/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF;
ret = 1;
- goto out;
+ goto out_unlock;
}
/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*check_max = 1;
ret = 1;
}
- goto out;
+ goto out_unlock;
}
/*
* If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*/
if (__ceph_have_pending_cap_snap(ci)) {
dout("get_cap_refs %p cap_snap_pending\n", inode);
- goto out;
+ goto out_unlock;
}
}
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
if ((revoking & not) == 0) {
- *got = need | (have & want);
- __take_cap_refs(ci, *got);
+ _got = need | (have & want);
+ __take_cap_refs(ci, _got);
ret = 1;
}
} else {
dout("get_cap_refs %p have %s needed %s\n", inode,
ceph_cap_string(have), ceph_cap_string(need));
}
-out:
+out_unlock:
spin_unlock(&ci->i_ceph_lock);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ i_size_read(inode) > 0) {
+ int ret1;
+ struct page *page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ *pinned_page = page;
+ goto out;
+ }
+ page_cache_release(page);
+ }
+ /*
+ * drop cap refs first because getattr while holding
+ * caps refs can cause deadlock.
+ */
+ ceph_put_cap_refs(ci, _got);
+ _got = 0;
+
+ /* getattr request will bring inline data into page cache */
+ ret1 = __ceph_do_getattr(inode, NULL,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 >= 0) {
+ ret = 0;
+ goto again;
+ }
+ *err = ret1;
+ ret = 1;
+ }
+out:
dout("get_cap_refs %p ret %d got %s\n", inode,
- ret, ceph_cap_string(*got));
+ ret, ceph_cap_string(_got));
+ *got = _got;
return ret;
}
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
- loff_t endoff)
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ loff_t endoff, int *got, struct page **pinned_page)
{
int check_max, ret, err;
@@ -2179,8 +2229,8 @@ retry:
check_max = 0;
err = 0;
ret = wait_event_interruptible(ci->i_cap_wq,
- try_get_cap_refs(ci, need, want,
- got, endoff,
+ try_get_cap_refs(ci, need, want, endoff,
+ got, pinned_page,
&check_max, &err));
if (err)
ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
static void handle_cap_grant(struct ceph_mds_client *mdsc,
struct inode *inode, struct ceph_mds_caps *grant,
void *snaptrace, int snaptrace_len,
+ u64 inline_version,
+ void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool queue_invalidate = false;
bool queue_revalidate = false;
bool deleted_inode = false;
+ bool fill_inline = false;
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
BUG_ON(cap->issued & ~cap->implemented);
+ if (inline_version > 0 && inline_version >= ci->i_inline_version) {
+ ci->i_inline_version = inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
wake = true;
}
+ if (fill_inline)
+ ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
+
if (queue_trunc) {
ceph_queue_vmtruncate(inode);
ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
+ u64 inline_version = 0;
+ void *inline_data = NULL;
+ u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
- void *flock;
- void *end;
- u32 flock_len;
+ void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
snaptrace = h + 1;
snaptrace_len = le32_to_cpu(h->snap_trace_len);
+ p = snaptrace + snaptrace_len;
if (le16_to_cpu(msg->hdr.version) >= 2) {
- void *p = snaptrace + snaptrace_len;
+ u32 flock_len;
ceph_decode_32_safe(&p, end, flock_len, bad);
if (p + flock_len > end)
goto bad;
- flock = p;
- } else {
- flock = NULL;
- flock_len = 0;
+ p += flock_len;
}
if (le16_to_cpu(msg->hdr.version) >= 3) {
if (op == CEPH_CAP_OP_IMPORT) {
- void *p = flock + flock_len;
if (p + sizeof(*peer) > end)
goto bad;
peer = p;
+ p += sizeof(*peer);
} else if (op == CEPH_CAP_OP_EXPORT) {
/* recorded in unused fields */
peer = (void *)&h->size;
}
}
+ if (le16_to_cpu(msg->hdr.version) >= 4) {
+ ceph_decode_64_safe(&p, end, inline_version, bad);
+ ceph_decode_32_safe(&p, end, inline_len, bad);
+ if (p + inline_len > end)
+ goto bad;
+ inline_data = p;
+ p += inline_len;
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
handle_cap_import(mdsc, inode, h, peer, session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+ inline_version, inline_data, inline_len,
msg->middle, session, cap, issued);
goto done_unlocked;
}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_GRANT:
__ceph_caps_issued(ci, &issued);
issued |= __ceph_caps_dirty(ci);
- handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
- session, cap, issued);
+ handle_cap_grant(mdsc, inode, h, NULL, 0,
+ inline_version, inline_data, inline_len,
+ msg->middle, session, cap, issued);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
- if (inode)
- iput(inode);
+ iput(inode);
return;
bad: