From 64fd72e0a44bdd62c5ca277cb24d0d02b2d8e9dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 09:48:44 -0400 Subject: lift the "already marked killed" case into shrink_dentry_list() It can happen only when dentry_kill() is called with unlock_on_failure equal to 0 - other callers had dentry pinned until the moment they've got ->d_lock and DCACHE_DENTRY_KILLED is set only after lockref_mark_dead(). IOW, only one of three call sites of dentry_kill() might end up reaching that code. Just move it there. Signed-off-by: Al Viro --- fs/dcache.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 42ae01eefc07..6888dde4d568 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -455,12 +455,6 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure) struct dentry *parent = NULL; bool can_free = true; - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - can_free = dentry->d_flags & DCACHE_MAY_FREE; - spin_unlock(&dentry->d_lock); - goto out; - } - inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { relock: @@ -815,6 +809,15 @@ static void shrink_dentry_list(struct list_head *list) continue; } + + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (can_free) + dentry_free(dentry); + continue; + } + parent = dentry_kill(dentry, 0); /* * If dentry_kill returns NULL, we have nothing more to do. -- cgit v1.2.3 From e55fd011549eae01a230e3cace6f4d031b6a3453 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 13:51:12 -0400 Subject: split dentry_kill() ... into trylocks and everything else. The latter (actual killing) is __dentry_kill(). Signed-off-by: Al Viro --- fs/dcache.c | 62 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 6888dde4d568..1577c14dfb4e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -441,36 +441,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; struct dentry *parent = NULL; bool can_free = true; - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } - return dentry; /* try again with same dentry */ - } if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -514,10 +490,44 @@ relock: can_free = false; } spin_unlock(&dentry->d_lock); -out: if (likely(can_free)) dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry * +dentry_kill(struct dentry *dentry, int unlock_on_failure) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); return parent; + +failed: + if (unlock_on_failure) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + } + return dentry; /* try again with same dentry */ } /* -- cgit v1.2.3 From ff2fde9929feb2aef45377ce56b8b12df85dda69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 13:59:13 -0400 Subject: expand dentry_kill(dentry, 0) in shrink_dentry_list() Result will be massaged to saner shape in the next commits. It is ugly, no questions - the point of that one is to be a provably equivalent transformation (and it might be worth splitting a bit more). Signed-off-by: Al Viro --- fs/dcache.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1577c14dfb4e..c23f78a9d156 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -801,6 +801,7 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry, *parent; while (!list_empty(list)) { + struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); /* @@ -828,23 +829,26 @@ static void shrink_dentry_list(struct list_head *list) continue; } - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - */ - if (!parent) - continue; - - if (unlikely(parent == dentry)) { - /* - * trylocks have failed and d_lock has been held the - * whole time, so it could not have been added to any - * other lists. Just add it back to the shrink list. - */ + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); continue; } + + parent = NULL; + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + d_shrink_add(dentry, list); + spin_unlock(&dentry->d_lock); + continue; + } + } + + __dentry_kill(dentry); /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also -- cgit v1.2.3 From 046b961b45f93a92e4c70525a12f3d378bced130 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 08:54:52 -0400 Subject: shrink_dentry_list(): take parent's ->d_lock earlier The cause of livelocks there is that we are taking ->d_lock on dentry and its parent in the wrong order, forcing us to use trylock on the parent's one. d_walk() takes them in the right order, and unfortunately it's not hard to create a situation when shrink_dentry_list() can't make progress since trylock keeps failing, and shrink_dcache_parent() or check_submounts_and_drop() keeps calling d_walk() disrupting the very shrink_dentry_list() it's waiting for. Solution is straightforward - if that trylock fails, let's unlock the dentry itself and take locks in the right order. We need to stabilize ->d_parent without holding ->d_lock, but that's doable using RCU. And we'd better do that in the very beginning of the loop in shrink_dentry_list(), since the checks on refcount, etc. would need to be redone anyway. That deals with a half of the problem - killing dentries on the shrink list itself. Another one (dropping their parents) is in the next commit. locking parent is interesting - it would be easy to do rcu_read_lock(), lock whatever we think is a parent, lock dentry itself and check if the parent is still the right one. Except that we need to check that *before* locking the dentry, or we are risking taking ->d_lock out of order. Fortunately, once the D1 is locked, we can check if D2->d_parent is equal to D1 without the need to lock D2; D2->d_parent can start or stop pointing to D1 only under D1->d_lock, so taking D1->d_lock is enough. In other words, the right solution is rcu_read_lock/lock what looks like parent right now/check if it's still our parent/rcu_read_unlock/lock the child. Signed-off-by: Al Viro --- fs/dcache.c | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c23f78a9d156..d54a99baf4f3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -530,6 +530,38 @@ failed: return dentry; /* try again with same dentry */ } +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + spin_unlock(&dentry->d_lock); + rcu_read_lock(); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock(&dentry->d_lock); + else + parent = NULL; + return parent; +} + /* * This is dput * @@ -804,6 +836,8 @@ static void shrink_dentry_list(struct list_head *list) struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); + /* * The dispose list is isolated and dentries are not accounted * to the LRU here, so we can simply remove it from the list @@ -817,6 +851,8 @@ static void shrink_dentry_list(struct list_head *list) */ if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } @@ -824,6 +860,8 @@ static void shrink_dentry_list(struct list_head *list) if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { bool can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); if (can_free) dentry_free(dentry); continue; @@ -833,22 +871,13 @@ static void shrink_dentry_list(struct list_head *list) if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - parent = NULL; - if (!IS_ROOT(dentry)) { - parent = dentry->d_parent; - if (unlikely(!spin_trylock(&parent->d_lock))) { - if (inode) - spin_unlock(&inode->i_lock); - d_shrink_add(dentry, list); - spin_unlock(&dentry->d_lock); - continue; - } - } - __dentry_kill(dentry); + /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also -- cgit v1.2.3 From b2b80195d8829921506880f6dccd21cabd163d0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 09:11:45 -0400 Subject: dealing with the rest of shrink_dentry_list() livelock We have the same problem with ->d_lock order in the inner loop, where we are dropping references to ancestors. Same solution, basically - instead of using dentry_kill() we use lock_parent() (introduced in the previous commit) to get that lock in a safe way, recheck ->d_count (in case if lock_parent() has ended up dropping and retaking ->d_lock and somebody managed to grab a reference during that window), trylock the inode->i_lock and use __dentry_kill() to do the rest. Signed-off-by: Al Viro --- fs/dcache.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d54a99baf4f3..eb7c7255470c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -885,8 +885,26 @@ static void shrink_dentry_list(struct list_head *list) * fragmentation. */ dentry = parent; - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) - dentry = dentry_kill(dentry, 1); + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_relax(); + continue; + } + __dentry_kill(dentry); + dentry = parent; + } } } -- cgit v1.2.3 From 8cbf74da435d1bd13dbb790f94c7ff67b2fb6af4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 09:18:26 -0400 Subject: dentry_kill() doesn't need the second argument now it's 1 in the only remaining caller. Signed-off-by: Al Viro --- fs/dcache.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index eb7c7255470c..bce851dc03ef 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -500,8 +500,7 @@ static void __dentry_kill(struct dentry *dentry) * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) +static struct dentry *dentry_kill(struct dentry *dentry) __releases(dentry->d_lock) { struct inode *inode = dentry->d_inode; @@ -523,10 +522,8 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure) return parent; failed: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } + spin_unlock(&dentry->d_lock); + cpu_relax(); return dentry; /* try again with same dentry */ } @@ -615,7 +612,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } -- cgit v1.2.3