summaryrefslogtreecommitdiff
path: root/mm/workingset.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/workingset.c')
-rw-r--r--mm/workingset.c114
1 files changed, 77 insertions, 37 deletions
diff --git a/mm/workingset.c b/mm/workingset.c
index fb1f9183d89a..241fa5d6b3b2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/swap.h>
+#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -334,48 +335,81 @@ out:
* point where they would still be useful.
*/
-struct list_lru workingset_shadow_nodes;
+static struct list_lru shadow_nodes;
+
+void workingset_update_node(struct radix_tree_node *node, void *private)
+{
+ struct address_space *mapping = private;
+
+ /* Only regular page cache has shadow entries */
+ if (dax_mapping(mapping) || shmem_mapping(mapping))
+ return;
+
+ /*
+ * Track non-empty nodes that contain only shadow entries;
+ * unlink those that contain pages or are being freed.
+ *
+ * Avoid acquiring the list_lru lock when the nodes are
+ * already where they should be. The list_empty() test is safe
+ * as node->private_list is protected by &mapping->tree_lock.
+ */
+ if (node->count && node->count == node->exceptional) {
+ if (list_empty(&node->private_list)) {
+ node->private_data = mapping;
+ list_lru_add(&shadow_nodes, &node->private_list);
+ }
+ } else {
+ if (!list_empty(&node->private_list))
+ list_lru_del(&shadow_nodes, &node->private_list);
+ }
+}
static unsigned long count_shadow_nodes(struct shrinker *shrinker,
struct shrink_control *sc)
{
- unsigned long shadow_nodes;
unsigned long max_nodes;
- unsigned long pages;
+ unsigned long nodes;
+ unsigned long cache;
/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
local_irq_disable();
- shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
+ nodes = list_lru_shrink_count(&shadow_nodes, sc);
local_irq_enable();
- if (sc->memcg) {
- pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
- LRU_ALL_FILE);
- } else {
- pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
- node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
- }
-
/*
- * Active cache pages are limited to 50% of memory, and shadow
- * entries that represent a refault distance bigger than that
- * do not have any effect. Limit the number of shadow nodes
- * such that shadow entries do not exceed the number of active
- * cache pages, assuming a worst-case node population density
- * of 1/8th on average.
+ * Approximate a reasonable limit for the radix tree nodes
+ * containing shadow entries. We don't need to keep more
+ * shadow entries than possible pages on the active list,
+ * since refault distances bigger than that are dismissed.
+ *
+ * The size of the active list converges toward 100% of
+ * overall page cache as memory grows, with only a tiny
+ * inactive list. Assume the total cache size for that.
+ *
+ * Nodes might be sparsely populated, with only one shadow
+ * entry in the extreme case. Obviously, we cannot keep one
+ * node for every eligible shadow entry, so compromise on a
+ * worst-case density of 1/8th. Below that, not all eligible
+ * refaults can be detected anymore.
*
* On 64-bit with 7 radix_tree_nodes per page and 64 slots
* each, this will reclaim shadow entries when they consume
- * ~2% of available memory:
+ * ~1.8% of available memory:
*
- * PAGE_SIZE / radix_tree_nodes / node_entries / PAGE_SIZE
+ * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
*/
- max_nodes = pages >> (1 + RADIX_TREE_MAP_SHIFT - 3);
+ if (sc->memcg) {
+ cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
+ LRU_ALL_FILE);
+ } else {
+ cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
+ node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
+ }
+ max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
- if (shadow_nodes <= max_nodes)
+ if (nodes <= max_nodes)
return 0;
-
- return shadow_nodes - max_nodes;
+ return nodes - max_nodes;
}
static enum lru_status shadow_lru_isolate(struct list_head *item,
@@ -418,23 +452,30 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
- BUG_ON(!workingset_node_shadows(node));
- BUG_ON(workingset_node_pages(node));
-
+ if (WARN_ON_ONCE(!node->exceptional))
+ goto out_invalid;
+ if (WARN_ON_ONCE(node->count != node->exceptional))
+ goto out_invalid;
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[i]) {
- BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
+ if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
+ goto out_invalid;
+ if (WARN_ON_ONCE(!node->exceptional))
+ goto out_invalid;
+ if (WARN_ON_ONCE(!mapping->nrexceptional))
+ goto out_invalid;
node->slots[i] = NULL;
- workingset_node_shadows_dec(node);
- BUG_ON(!mapping->nrexceptional);
+ node->exceptional--;
+ node->count--;
mapping->nrexceptional--;
}
}
- BUG_ON(workingset_node_shadows(node));
+ if (WARN_ON_ONCE(node->exceptional))
+ goto out_invalid;
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
- if (!__radix_tree_delete_node(&mapping->page_tree, node))
- BUG();
+ __radix_tree_delete_node(&mapping->page_tree, node);
+out_invalid:
spin_unlock(&mapping->tree_lock);
ret = LRU_REMOVED_RETRY;
out:
@@ -452,8 +493,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
local_irq_disable();
- ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
- shadow_lru_isolate, NULL);
+ ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
local_irq_enable();
return ret;
}
@@ -492,7 +532,7 @@ static int __init workingset_init(void)
pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
timestamp_bits, max_order, bucket_order);
- ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
+ ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
if (ret)
goto err;
ret = register_shrinker(&workingset_shadow_shrinker);
@@ -500,7 +540,7 @@ static int __init workingset_init(void)
goto err_list_lru;
return 0;
err_list_lru:
- list_lru_destroy(&workingset_shadow_nodes);
+ list_lru_destroy(&shadow_nodes);
err:
return ret;
}