diff --git a/arch/Kconfig b/arch/Kconfig
index 3cc5024ca0f3..e2d8bb4b4a30 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1167,7 +1167,6 @@ config ARCH_USE_MEMREMAP_PROT
 
 config ARCH_HAS_NONLEAF_PMD_YOUNG
 	bool
-	depends on PGTABLE_LEVELS > 2
 	help
 	  Architectures that select this option are capable of setting the
 	  accessed bit in non-leaf PMD entries when using them as part of linear
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3ba4b9a50d59..a7abd5445e89 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -57,7 +57,7 @@ config X86
 	select ARCH_HAS_PMEM_API		if X86_64
 	# Causing hangs/crashes, see the commit that added this change for details.
 	select ARCH_HAS_REFCOUNT
-	select ARCH_HAS_NONLEAF_PMD_YOUNG
+	select ARCH_HAS_NONLEAF_PMD_YOUNG	if PGTABLE_LEVELS > 2
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 0739dea3518a..9e0a84f3717e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,6 +5,17 @@
 #include <linux/huge_mm.h>
 #include <linux/swap.h>
 
+#ifndef try_cmpxchg
+#define try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+	___r = cmpxchg((_ptr), ___o, (_new)); \
+	if (unlikely(___r != ___o)) \
+		*___op = ___r; \
+	likely(___r == ___o); \
+})
+#endif /* try_cmpxchg */
+
 /**
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
@@ -121,17 +132,38 @@ static inline int lru_hist_from_seq(unsigned long seq)
 
 static inline int lru_tier_from_refs(int refs)
 {
-	VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH));
+	VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH));
 
-	/* see the comment on MAX_NR_TIERS */
+	/* see the comment in page_lru_refs() */
 	return order_base_2(refs + 1);
 }
 
+static inline int page_lru_refs(struct page *page)
+{
+	unsigned long flags = READ_ONCE(page->flags);
+	bool workingset = flags & BIT(PG_workingset);
+
+	/*
+	 * Return the number of accesses beyond PG_referenced, i.e., N-1 if the
+	 * total number of accesses is N, where N > 1; N=0,1 both map to the
+	 * first tier. lru_tier_from_refs() will account for this off-by-one.
+	 * Also see the comment on MAX_NR_TIERS.
+	 */
+	return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
+}
+
+static inline int page_lru_gen(struct page *page)
+{
+	unsigned long flags = READ_ONCE(page->flags);
+
+	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
 static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
 {
 	unsigned long max_seq = lruvec->lrugen.max_seq;
 
-	VM_BUG_ON(gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
 
 	/* see the comment on MIN_NR_GENS */
 	return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
@@ -146,9 +178,9 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page,
 	enum lru_list lru = type * LRU_INACTIVE_FILE;
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 
-	VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS);
-	VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS);
-	VM_BUG_ON(old_gen == -1 && new_gen == -1);
+	VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
 
 	if (old_gen >= 0)
 		WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
@@ -180,17 +212,19 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page,
 	}
 
 	/* demotion requires isolation, e.g., lru_deactivate_fn() */
-	VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
+	VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
 }
 
 static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
 {
-	int gen;
-	unsigned long old_flags, new_flags;
+	unsigned long mask, flags;
+	int gen = page_lru_gen(page);
 	int type = page_is_file_cache(page);
 	int zone = page_zonenum(page);
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 
+	VM_WARN_ON_ONCE_PAGE(gen != -1, page);
+
 	if (PageUnevictable(page) || !lrugen->enabled)
 		return false;
 	/*
@@ -210,14 +244,10 @@ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bo
 	else
 		gen = lru_gen_from_seq(lrugen->min_seq[type]);
 
-	do {
-		new_flags = old_flags = READ_ONCE(page->flags);
-		VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page);
-
-		/* see the comment on MIN_NR_GENS */
-		new_flags &= ~(LRU_GEN_MASK | BIT(PG_active));
-		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
-	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+	/* see the comment on MIN_NR_GENS */
+	mask = LRU_GEN_MASK | BIT(PG_active);
+	flags = (gen + 1UL) << LRU_GEN_PGOFF;
+	set_mask_bits(&page->flags, mask, flags);
 
 	lru_gen_update_size(lruvec, page, -1, gen);
 	/* for rotate_reclaimable_page() */
@@ -231,28 +261,25 @@ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bo
 
 static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
 {
-	int gen;
-	unsigned long old_flags, new_flags;
+	unsigned long mask, flags;
+	int gen = page_lru_gen(page);
 
-	do {
-		new_flags = old_flags = READ_ONCE(page->flags);
-		if (!(new_flags & LRU_GEN_MASK))
-			return false;
+	if (gen < 0)
+		return false;
 
-		VM_BUG_ON_PAGE(PageActive(page), page);
-		VM_BUG_ON_PAGE(PageUnevictable(page), page);
+	VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
+	VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
 
-		gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+	mask = LRU_GEN_MASK;
+	flags = 0;
+	/* for shrink_page_list() or migrate_page_states() */
+	if (reclaiming)
+		mask |= BIT(PG_referenced) | BIT(PG_reclaim);
+	else if (lru_gen_is_active(lruvec, gen))
+		flags |= BIT(PG_active);
-		new_flags &= ~LRU_GEN_MASK;
-		if (!(new_flags & BIT(PG_referenced)))
-			new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
-		/* for shrink_page_list() */
-		if (reclaiming)
-			new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
-		else if (lru_gen_is_active(lruvec, gen))
-			new_flags |= BIT(PG_active);
-	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+	flags = set_mask_bits(&page->flags, mask, flags);
+	gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 
 	lru_gen_update_size(lruvec, page, gen, -1);
 	list_del(&page->lru);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4251f6167f3f..ebfe2e9d4ae7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -15,7 +15,6 @@
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/uprobes.h>
-#include <linux/nodemask.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
@@ -542,7 +541,7 @@ struct mm_struct {
 			 * whether it has been used since the last time per-node
 			 * page table walkers cleared the corresponding bits.
 			 */
-			nodemask_t nodes;
+			unsigned long bitmap;
 		} lru_gen;
 #endif /* CONFIG_LRU_GEN */
 	} __randomize_layout;
@@ -584,16 +583,16 @@ static inline void lru_gen_init_mm(struct mm_struct *mm)
 #ifdef CONFIG_MEMCG
 	mm->lru_gen.memcg = NULL;
 #endif
-	nodes_clear(mm->lru_gen.nodes);
+	mm->lru_gen.bitmap = 0;
 }
 
 static inline void lru_gen_use_mm(struct mm_struct *mm)
 {
 	/* unlikely but not a bug when racing with lru_gen_migrate_mm() */
-	VM_WARN_ON(list_empty(&mm->lru_gen.list));
+	VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
 
-	if (!(current->flags & PF_KTHREAD) && !nodes_full(mm->lru_gen.nodes))
-		nodes_setall(mm->lru_gen.nodes);
+	if (!(current->flags & PF_KTHREAD))
+		WRITE_ONCE(mm->lru_gen.bitmap, -1);
 }
 
 #else /* !CONFIG_LRU_GEN */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 445d83da641b..42dbc362f652 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -250,9 +250,9 @@ struct zone_reclaim_stat {
  * Evictable pages are divided into multiple generations. The youngest and the
  * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
  * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
- * offset within MAX_NR_GENS, gen, indexes the LRU list of the corresponding
- * generation. The gen counter in page->flags stores gen+1 while a page is on
- * one of lrugen->lists[]. Otherwise it stores 0.
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
+ * corresponding generation. The gen counter in page->flags stores gen+1 while
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
  *
  * A page is added to the youngest generation on faulting. The aging needs to
  * check the accessed bit at least twice before handing this page over to the
@@ -260,16 +260,17 @@ struct zone_reclaim_stat {
  * fault; the second check makes sure this page hasn't been used since then.
  * This process, AKA second chance, requires a minimum of two generations,
  * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
- * LRU, these two generations are considered active; the rest of generations, if
- * they exist, are considered inactive. See lru_gen_is_active(). PG_active is
- * always cleared while a page is on one of lrugen->lists[] so that the aging
- * needs not to worry about it. And it's set again when a page considered active
- * is isolated for non-reclaiming purposes, e.g., migration. See
- * lru_gen_add_page() and lru_gen_del_page().
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
+ * rest of generations, if they exist, are considered inactive. See
+ * lru_gen_is_active(). PG_active is always cleared while a page is on one of
+ * lrugen->lists[] so that the aging need not worry about it. And it's set
+ * again when a page considered active is isolated for non-reclaiming purposes,
+ * e.g., migration. See lru_gen_add_page() and lru_gen_del_page().
  *
- * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice of the
- * categories of the active/inactive LRU when keeping track of accesses through
- * page tables. It requires order_base_2(MAX_NR_GENS+1) bits in page->flags.
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through page tables. It requires order_base_2(MAX_NR_GENS+1) bits in
+ * page->flags (LRU_GEN_MASK).
  */
 #define MIN_NR_GENS		2U
 #define MAX_NR_GENS		4U
@@ -280,18 +281,20 @@ struct zone_reclaim_stat {
  * times through file descriptors is in tier order_base_2(N). A page in the
  * first tier (N=0,1) is marked by PG_referenced unless it was faulted in
  * though page tables or read ahead. A page in any other tier (N>1) is marked
- * by PG_referenced and PG_workingset.
+ * by PG_referenced and PG_workingset. This implies a minimum of two tiers is
+ * supported without using additional bits in page->flags.
  *
  * In contrast to moving across generations which requires the LRU lock, moving
- * across tiers only requires operations on page->flags and therefore has a
- * negligible cost in the buffered access path. In the eviction path,
+ * across tiers only involves atomic operations on page->flags and therefore
+ * has a negligible cost in the buffered access path. In the eviction path,
  * comparisons of refaulted/(evicted+protected) from the first tier and the
  * rest infer whether pages accessed multiple times through file descriptors
  * are statistically hot and thus worth protecting.
  *
- * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice of the
- * categories of the active/inactive LRU when keeping track of accesses through
- * file descriptors. It requires MAX_NR_TIERS-2 additional bits in page->flags.
+ * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through file descriptors. It uses MAX_NR_TIERS-2 spare bits in
+ * page->flags (LRU_REFS_MASK).
  */
 #define MAX_NR_TIERS		4U
@@ -303,7 +306,6 @@ struct page_vma_mapped_walk;
 
 #define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
 #define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
-#define LRU_REFS_FLAGS		(BIT(PG_referenced) | BIT(PG_workingset))
 
 #ifdef CONFIG_LRU_GEN
 
@@ -338,6 +340,9 @@ enum {
 * Normally anon and file min_seq are in sync. But if swapping is constrained,
 * e.g., out of swap space, file min_seq is allowed to advance and leave anon
 * min_seq behind.
+ *
+ * nr_pages[] are eventually consistent and therefore can be transiently
+ * negative when reset_batch_size() is pending.
+ */
 struct lru_gen_struct {
 	/* the aging increments the youngest generation number */
@@ -349,7 +354,7 @@ struct lru_gen_struct {
 	/* the multi-gen LRU lists */
 	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
 	/* the sizes of the above lists */
-	unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
 	/* the exponential moving average of refaulted */
 	unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
 	/* the exponential moving average of evicted+protected */
@@ -412,7 +417,7 @@ struct lru_gen_mm_walk {
 	/* total batched items */
 	int batched;
 	bool can_swap;
-	bool full_scan;
+	bool force_scan;
 };
 
 void lru_gen_init_lruvec(struct lruvec *lruvec);
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index b1807016f660..60fe1d91e275 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -55,7 +55,7 @@
 
 #define ZONES_WIDTH		ZONES_SHIFT
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LRU_GEN_WIDTH+LRU_REFS_WIDTH \
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LRU_GEN_WIDTH \
 	<= BITS_PER_LONG - NR_PAGEFLAGS
 #define NODES_WIDTH		NODES_SHIFT
 #else
@@ -77,7 +77,7 @@
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LRU_GEN_WIDTH+LRU_REFS_WIDTH+ \
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LRU_GEN_WIDTH+ \
 	LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
 #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
@@ -86,7 +86,7 @@
 #ifdef CONFIG_KASAN_SW_TAGS
 #define KASAN_TAG_WIDTH 8
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LRU_GEN_WIDTH+LRU_REFS_WIDTH+ \
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LRU_GEN_WIDTH+ \
 	KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
 #error "KASAN: not enough bits in page flags for tag"
 #endif
@@ -106,4 +106,9 @@
 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
+/* see the comment on MAX_NR_TIERS */
+#define LRU_REFS_WIDTH	min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \
+			    ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \
+			    NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH)
+
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 10dd9e6b03e5..b529182e8b04 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -24,10 +24,10 @@ int main(void)
 	DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
 #ifdef CONFIG_LRU_GEN
 	DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
-	DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2);
+	DEFINE(__LRU_REFS_WIDTH, MAX_NR_TIERS - 2);
 #else
 	DEFINE(LRU_GEN_WIDTH, 0);
-	DEFINE(LRU_REFS_WIDTH, 0);
+	DEFINE(__LRU_REFS_WIDTH, 0);
 #endif
 	/* End of constants */
diff --git a/mm/Kconfig b/mm/Kconfig
index 0d14e6514ac0..e18038b2aaa2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -852,8 +852,8 @@ config FORCE_ALLOC_FROM_DMA_ZONE
 config LRU_GEN
 	bool "Multi-Gen LRU"
 	depends on MMU
-	# the following options can use up the spare bits in page flags
-	depends on !MAXSMP && (64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP)
+	# make sure page->flags has enough spare bits
+	depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
 	help
 	  A high performance LRU implementation to overcommit memory.
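
The tier math that the page-flags-layout.h and kernel/bounds.c changes above depend on is compact enough to check in isolation. Below is a small user-space sketch, not part of the patch, that models lru_tier_from_refs() and the page_lru_refs() off-by-one. It assumes order_base_2() is a ceiling log2, as in include/linux/log2.h, and it ignores the saturation that LRU_REFS_MASK imposes in page_inc_refs():

/* tier_demo.c - user-space model of the MGLRU tier math; not kernel code. */
#include <stdio.h>

/* assumed to match the kernel's order_base_2(): ceiling of log2(n) */
static int order_base_2(unsigned long n)
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

/* see lru_tier_from_refs(): refs + 1 folds N=0 and N=1 into tier 0 */
static int lru_tier_from_refs(int refs)
{
	return order_base_2(refs + 1);
}

/*
 * Model of page_lru_refs() for a page accessed N times through file
 * descriptors: access 1 sets PG_referenced, access 2 sets PG_workingset,
 * and each later access increments the LRU_REFS counter, i.e., refs = N-1.
 */
static int page_lru_refs(int accesses)
{
	return accesses > 1 ? accesses - 1 : 0;
}

int main(void)
{
	int n;

	/* a page accessed N times lands in tier order_base_2(N) */
	for (n = 0; n <= 8; n++)
		printf("N=%d refs=%d tier=%d\n", n, page_lru_refs(n),
		       lru_tier_from_refs(page_lru_refs(n)));
	return 0;
}

For N = 2, 3, 4, 5 this prints tiers 1, 2, 2, 3, matching the "tier order_base_2(N)" rule in the comment on MAX_NR_TIERS.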
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e5642cb495e7..c031c74d9e15 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5241,9 +5241,10 @@ static void mem_cgroup_move_task(void)
 #ifdef CONFIG_LRU_GEN
 static void mem_cgroup_attach(struct cgroup_taskset *tset)
 {
+	struct task_struct *task;
 	struct cgroup_subsys_state *css;
-	struct task_struct *task = NULL;
 
+	/* find the first leader if there is any */
 	cgroup_taskset_for_each_leader(task, css, tset)
 		break;
diff --git a/mm/swap.c b/mm/swap.c
index 71fb96142046..2ad918ce7fb9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -364,33 +364,30 @@ static void __lru_cache_activate_page(struct page *page)
 #ifdef CONFIG_LRU_GEN
 static void page_inc_refs(struct page *page)
 {
-	unsigned long refs;
-	unsigned long old_flags, new_flags;
+	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
 
 	if (PageUnevictable(page))
 		return;
 
+	if (!PageReferenced(page)) {
+		SetPageReferenced(page);
+		return;
+	}
+
+	if (!PageWorkingset(page)) {
+		SetPageWorkingset(page);
+		return;
+	}
+
 	/* see the comment on MAX_NR_TIERS */
 	do {
-		new_flags = old_flags = READ_ONCE(page->flags);
+		new_flags = old_flags & LRU_REFS_MASK;
+		if (new_flags == LRU_REFS_MASK)
+			break;
 
-		if (!(new_flags & BIT(PG_referenced))) {
-			new_flags |= BIT(PG_referenced);
-			continue;
-		}
-
-		if (!(new_flags & BIT(PG_workingset))) {
-			new_flags |= BIT(PG_workingset);
-			continue;
-		}
-
-		refs = new_flags & LRU_REFS_MASK;
-		refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK);
-
-		new_flags &= ~LRU_REFS_MASK;
-		new_flags |= refs;
-	} while (new_flags != old_flags &&
-		 cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+		new_flags += BIT(LRU_REFS_PGOFF);
+		new_flags |= old_flags & ~LRU_REFS_MASK;
+	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
 }
 #else
 static void page_inc_refs(struct page *page)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5c1ccc130fa8..8cea7bcf5cee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2497,6 +2497,8 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
 *                          shorthand helpers
 ******************************************************************************/
 
+#define LRU_REFS_FLAGS	(BIT(PG_referenced) | BIT(PG_workingset))
+
 #define DEFINE_MAX_SEQ(lruvec)						\
 	unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
 
@@ -2511,24 +2513,6 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
 	for ((type) = 0; (type) < ANON_AND_FILE; (type)++)		\
 		for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
 
-static int page_lru_gen(struct page *page)
-{
-	unsigned long flags = READ_ONCE(page->flags);
-
-	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
-}
-
-static int page_lru_tier(struct page *page)
-{
-	int refs;
-	unsigned long flags = READ_ONCE(page->flags);
-
-	refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ?
-	       ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0;
-
-	return lru_tier_from_refs(refs);
-}
-
 static bool get_cap(int cap)
 {
 #ifdef CONFIG_LRU_GEN_ENABLED
@@ -2553,7 +2537,7 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 		return lruvec;
 	}
 #endif
-	VM_BUG_ON(!mem_cgroup_disabled());
+	VM_WARN_ON_ONCE(!mem_cgroup_disabled());
 
 	return pgdat ? &pgdat->lruvec : NULL;
 }
 
@@ -2596,7 +2580,7 @@ static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
 	if (memcg)
 		return &memcg->mm_list;
 #endif
-	VM_BUG_ON(!mem_cgroup_disabled());
+	VM_WARN_ON_ONCE(!mem_cgroup_disabled());
 
 	return &mm_list;
 }
@@ -2607,9 +2591,9 @@ void lru_gen_add_mm(struct mm_struct *mm)
 	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
 	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
 
-	VM_BUG_ON_MM(!list_empty(&mm->lru_gen.list), mm);
+	VM_WARN_ON_ONCE(!list_empty(&mm->lru_gen.list));
 #ifdef CONFIG_MEMCG
-	VM_BUG_ON_MM(mm->lru_gen.memcg, mm);
+	VM_WARN_ON_ONCE(mm->lru_gen.memcg);
 	mm->lru_gen.memcg = memcg;
 #endif
 	spin_lock(&mm_list->lock);
@@ -2684,13 +2668,13 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
 		return;
 
 	rcu_read_lock();
-	memcg = mem_cgroup_from_task(mm->owner);
+	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 	rcu_read_unlock();
 
 	if (memcg == mm->lru_gen.memcg)
 		return;
 
-	VM_BUG_ON_MM(!mm->lru_gen.memcg, mm);
-	VM_BUG_ON_MM(list_empty(&mm->lru_gen.list), mm);
+	VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
+	VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
 
 	lru_gen_del_mm(mm);
 	lru_gen_add_mm(mm);
@@ -2816,12 +2800,13 @@ static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
 	int type;
 	unsigned long size = 0;
 	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+	int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
 
-	if (!walk->full_scan && cpumask_empty(mm_cpumask(mm)) &&
-	    !node_isset(pgdat->node_id, mm->lru_gen.nodes))
+	if (!walk->force_scan && cpumask_empty(mm_cpumask(mm)) &&
+	    !test_bit(key, &mm->lru_gen.bitmap))
 		return true;
 
-	node_clear(pgdat->node_id, mm->lru_gen.nodes);
+	clear_bit(key, &mm->lru_gen.bitmap);
 
 	for (type = !walk->can_swap; type < ANON_AND_FILE; type++) {
 		size += type ? get_mm_counter(mm, MM_FILEPAGES) :
@@ -2866,9 +2851,9 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
 
 	spin_lock(&mm_list->lock);
 
-	VM_BUG_ON(mm_state->seq + 1 < walk->max_seq);
-	VM_BUG_ON(*iter && mm_state->seq > walk->max_seq);
-	VM_BUG_ON(*iter && !mm_state->nr_walkers);
+	VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
+	VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq);
+	VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers);
 
 	if (walk->max_seq <= mm_state->seq) {
 		if (!*iter)
@@ -2877,7 +2862,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
 	}
 
 	if (!mm_state->nr_walkers) {
-		VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo);
+		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
 
 		mm_state->head = mm_list->fifo.next;
 		first = true;
@@ -2888,10 +2873,10 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
 
 		mm_state->head = mm_state->head->next;
 
-		/* full scan for those added after the last iteration */
+		/* force scan for those added after the last iteration */
 		if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) {
 			mm_state->tail = mm_state->head;
-			walk->full_scan = true;
+			walk->force_scan = true;
 		}
 
 		if (should_skip_mm(mm, walk))
@@ -2934,10 +2919,10 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
 
 	spin_lock(&mm_list->lock);
 
-	VM_BUG_ON(mm_state->seq + 1 < max_seq);
+	VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
 
 	if (max_seq > mm_state->seq && !mm_state->nr_walkers) {
-		VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo);
+		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
 
 		WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
 		reset_mm_stats(lruvec, NULL, true);
@@ -3048,54 +3033,48 @@ static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
 
 static int page_update_gen(struct page *page, int gen)
 {
-	unsigned long old_flags, new_flags;
+	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
 
-	VM_BUG_ON(gen >= MAX_NR_GENS);
-	VM_BUG_ON(!rcu_read_lock_held());
+	VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(!rcu_read_lock_held());
 
 	do {
-		new_flags = old_flags = READ_ONCE(page->flags);
-
-		/* for shrink_page_list() */
-		if (!(new_flags & LRU_GEN_MASK)) {
-			new_flags |= BIT(PG_referenced);
+		/* lru_gen_del_page() has isolated this page? */
+		if (!(old_flags & LRU_GEN_MASK)) {
+			/* for shrink_page_list() */
+			new_flags = old_flags | BIT(PG_referenced);
 			continue;
 		}
 
-		new_flags &= ~LRU_GEN_MASK;
+		new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
 		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
-		new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
-	} while (new_flags != old_flags &&
-		 cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
 
 	return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 }
 
 static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
 {
-	unsigned long old_flags, new_flags;
 	int type = page_is_file_cache(page);
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
+
+	VM_WARN_ON_ONCE_PAGE(!(old_flags & LRU_GEN_MASK), page);
 
 	do {
-		new_flags = old_flags = READ_ONCE(page->flags);
-		VM_BUG_ON_PAGE(!(new_flags & LRU_GEN_MASK), page);
-
-		new_gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+		new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 
 		/* page_update_gen() has promoted this page? */
 		if (new_gen >= 0 && new_gen != old_gen)
 			return new_gen;
 
 		new_gen = (old_gen + 1) % MAX_NR_GENS;
 
-		new_flags &= ~LRU_GEN_MASK;
+		new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
 		new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
-		new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
 
 		/* for end_page_writeback() */
 		if (reclaiming)
 			new_flags |= BIT(PG_reclaim);
-	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
 
 	lru_gen_update_size(lruvec, page, old_gen, new_gen);
 
@@ -3109,8 +3088,8 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct page *page,
 	int zone = page_zonenum(page);
 	int delta = hpage_nr_pages(page);
 
-	VM_BUG_ON(old_gen >= MAX_NR_GENS);
-	VM_BUG_ON(new_gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS);
+	VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS);
 
 	walk->batched++;
 
@@ -3142,11 +3121,11 @@ static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk
 	}
 }
 
-static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk)
+static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
 {
 	struct address_space *mapping;
-	struct vm_area_struct *vma = walk->vma;
-	struct lru_gen_mm_walk *priv = walk->private;
+	struct vm_area_struct *vma = args->vma;
+	struct lru_gen_mm_walk *walk = args->private;
 
 	if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) || is_vm_hugetlb_page(vma) ||
 	    (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ)) ||
@@ -3154,7 +3133,7 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 		return true;
 
 	if (vma_is_anonymous(vma))
-		return !priv->can_swap;
+		return !walk->can_swap;
 
 	if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
 		return true;
@@ -3164,7 +3143,7 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 		return true;
 
 	/* check readpage to exclude special mappings like dax, etc. */
-	return shmem_mapping(mapping) ? !priv->can_swap : !mapping->a_ops->readpage;
+	return shmem_mapping(mapping) ? !walk->can_swap : !mapping->a_ops->readpage;
 }
 
 /*
@@ -3172,33 +3151,33 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 * returning back to the PGD table for each of such VMAs, finish an entire PMD
 * table to reduce zigzags and improve cache performance.
 */
-static bool get_next_vma(struct mm_walk *walk, unsigned long mask, unsigned long size,
+static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk *args,
 			 unsigned long *start, unsigned long *end)
 {
 	unsigned long next = round_up(*end, size);
 
-	VM_BUG_ON(mask & size);
-	VM_BUG_ON(*start >= *end);
-	VM_BUG_ON((next & mask) != (*start & mask));
+	VM_WARN_ON_ONCE(mask & size);
+	VM_WARN_ON_ONCE(*start >= *end);
+	VM_WARN_ON_ONCE((next & mask) != (*start & mask));
 
-	while (walk->vma) {
-		if (next >= walk->vma->vm_end) {
-			walk->vma = walk->vma->vm_next;
+	while (args->vma) {
+		if (next >= args->vma->vm_end) {
+			args->vma = args->vma->vm_next;
 			continue;
 		}
 
-		if ((next & mask) != (walk->vma->vm_start & mask))
+		if ((next & mask) != (args->vma->vm_start & mask))
 			return false;
 
-		if (should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) {
-			walk->vma = walk->vma->vm_next;
+		if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) {
+			args->vma = args->vma->vm_next;
 			continue;
 		}
 
-		*start = max(next, walk->vma->vm_start);
+		*start = max(next, args->vma->vm_start);
 		next = (next | ~mask) + 1;
 		/* rounded-up boundaries can wrap to 0 */
-		*end = next && next < walk->vma->vm_end ? next : walk->vma->vm_end;
+		*end = next && next < args->vma->vm_end ? next : args->vma->vm_end;
 
 		return true;
 	}
@@ -3215,7 +3194,7 @@ static bool suitable_to_scan(int total, int young)
 }
 
 static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
-			   struct mm_walk *walk)
+			   struct mm_walk *args)
 {
 	int i;
 	pte_t *pte;
@@ -3223,14 +3202,14 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
 	unsigned long addr;
 	int total = 0;
 	int young = 0;
-	struct lru_gen_mm_walk *priv = walk->private;
-	struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec);
-	struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec);
-	int old_gen, new_gen = lru_gen_from_seq(priv->max_seq);
+	struct lru_gen_mm_walk *walk = args->private;
+	struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
+	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+	int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
 
-	VM_BUG_ON(pmd_trans_huge(*pmd) || pmd_devmap(*pmd));
+	VM_WARN_ON_ONCE(pmd_trans_huge(*pmd) || pmd_devmap(*pmd));
 
-	ptl = pte_lockptr(walk->mm, pmd);
+	ptl = pte_lockptr(args->mm, pmd);
 	if (!spin_trylock(ptl))
 		return false;
 
@@ -3243,10 +3222,10 @@ restart:
 		struct page *page;
 		unsigned long pfn = pte_pfn(pte[i]);
 
-		VM_BUG_ON(addr < walk->vma->vm_start || addr >= walk->vma->vm_end);
+		VM_WARN_ON_ONCE(addr < args->vma->vm_start || addr >= args->vma->vm_end);
 
 		total++;
-		priv->mm_stats[MM_PTE_TOTAL]++;
+		walk->mm_stats[MM_PTE_TOTAL]++;
 
 		if (!pte_present(pte[i]) || is_zero_pfn(pfn))
 			continue;
@@ -3255,11 +3234,11 @@ restart:
 			continue;
 
 		if (!pte_young(pte[i])) {
-			priv->mm_stats[MM_PTE_OLD]++;
+			walk->mm_stats[MM_PTE_OLD]++;
 			continue;
 		}
 
-		VM_BUG_ON(!pfn_valid(pfn));
+		VM_WARN_ON_ONCE(!pfn_valid(pfn));
 		if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 			continue;
 
@@ -3270,11 +3249,14 @@ restart:
 		if (page_memcg_rcu(page) != memcg)
 			continue;
 
-		if (!ptep_test_and_clear_young(walk->vma, addr, pte + i))
+		if (!page_is_file_cache(page) && !walk->can_swap)
+			continue;
+
+		if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
 			continue;
 
 		young++;
-		priv->mm_stats[MM_PTE_YOUNG]++;
+		walk->mm_stats[MM_PTE_YOUNG]++;
 
 		if (pte_dirty(pte[i]) && !PageDirty(page) &&
 		    !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page)))
 
 		old_gen = page_update_gen(page, new_gen);
 		if (old_gen >= 0 && old_gen != new_gen)
-			update_batch_size(priv, page, old_gen, new_gen);
+			update_batch_size(walk, page, old_gen, new_gen);
 	}
 
-	if (i < PTRS_PER_PTE && get_next_vma(walk, PMD_MASK, PAGE_SIZE, &start, &end))
+	if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
 		goto restart;
 
 	pte_unmap(pte);
@@ -3298,17 +3280,17 @@ restart:
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
 static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
-				  struct mm_walk *walk, unsigned long *start)
+				  struct mm_walk *args, unsigned long *start)
 {
 	int i;
 	pmd_t *pmd;
 	spinlock_t *ptl;
-	struct lru_gen_mm_walk *priv = walk->private;
-	struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec);
-	struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec);
-	int old_gen, new_gen = lru_gen_from_seq(priv->max_seq);
+	struct lru_gen_mm_walk *walk = args->private;
+	struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
+	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+	int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
 
-	VM_BUG_ON(pud_trans_huge(*pud) || pud_devmap(*pud));
+	VM_WARN_ON_ONCE(pud_trans_huge(*pud) || pud_devmap(*pud));
 
 	/* try to batch at most 1+MIN_LRU_BATCH+1 entries */
 	if (*start == -1) {
@@ -3318,13 +3300,13 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
 
 	i = next == -1 ? 0 : ((next - *start) >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
 	if (i && i <= MIN_LRU_BATCH) {
-		__set_bit(i - 1, priv->bitmap);
+		__set_bit(i - 1, walk->bitmap);
 		return;
 	}
 
 	pmd = pmd_offset(pud, *start);
 
-	ptl = pmd_lockptr(walk->mm, pmd);
+	ptl = pmd_lockptr(args->mm, pmd);
 	if (!spin_trylock(ptl))
 		goto done;
 
@@ -3335,7 +3317,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
 		unsigned long pfn = pmd_pfn(pmd[i]);
 		unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start;
 
-		VM_BUG_ON(addr < vma->vm_start || addr >= vma->vm_end);
+		VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
 
 		if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i]))
 			goto next;
@@ -3350,7 +3332,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
 			goto next;
 		}
 
-		VM_BUG_ON(!pfn_valid(pfn));
+		VM_WARN_ON_ONCE(!pfn_valid(pfn));
 		if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 			goto next;
 
@@ -3362,10 +3344,13 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
 		if (page_memcg_rcu(page) != memcg)
 			goto next;
 
+		if (!page_is_file_cache(page) && !walk->can_swap)
+			goto next;
+
 		if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
 			goto next;
 
-		priv->mm_stats[MM_PTE_YOUNG]++;
+		walk->mm_stats[MM_PTE_YOUNG]++;
 
 		if (pmd_dirty(pmd[i]) && !PageDirty(page) &&
 		    !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page)))
@@ -3373,27 +3358,27 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
 
 		old_gen = page_update_gen(page, new_gen);
 		if (old_gen >= 0 && old_gen != new_gen)
-			update_batch_size(priv, page, old_gen, new_gen);
+			update_batch_size(walk, page, old_gen, new_gen);
next:
 		i = i > MIN_LRU_BATCH ? 0 :
-			find_next_bit(priv->bitmap, MIN_LRU_BATCH, i) + 1;
+			find_next_bit(walk->bitmap, MIN_LRU_BATCH, i) + 1;
 	} while (i <= MIN_LRU_BATCH);
 
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(ptl);
done:
 	*start = -1;
-	bitmap_zero(priv->bitmap, MIN_LRU_BATCH);
+	bitmap_zero(walk->bitmap, MIN_LRU_BATCH);
 }
 #else
 static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
-				  struct mm_walk *walk, unsigned long *start)
+				  struct mm_walk *args, unsigned long *start)
 {
 }
 #endif
 
 static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
-			   struct mm_walk *walk)
+			   struct mm_walk *args)
 {
 	int i;
 	pmd_t *pmd;
@@ -3401,9 +3386,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
 	unsigned long addr;
 	struct vm_area_struct *vma;
 	unsigned long pos = -1;
-	struct lru_gen_mm_walk *priv = walk->private;
+	struct lru_gen_mm_walk *walk = args->private;
 
-	VM_BUG_ON(pud_trans_huge(*pud) || pud_devmap(*pud));
+	VM_WARN_ON_ONCE(pud_trans_huge(*pud) || pud_devmap(*pud));
 
 	/*
	 * Finish an entire PMD in two passes: the first only reaches to PTE
@@ -3413,7 +3398,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
 	pmd = pmd_offset(pud, start & PUD_MASK);
restart:
 	/* walk_pte_range() may call get_next_vma() */
-	vma = walk->vma;
+	vma = args->vma;
 	i = (start >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
 	for (addr = start; addr != end; i++, addr = next) {
 		pmd_t val = pmd_read_atomic(pmd + i);
@@ -3424,70 +3409,70 @@ restart:
 		next = pmd_addr_end(addr, end);
 
 		if (!pmd_present(val)) {
-			priv->mm_stats[MM_PTE_TOTAL]++;
+			walk->mm_stats[MM_PTE_TOTAL]++;
 			continue;
 		}
 
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		if (pmd_trans_huge(val)) {
 			unsigned long pfn = pmd_pfn(val);
-			struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec);
+			struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
 
-			priv->mm_stats[MM_PTE_TOTAL]++;
+			walk->mm_stats[MM_PTE_TOTAL]++;
 
 			if (is_huge_zero_pmd(val))
 				continue;
 
 			if (!pmd_young(val)) {
-				priv->mm_stats[MM_PTE_OLD]++;
+				walk->mm_stats[MM_PTE_OLD]++;
 				continue;
 			}
 
 			if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 				continue;
 
-			walk_pmd_range_locked(pud, addr, vma, walk, &pos);
+			walk_pmd_range_locked(pud, addr, vma, args, &pos);
 			continue;
 		}
#endif
-		priv->mm_stats[MM_PMD_TOTAL]++;
+		walk->mm_stats[MM_PMD_TOTAL]++;
 
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
 		if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
 			if (!pmd_young(val))
 				continue;
 
-			walk_pmd_range_locked(pud, addr, vma, walk, &pos);
+			walk_pmd_range_locked(pud, addr, vma, args, &pos);
 		}
#endif
-		if (!priv->full_scan && !test_bloom_filter(priv->lruvec, priv->max_seq, pmd + i))
+		if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
 			continue;
 
-		priv->mm_stats[MM_PMD_FOUND]++;
+		walk->mm_stats[MM_PMD_FOUND]++;
 
-		if (!walk_pte_range(&val, addr, next, walk))
+		if (!walk_pte_range(&val, addr, next, args))
 			continue;
 
-		priv->mm_stats[MM_PMD_ADDED]++;
+		walk->mm_stats[MM_PMD_ADDED]++;
 
 		/* carry over to the next generation */
-		update_bloom_filter(priv->lruvec, priv->max_seq + 1, pmd + i);
+		update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
 	}
 
-	walk_pmd_range_locked(pud, -1, vma, walk, &pos);
+	walk_pmd_range_locked(pud, -1, vma, args, &pos);
 
-	if (i < PTRS_PER_PMD && get_next_vma(walk, PUD_MASK, PMD_SIZE, &start, &end))
+	if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end))
 		goto restart;
 }
 
 static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end,
-			  struct mm_walk *walk)
+			  struct mm_walk *args)
 {
 	int i;
 	pud_t *pud;
 	unsigned long addr;
 	unsigned long next;
-	struct lru_gen_mm_walk *priv = walk->private;
+	struct lru_gen_mm_walk *walk = args->private;
 
 	pud = pud_offset(p4d, start & P4D_MASK);
restart:
@@ -3500,21 +3485,21 @@ restart:
 		if (!pud_present(val) || WARN_ON_ONCE(pud_trans_huge(val) || pud_devmap(val)))
 			continue;
 
-		walk_pmd_range(&val, addr, next, walk);
+		walk_pmd_range(&val, addr, next, args);
 
-		if (priv->batched >= MAX_LRU_BATCH) {
+		if (walk->batched >= MAX_LRU_BATCH) {
 			end = (addr | ~PUD_MASK) + 1;
 			goto done;
 		}
 	}
 
-	if (i < PTRS_PER_PUD && get_next_vma(walk, P4D_MASK, PUD_SIZE, &start, &end))
+	if (i < PTRS_PER_PUD && get_next_vma(P4D_MASK, PUD_SIZE, args, &start, &end))
 		goto restart;
 
 	end = round_up(end, P4D_SIZE);
done:
 	/* rounded-up boundaries can wrap to 0 */
-	priv->next_addr = end && walk->vma ? max(end, walk->vma->vm_start) : 0;
+	walk->next_addr = end && args->vma ? max(end, args->vma->vm_start) : 0;
 
 	return -EAGAIN;
 }
@@ -3542,10 +3527,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 
 	/* the caller might be holding the lock for write */
 	if (down_read_trylock(&mm->mmap_sem)) {
-		unsigned long start = walk->next_addr;
-		unsigned long end = mm->highest_vm_end;
-
-		err = walk_page_range(start, end, &args);
+		err = walk_page_range(walk->next_addr, ULONG_MAX, &args);
 
 		up_read(&mm->mmap_sem);
 
@@ -3577,20 +3559,39 @@ static void free_mm_walk(struct lru_gen_mm_walk *walk)
 	kfree(walk);
 }
 
-static void inc_min_seq(struct lruvec *lruvec)
+static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 {
-	int type;
+	int zone;
+	int remaining = MAX_LRU_BATCH;
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
+	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
 
-	VM_BUG_ON(!seq_is_valid(lruvec));
+	if (type == LRU_GEN_ANON && !can_swap)
+		goto done;
 
-	for (type = 0; type < ANON_AND_FILE; type++) {
-		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
-			continue;
+	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+		struct list_head *head = &lrugen->lists[old_gen][type][zone];
 
-		reset_ctrl_pos(lruvec, type, true);
-		WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
+		while (!list_empty(head)) {
+			struct page *page = lru_to_page(head);
+
+			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
+			VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
+
+			new_gen = page_inc_gen(lruvec, page, false);
+			list_move_tail(&page->lru, &lrugen->lists[new_gen][type][zone]);
+
+			if (!--remaining)
+				return false;
+		}
 	}
+done:
+	reset_ctrl_pos(lruvec, type, true);
+	WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
+
+	return true;
 }
 
 static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
@@ -3600,8 +3601,9 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 	DEFINE_MIN_SEQ(lruvec);
 
-	VM_BUG_ON(!seq_is_valid(lruvec));
+	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
 
+	/* find the oldest populated generation */
 	for (type = !can_swap; type < ANON_AND_FILE; type++) {
 		while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
 			gen = lru_gen_from_seq(min_seq[type]);
@@ -3635,7 +3637,7 @@ next:
 	return success;
 }
 
-static void inc_max_seq(struct lruvec *lruvec)
+static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
 {
 	int prev, next;
 	int type, zone;
@@ -3644,16 +3646,26 @@ static void inc_max_seq(struct lruvec *lruvec)
 	spin_lock_irq(&pgdat->lru_lock);
 
-	VM_BUG_ON(!seq_is_valid(lruvec));
+	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
 
-	inc_min_seq(lruvec);
+	for (type = 0; type < ANON_AND_FILE; type++) {
+		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
+			continue;
+
+		VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
+
+		while (!inc_min_seq(lruvec, type, can_swap)) {
+			spin_unlock_irq(&pgdat->lru_lock);
+			cond_resched();
+			spin_lock_irq(&pgdat->lru_lock);
+		}
+	}
 
 	/*
	 * Update the active/inactive LRU sizes for compatibility. Both sides of
	 * the current max_seq need to be covered, since max_seq+1 can overlap
	 * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do
-	 * overlap, cold/hot inversion happens. This can be solved by moving
-	 * pages from min_seq to min_seq+1 but is omitted for simplicity.
+	 * overlap, cold/hot inversion happens.
	 */
 	prev = lru_gen_from_seq(lrugen->max_seq - 1);
 	next = lru_gen_from_seq(lrugen->max_seq + 1);
@@ -3685,14 +3697,14 @@ static void inc_max_seq(struct lruvec *lruvec)
 }
 
 static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
-			       struct scan_control *sc, bool can_swap, bool full_scan)
+			       struct scan_control *sc, bool can_swap, bool force_scan)
 {
 	bool success;
 	struct lru_gen_mm_walk *walk;
 	struct mm_struct *mm = NULL;
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 
-	VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq));
+	VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
 
 	/*
	 * If the hardware doesn't automatically set the accessed bit, fallback
@@ -3700,7 +3712,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
	 * handful of PTEs. Spreading the work out over a period of time usually
	 * is less efficient, but it avoids bursty page faults.
	 */
-	if (!full_scan && (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK))) {
+	if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
 		success = iterate_mm_list_nowalk(lruvec, max_seq);
 		goto done;
 	}
@@ -3714,7 +3726,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 	walk->lruvec = lruvec;
 	walk->max_seq = max_seq;
 	walk->can_swap = can_swap;
-	walk->full_scan = full_scan;
+	walk->force_scan = force_scan;
 
 	do {
 		success = iterate_mm_list(lruvec, walk, &mm);
@@ -3734,9 +3746,9 @@ done:
 		return max_seq < READ_ONCE(lrugen->max_seq);
 	}
 
-	VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq));
+	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
 
-	inc_max_seq(lruvec);
+	inc_max_seq(lruvec, can_swap, force_scan);
 	/* either this sees any waiters or they will see updated max_seq */
 	if (wq_has_sleeper(&lruvec->mm_state.wait))
 		wake_up_all(&lruvec->mm_state.wait);
@@ -3775,23 +3787,20 @@ static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq,
 	}
 
 	/*
-	 * The aging and the eviction is a typical producer-consumer model. The
-	 * aging tries to be lazy to reduce the unnecessary overhead. On the
-	 * other hand, the eviction stalls when the number of generations
-	 * reaches MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1
-	 * generations, hence the first two if's.
+	 * The aging tries to be lazy to reduce the overhead. On the other hand,
+	 * the eviction stalls when the number of generations reaches
+	 * MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1 generations,
+	 * hence the first two if's.
	 *
-	 * In addition, it's ideal to spread pages out evenly, meaning
-	 * 1/(MIN_NR_GENS+1) of the total number of pages for each generation. A
-	 * reasonable range for this average portion would [1/MIN_NR_GENS,
-	 * 1/(MIN_NR_GENS+2)]. From the consumer's POV, the eviction only cares
-	 * about the lower bound of cold pages, i.e., 1/(MIN_NR_GENS+2), whereas
-	 * from the producer's POV, the aging only cares about the upper bound
-	 * of hot pages, i.e., 1/MIN_NR_GENS.
+	 * Also it's ideal to spread pages out evenly, meaning 1/(MIN_NR_GENS+1)
+	 * of the total number of pages for each generation. A reasonable range
+	 * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
+	 * eviction cares about the lower bound of cold pages, whereas the aging
+	 * cares about the upper bound of hot pages.
	 */
-	if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq)
+	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
 		*need_aging = true;
-	else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq)
+	else if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
 		*need_aging = false;
 	else if (young * MIN_NR_GENS > total)
 		*need_aging = true;
@@ -3813,6 +3822,8 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc,
 	DEFINE_MAX_SEQ(lruvec);
 	DEFINE_MIN_SEQ(lruvec);
 
+	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
+
 	if (min_ttl) {
 		int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
 		unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@@ -3830,7 +3841,7 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc,
 	if (!mem_cgroup_online(memcg))
 		nr_to_scan++;
 
-	if (nr_to_scan && need_aging && (!mem_cgroup_low(NULL, memcg) || sc->memcg_low_reclaim))
+	if (nr_to_scan && need_aging)
 		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
 
 	return true;
@@ -3845,7 +3856,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 	bool success = false;
 	unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
 
-	VM_BUG_ON(!current_is_kswapd());
+	VM_WARN_ON_ONCE(!current_is_kswapd());
 
 	/*
	 * To reduce the chance of going into the aging path or swapping, which
@@ -3885,7 +3896,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 	if (!success && mutex_trylock(&oom_lock)) {
 		struct oom_control oc = {
 			.gfp_mask = sc->gfp_mask,
-			.order = sc->order,
 		};
 
 		out_of_memory(&oc);
@@ -3896,10 +3906,10 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 
 /*
 * This function exploits spatial locality when shrink_page_list() walks the
- * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
- * If the scan was done cacheline efficiently, it adds the PMD entry pointing
- * to the PTE table to the Bloom filter. This process is a feedback loop from
- * the eviction to the aging.
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If
+ * the scan was done cacheline efficiently, it adds the PMD entry pointing to
+ * the PTE table to the Bloom filter. This forms a feedback loop between the
+ * eviction and the aging.
 */
 void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 {
@@ -3919,7 +3929,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 	int old_gen, new_gen = lru_gen_from_seq(max_seq);
 
 	lockdep_assert_held(pvmw->ptl);
-	VM_BUG_ON_PAGE(PageLRU(pvmw->page), pvmw->page);
+	VM_WARN_ON_ONCE_PAGE(PageLRU(pvmw->page), pvmw->page);
 
 	if (spin_is_contended(pvmw->ptl))
 		return;
@@ -3946,7 +3956,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 	for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
 		unsigned long pfn = pte_pfn(pte[i]);
 
-		VM_BUG_ON(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end);
+		VM_WARN_ON_ONCE(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end);
 
 		if (!pte_present(pte[i]) || is_zero_pfn(pfn))
 			continue;
@@ -3957,7 +3967,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 		if (!pte_young(pte[i]))
 			continue;
 
-		VM_BUG_ON(!pfn_valid(pfn));
+		VM_WARN_ON_ONCE(!pfn_valid(pfn));
 		if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 			continue;
 
@@ -4039,34 +4049,39 @@ static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
 	int gen = page_lru_gen(page);
 	int type = page_is_file_cache(page);
 	int zone = page_zonenum(page);
-	int tier = page_lru_tier(page);
 	int delta = hpage_nr_pages(page);
+	int refs = page_lru_refs(page);
+	int tier = lru_tier_from_refs(refs);
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 
-	VM_BUG_ON_PAGE(gen >= MAX_NR_GENS, page);
+	VM_WARN_ON_ONCE_PAGE(gen >= MAX_NR_GENS, page);
 
+	/* unevictable */
 	if (!page_evictable(page)) {
 		success = lru_gen_del_page(lruvec, page, true);
-		VM_BUG_ON_PAGE(!success, page);
+		VM_WARN_ON_ONCE_PAGE(!success, page);
 		SetPageUnevictable(page);
 		add_page_to_lru_list(page, lruvec);
 		__count_vm_events(UNEVICTABLE_PGCULLED, delta);
 		return true;
 	}
 
+	/* dirtied lazyfree */
 	if (type == LRU_GEN_FILE && PageAnon(page) && PageDirty(page)) {
 		success = lru_gen_del_page(lruvec, page, true);
-		VM_BUG_ON_PAGE(!success, page);
+		VM_WARN_ON_ONCE_PAGE(!success, page);
 		SetPageSwapBacked(page);
 		add_page_to_lru_list_tail(page, lruvec);
 		return true;
 	}
 
+	/* promoted */
 	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
 		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
 		return true;
 	}
 
+	/* protected */
 	if (tier > tier_idx) {
 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
 
@@ -4079,6 +4094,7 @@ static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
 		return true;
 	}
 
+	/* waiting for writeback */
 	if (PageLocked(page) || PageWriteback(page) ||
 	    (type == LRU_GEN_FILE && PageDirty(page))) {
 		gen = page_inc_gen(lruvec, page, true);
@@ -4106,7 +4122,7 @@ static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_c
 	ClearPageLRU(page);
 
 	success = lru_gen_del_page(lruvec, page, true);
-	VM_BUG_ON_PAGE(!success, page);
+	VM_WARN_ON_ONCE_PAGE(!success, page);
 
 	return true;
 }
@@ -4123,7 +4139,7 @@ static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 
-	VM_BUG_ON(!list_empty(list));
+	VM_WARN_ON_ONCE(!list_empty(list));
 
 	if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
 		return 0;
@@ -4139,11 +4155,11 @@ static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
 			struct page *page = lru_to_page(head);
 			int delta = hpage_nr_pages(page);
 
-			VM_BUG_ON_PAGE(PageTail(page), page);
-			VM_BUG_ON_PAGE(PageUnevictable(page), page);
-			VM_BUG_ON_PAGE(PageActive(page), page);
-			VM_BUG_ON_PAGE(page_is_file_cache(page) != type, page);
-			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+			VM_WARN_ON_ONCE_PAGE(PageTail(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
+			VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
 
 			prefetchw_prev_lru_page(page, head, flags);
 
@@ -4245,8 +4261,6 @@ static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swa
 	int tier = -1;
 	DEFINE_MIN_SEQ(lruvec);
 
-	VM_BUG_ON(!seq_is_valid(lruvec));
-
 	/*
	 * Try to make the obvious choice first. When anon and file are both
	 * available from the same generation, interpret swappiness 1 as file
@@ -4300,7 +4314,7 @@ static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swapp
 	if (try_to_inc_min_seq(lruvec, swappiness))
 		scanned++;
 
-	if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS)
+	if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
 		scanned = 0;
 
 	spin_unlock_irq(&pgdat->lru_lock);
@@ -4386,7 +4400,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
 	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
 		return nr_to_scan;
 
-	return min_seq[LRU_GEN_FILE] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
 }
 
 static unsigned long lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
@@ -4469,7 +4483,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
 				return false;
 
 			/* unlikely but not a bug when reset_batch_size() is pending */
-			VM_WARN_ON(lrugen->nr_pages[gen][type][zone]);
+			VM_WARN_ON_ONCE(lrugen->nr_pages[gen][type][zone]);
 		}
 	}
 
@@ -4490,17 +4504,17 @@ static bool fill_evictable(struct lruvec *lruvec)
 			bool success;
 			struct page *page = lru_to_page(head);
 
-			VM_BUG_ON_PAGE(PageTail(page), page);
-			VM_BUG_ON_PAGE(PageUnevictable(page), page);
-			VM_BUG_ON_PAGE(PageActive(page) != active, page);
-			VM_BUG_ON_PAGE(page_is_file_cache(page) != type, page);
-			VM_BUG_ON_PAGE(page_lru_gen(page) < MAX_NR_GENS, page);
+			VM_WARN_ON_ONCE_PAGE(PageTail(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageActive(page) != active, page);
+			VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+			VM_WARN_ON_ONCE_PAGE(page_lru_gen(page) < MAX_NR_GENS, page);
 
 			prefetchw_prev_lru_page(page, head, flags);
 
 			del_page_from_lru_list(page, lruvec);
 			success = lru_gen_add_page(lruvec, page, false);
-			VM_BUG_ON(!success);
+			VM_WARN_ON_ONCE(!success);
 
 			if (!--remaining)
 				return false;
@@ -4522,16 +4536,16 @@ static bool drain_evictable(struct lruvec *lruvec)
 			bool success;
 			struct page *page = lru_to_page(head);
 
-			VM_BUG_ON_PAGE(PageTail(page), page);
-			VM_BUG_ON_PAGE(PageUnevictable(page), page);
-			VM_BUG_ON_PAGE(PageActive(page), page);
-			VM_BUG_ON_PAGE(page_is_file_cache(page) != type, page);
-			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+			VM_WARN_ON_ONCE_PAGE(PageTail(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
+			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
+			VM_WARN_ON_ONCE_PAGE(page_is_file_cache(page) != type, page);
+			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
 
 			prefetchw_prev_lru_page(page, head, flags);
 
 			success = lru_gen_del_page(lruvec, page, false);
-			VM_BUG_ON(!success);
+			VM_WARN_ON_ONCE(!success);
 			add_page_to_lru_list(page, lruvec);
 
 			if (!--remaining)
@@ -4579,8 +4593,8 @@ static void lru_gen_change_state(bool enable)
 
 			spin_lock_irq(&pgdat->lru_lock);
 
-			VM_BUG_ON(!seq_is_valid(lruvec));
-			VM_BUG_ON(!state_is_valid(lruvec));
+			VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+			VM_WARN_ON_ONCE(!state_is_valid(lruvec));
 
 			lruvec->lrugen.enabled = enable;
 
@@ -4724,8 +4738,8 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 	for_each_node(nid) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-		VM_BUG_ON(memchr_inv(lruvec->lrugen.nr_pages, 0,
-				     sizeof(lruvec->lrugen.nr_pages)));
+		VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
+					   sizeof(lruvec->lrugen.nr_pages)));
 
 		for (i = 0; i < NR_BLOOM_FILTERS; i++) {
 			bitmap_free(lruvec->mm_state.filters[i]);
@@ -5549,7 +5563,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 }
 #endif
 
-static void age_active_anon(struct pglist_data *pgdat,
+static void kswapd_age_node(struct pglist_data *pgdat,
 			    struct scan_control *sc)
 {
 	struct mem_cgroup *memcg;
@@ -5760,12 +5774,11 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		goto out;
 
 	/*
-	 * Do some background aging of the anon list, to give
-	 * pages a chance to be referenced before reclaiming. All
-	 * pages are rotated regardless of classzone as this is
-	 * about consistent aging.
+	 * Do some background aging, to give pages a chance to be
+	 * referenced before reclaiming. All pages are rotated
+	 * regardless of classzone as this is about consistent aging.
	 */
-	age_active_anon(pgdat, &sc);
+	kswapd_age_node(pgdat, &sc);
 
 	/*
	 * If we're getting trouble reclaiming, start doing writepage
diff --git a/mm/workingset.c b/mm/workingset.c
index be20cd8bcd7a..7b9369a3e970 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -216,43 +216,34 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 
 #ifdef CONFIG_LRU_GEN
 
-static int page_lru_refs(struct page *page)
-{
-	unsigned long flags = READ_ONCE(page->flags);
-
-	BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT);
-
-	/* see the comment on MAX_NR_TIERS */
-	return flags & BIT(PG_workingset) ? (flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0;
-}
-
 void *lru_gen_eviction(struct page *page)
 {
-	int hist, tier;
+	int hist;
 	unsigned long token;
 	unsigned long min_seq;
 	struct lruvec *lruvec;
 	struct lru_gen_struct *lrugen;
 	int type = page_is_file_cache(page);
-	int refs = page_lru_refs(page);
 	int delta = hpage_nr_pages(page);
-	bool workingset = PageWorkingset(page);
+	int refs = page_lru_refs(page);
+	int tier = lru_tier_from_refs(refs);
 	struct mem_cgroup *memcg = page_memcg(page);
 	struct pglist_data *pgdat = page_pgdat(page);
 
 	if (!mem_cgroup_disabled() && !memcg)
 		return NULL;
 
+	BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT);
+
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	lrugen = &lruvec->lrugen;
 	min_seq = READ_ONCE(lrugen->min_seq[type]);
-	token = (min_seq << LRU_REFS_WIDTH) | refs;
+	token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0);
 
 	hist = lru_hist_from_seq(min_seq);
-	tier = lru_tier_from_refs(refs + workingset);
 	atomic_long_add(delta, &lrugen->evicted[hist][type][tier]);
 
-	return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset);
+	return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
 }
 
 void lru_gen_refault(struct page *page, void *shadow)
@@ -271,27 +262,27 @@ void lru_gen_refault(struct page *page, void *shadow)
 
 	unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
 
-	refs = token & (BIT(LRU_REFS_WIDTH) - 1);
-	if (refs && !workingset)
-		return;
-
 	if (page_pgdat(page) != pgdat)
 		return;
 
+	/* see the comment in page_lru_refs() */
+	refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + workingset;
+	tier = lru_tier_from_refs(refs);
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_id(memcg_id);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto unlock;
 
-	token >>= LRU_REFS_WIDTH;
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	lrugen = &lruvec->lrugen;
+
 	min_seq = READ_ONCE(lrugen->min_seq[type]);
+
+	token >>= LRU_REFS_WIDTH;
 	if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
 		goto unlock;
 
 	hist = lru_hist_from_seq(min_seq);
-	tier = lru_tier_from_refs(refs + workingset);
 	atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
 	mod_lruvec_state(lruvec, WORKINGSET_REFAULT, delta);
 
@@ -302,7 +293,7 @@ void lru_gen_refault(struct page *page, void *shadow)
	 * 2. For pages accessed through file descriptors, numbers of accesses
	 *    might have been beyond the limit.
	 */
-	if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) {
+	if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) {
 		SetPageWorkingset(page);
 		mod_lruvec_state(lruvec, WORKINGSET_RESTORE, delta);
 	}
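
A note on the workingset.c changes: the subtle part is the round trip between lru_gen_eviction() and lru_gen_refault(). max(refs - 1, 0) goes into the low bits of the token and the fact that refs is nonzero goes into the shadow's workingset bit, so a refault can recover refs exactly even though refs can reach BIT(LRU_REFS_WIDTH), one more than the token's low bits can hold. Below is a minimal user-space sketch of this encoding, not part of the patch, assuming LRU_REFS_WIDTH is 2 (the real value is derived from the page->flags layout at build time):

/* shadow_token_demo.c - user-space model of the shadow token packing. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define LRU_REFS_WIDTH	2	/* assumed; see __LRU_REFS_WIDTH above */
#define BIT(n)		(1UL << (n))

struct shadow {
	unsigned long token;	/* min_seq and refs-1, as in lru_gen_eviction() */
	bool workingset;	/* the bit pack_shadow() stores separately */
};

static struct shadow evict(unsigned long min_seq, int refs)
{
	struct shadow s = {
		.token = (min_seq << LRU_REFS_WIDTH) | (refs > 0 ? refs - 1 : 0),
		.workingset = refs > 0,
	};

	return s;
}

/* see lru_gen_refault(): refs = low token bits + workingset */
static int refault_refs(struct shadow s)
{
	return (s.token & (BIT(LRU_REFS_WIDTH) - 1)) + s.workingset;
}

int main(void)
{
	int refs;

	/*
	 * refs can reach BIT(LRU_REFS_WIDTH), one more than the token's low
	 * bits can hold; the workingset bit supplies the missing +1.
	 */
	for (refs = 0; refs <= BIT(LRU_REFS_WIDTH); refs++) {
		struct shadow s = evict(42, refs);

		assert(refault_refs(s) == refs);
		printf("refs=%d token=%#lx workingset=%d\n",
		       refs, s.token, s.workingset);
	}
	return 0;
}

The min_seq bits ride in the upper part of the token and are compared against EVICTION_MASK on refault; that part is omitted here.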