Unmapped folios accessed through file descriptors can be underprotected.
Those folios are added to the oldest generation based on:
1. The fact that they are less costly to reclaim (no need to walk the
rmap and flush the TLB) and have less impact on performance (don't
cause major PFs and can be non-blocking if needed again).
2. The observation that they are likely to be single-use. E.g., for
client use cases like Android, its apps parse configuration files
and store the data in heap (anon); for server use cases like MySQL,
it reads from InnoDB files and holds the cached data for tables in
buffer pools (anon).
However, the oldest generation can be very short lived, and if so, it
doesn't provide the PID controller with enough time to respond to a surge
of refaults. (Note that the PID controller uses weighted refaults and
those from evicted generations only take a half of the whole weight.) In
other words, for a short lived generation, the moving average smooths out
the spike quickly.
To fix the problem:
1. For folios that are already on LRU, if they can be beyond the
tracking range of tiers, i.e., five accesses through file
descriptors, move them to the second oldest generation to give them
more time to age. (Note that tiers are used by the PID controller
to statistically determine whether folios accessed multiple times
through file descriptors are worth protecting.)
2. When adding unmapped folios to LRU, adjust the placement of them so
that they are not too close to the tail. The effect of this is
similar to the above.
On Android, launching 55 apps sequentially:
Before After Change
workingset_refault_anon 25641024 25598972 0%
workingset_refault_file 115016834 106178438 -8%
Link: https://lkml.kernel.org/r/20231208061407.2125867-1-yuzhao@google.com
Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: Charan Teja Kalla <quic_charante@quicinc.com>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: T.J. Mercier <tjmercier@google.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ folio -> page ]
Signed-off-by: Helium-Studio <67852324+Helium-Studio@users.noreply.github.com>
359 lines
9.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef LINUX_MM_INLINE_H
|
|
#define LINUX_MM_INLINE_H
|
|
|
|
#include <linux/huge_mm.h>
|
|
#include <linux/swap.h>
|
|
|
|
#ifndef try_cmpxchg
/*
 * Fallback for kernels that do not provide try_cmpxchg(): emulate it with
 * cmpxchg(). On failure, the value actually found at *_ptr is written back
 * through @_oldp so the caller can retry without re-reading. The expression
 * evaluates to true when the exchange succeeded.
 */
#define try_cmpxchg(_ptr, _oldp, _new) \
({ \
	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
	___r = cmpxchg((_ptr), ___o, (_new)); \
	if (unlikely(___r != ___o)) \
		*___op = ___r; \
	likely(___r == ___o); \
})
#endif /* try_cmpxchg */
|
|
|
|
/**
 * page_is_file_cache - should the page be on a file LRU or anon LRU?
 * @page: the page to test
 *
 * Returns 1 if @page is page cache backed by a regular filesystem, or 0 if
 * @page is anonymous, tmpfs or otherwise ram or swap backed. Used by the
 * LRU-list manipulation functions to sort a page onto the right LRU list.
 *
 * We would like to get this info without a page flag, but the state needs
 * to survive until the page is last deleted from the LRU, which could be
 * as far down as __page_cache_release.
 */
static inline int page_is_file_cache(struct page *page)
{
	/* Swap-backed pages (anon, shmem/tmpfs) belong on the anon LRUs. */
	return PageSwapBacked(page) ? 0 : 1;
}
|
|
|
|
static __always_inline void __update_lru_size(struct lruvec *lruvec,
|
|
enum lru_list lru, enum zone_type zid,
|
|
int nr_pages)
|
|
{
|
|
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
|
|
|
lockdep_assert_held(&pgdat->lru_lock);
|
|
|
|
__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
|
|
__mod_zone_page_state(&pgdat->node_zones[zid],
|
|
NR_ZONE_LRU_BASE + lru, nr_pages);
|
|
}
|
|
|
|
/*
 * Adjust the LRU size counters by @nr_pages (may be negative): the
 * node/zone vmstat counters, and, with memcg enabled, the owning memory
 * cgroup's per-lruvec LRU size as well.
 */
static __always_inline void update_lru_size(struct lruvec *lruvec,
				enum lru_list lru, enum zone_type zid,
				int nr_pages)
{
	__update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
	/* keep the memcg's view of the lruvec size in sync */
	mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif
}
|
|
|
|
/**
|
|
* __clear_page_lru_flags - clear page lru flags before releasing a page
|
|
* @page: the page that was on lru and now has a zero reference
|
|
*/
|
|
static __always_inline void __clear_page_lru_flags(struct page *page)
|
|
{
|
|
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
|
|
|
__ClearPageLRU(page);
|
|
|
|
/* this shouldn't happen, so leave the flags to bad_page() */
|
|
if (PageActive(page) && PageUnevictable(page))
|
|
return;
|
|
|
|
__ClearPageActive(page);
|
|
__ClearPageUnevictable(page);
|
|
}
|
|
|
|
/**
|
|
* page_lru - which LRU list should a page be on?
|
|
* @page: the page to test
|
|
*
|
|
* Returns the LRU list a page should be on, as an index
|
|
* into the array of LRU lists.
|
|
*/
|
|
static __always_inline enum lru_list page_lru(struct page *page)
|
|
{
|
|
enum lru_list lru;
|
|
|
|
VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
|
|
|
|
if (PageUnevictable(page))
|
|
return LRU_UNEVICTABLE;
|
|
|
|
lru = page_is_file_cache(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
|
|
if (PageActive(page))
|
|
lru += LRU_ACTIVE;
|
|
|
|
return lru;
|
|
}
|
|
|
|
/* The page at the tail (coldest end) of an LRU list. */
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
|
|
|
|
#ifdef CONFIG_LRU_GEN
|
|
|
|
/*
 * Whether the multi-gen LRU core is enabled, via a static key so the test
 * compiles down to a patched branch. The likely/unlikely hint follows the
 * CONFIG_LRU_GEN_ENABLED default.
 */
static inline bool lru_gen_enabled(void)
{
#ifdef CONFIG_LRU_GEN_ENABLED
	DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);

	return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
#else
	DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);

	return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
#endif
}
|
|
|
|
/* Whether the current task has its per-task in_lru_fault flag set. */
static inline bool lru_gen_in_fault(void)
{
	return current->in_lru_fault;
}
|
|
|
|
/* Map a generation sequence number to its slot in the per-gen arrays. */
static inline int lru_gen_from_seq(unsigned long seq)
{
	return seq % MAX_NR_GENS;
}
|
|
|
|
/* Map a generation sequence number to its slot in the history stats. */
static inline int lru_hist_from_seq(unsigned long seq)
{
	return seq % NR_HIST_GENS;
}
|
|
|
|
/*
 * Map an access count to its tier: tiers grow exponentially, so tier n
 * covers roughly 2^n accesses (refs 0 -> tier 0, 1 -> 1, 2..3 -> 2, ...).
 */
static inline int lru_tier_from_refs(int refs)
{
	VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH));

	/* see the comment on MAX_NR_TIERS */
	return order_base_2(refs + 1);
}
|
|
|
|
static inline int page_lru_refs(struct page *page)
|
|
{
|
|
unsigned long flags = READ_ONCE(page->flags);
|
|
bool workingset = flags & BIT(PG_workingset);
|
|
|
|
/*
|
|
* Return the number of accesses beyond PG_referenced, i.e., N-1 if the
|
|
* total number of accesses is N>1, since N=0,1 both map to the first
|
|
* tier. lru_tier_from_refs() will account for this off-by-one. Also see
|
|
* the comment on MAX_NR_TIERS.
|
|
*/
|
|
return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
|
|
}
|
|
|
|
static inline int page_lru_gen(struct page *page)
|
|
{
|
|
unsigned long flags = READ_ONCE(page->flags);
|
|
|
|
return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
|
}
|
|
|
|
/*
 * Whether @gen is one of the two youngest generations, i.e. pages in it
 * count as "active" for the classic active/inactive LRU size counters.
 */
static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
{
	unsigned long max_seq = lruvec->lrugen.max_seq;

	VM_BUG_ON(gen >= MAX_NR_GENS);

	/* see the comment on MIN_NR_GENS */
	return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
}
|
|
|
|
/*
 * Maintain the per-generation page counts and the classic (in)active LRU
 * size counters when a page moves between generations. A generation of -1
 * means the page is entering (@old_gen == -1) or leaving (@new_gen == -1)
 * the lists entirely.
 */
static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page,
				int old_gen, int new_gen)
{
	int type = page_is_file_cache(page);
	int zone = page_zonenum(page);
	int delta = hpage_nr_pages(page);	/* THP-aware page count */
	enum lru_list lru = type * LRU_INACTIVE_FILE;	/* inactive list of @type */
	struct lru_gen_struct *lrugen = &lruvec->lrugen;

	VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS);
	VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS);
	VM_BUG_ON(old_gen == -1 && new_gen == -1);

	/* WRITE_ONCE: these counters may be read without the lru lock */
	if (old_gen >= 0)
		WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
			   lrugen->nr_pages[old_gen][type][zone] - delta);
	if (new_gen >= 0)
		WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
			   lrugen->nr_pages[new_gen][type][zone] + delta);

	/* addition */
	if (old_gen < 0) {
		if (lru_gen_is_active(lruvec, new_gen))
			lru += LRU_ACTIVE;
		update_lru_size(lruvec, lru, zone, delta);
		return;
	}

	/* deletion */
	if (new_gen < 0) {
		if (lru_gen_is_active(lruvec, old_gen))
			lru += LRU_ACTIVE;
		update_lru_size(lruvec, lru, zone, -delta);
		return;
	}

	/* promotion: inactive -> active */
	if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
		update_lru_size(lruvec, lru, zone, -delta);
		update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
	}

	/* demotion requires isolation, e.g., lru_deactivate_fn() */
	VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
}
|
|
|
|
/*
 * Add @page to a multi-gen LRU list, choosing the generation by how hot
 * the page appears to be. Returns false if the page is unevictable or
 * lru_gen is disabled, in which case the caller falls back to the classic
 * LRU lists.
 */
static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
{
	int gen;
	unsigned long old_flags, new_flags;
	int type = page_is_file_cache(page);
	int zone = page_zonenum(page);
	struct lru_gen_struct *lrugen = &lruvec->lrugen;

	if (PageUnevictable(page) || !lrugen->enabled)
		return false;
	/*
	 * There are four common cases for this page:
	 * 1. If it's hot, i.e., freshly faulted in, add it to the youngest
	 *    generation, and it's protected over the rest below.
	 * 2. If it can't be evicted immediately, i.e., a dirty page pending
	 *    writeback, add it to the second youngest generation.
	 * 3. If it should be evicted first, e.g., cold and clean from
	 *    page_rotate_reclaimable(), add it to the oldest generation.
	 * 4. Everything else falls between 2 & 3 above and is added to the
	 *    second oldest generation if it's considered inactive, or the
	 *    oldest generation otherwise. See lru_gen_is_active().
	 */
	if (PageActive(page))
		gen = lru_gen_from_seq(lrugen->max_seq);
	else if ((type == LRU_GEN_ANON && !PageSwapCache(page)) ||
		 (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))))
		gen = lru_gen_from_seq(lrugen->max_seq - 1);
	else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq)
		gen = lru_gen_from_seq(lrugen->min_seq[type]);
	else
		gen = lru_gen_from_seq(lrugen->min_seq[type] + 1);

	/* retry if the flags changed under us, e.g. a concurrent updater */
	do {
		new_flags = old_flags = READ_ONCE(page->flags);
		VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page);

		/* see the comment on MIN_NR_GENS */
		new_flags &= ~(LRU_GEN_MASK | BIT(PG_active));
		/* the generation is stored +1 so that 0 means "not on a list" */
		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);

	lru_gen_update_size(lruvec, page, -1, gen);
	/* for rotate_reclaimable_page() */
	if (reclaiming)
		list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
	else
		list_add(&page->lru, &lrugen->lists[gen][type][zone]);

	return true;
}
|
|
|
|
/*
 * Remove @page from its multi-gen LRU list, translating its generation
 * back into the classic PG_active flag where appropriate. Returns false
 * if the page was not on a multi-gen list (LRU_GEN field is zero), in
 * which case the caller falls back to the classic LRU lists.
 */
static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
{
	int gen;
	unsigned long old_flags, new_flags;

	/* retry if the flags changed under us, e.g. a concurrent updater */
	do {
		new_flags = old_flags = READ_ONCE(page->flags);
		if (!(new_flags & LRU_GEN_MASK))
			return false;

		VM_BUG_ON_PAGE(PageActive(page), page);
		VM_BUG_ON_PAGE(PageUnevictable(page), page);

		/* the field stores gen + 1; see lru_gen_add_page() */
		gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;

		new_flags &= ~LRU_GEN_MASK;
		/* without PG_referenced, discard the access history as well */
		if (!(new_flags & BIT(PG_referenced)))
			new_flags &= ~(LRU_REFS_MASK | (BIT(PG_referenced) | BIT(PG_workingset)));
		/* for shrink_page_list() */
		if (reclaiming)
			new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
		else if (lru_gen_is_active(lruvec, gen))
			new_flags |= BIT(PG_active);
	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);

	lru_gen_update_size(lruvec, page, gen, -1);
	list_del(&page->lru);

	return true;
}
|
|
|
|
#else
|
|
|
|
/* CONFIG_LRU_GEN=n stubs so callers need no #ifdef of their own. */

static inline bool lru_gen_enabled(void)
{
	return false;
}

static inline bool lru_gen_in_fault(void)
{
	return false;
}

/* Always defer to the classic LRU lists. */
static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
{
	return false;
}

/* Always defer to the classic LRU lists. */
static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
{
	return false;
}
|
|
|
|
#endif /* CONFIG_LRU_GEN */
|
|
|
|
static __always_inline void add_page_to_lru_list(struct page *page,
|
|
struct lruvec *lruvec)
|
|
{
|
|
enum lru_list lru = page_lru(page);
|
|
|
|
if (lru_gen_add_page(lruvec, page, false))
|
|
return;
|
|
|
|
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
|
|
list_add(&page->lru, &lruvec->lists[lru]);
|
|
}
|
|
|
|
static __always_inline void add_page_to_lru_list_tail(struct page *page,
|
|
struct lruvec *lruvec)
|
|
{
|
|
enum lru_list lru = page_lru(page);
|
|
|
|
if (lru_gen_add_page(lruvec, page, true))
|
|
return;
|
|
|
|
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
|
|
list_add_tail(&page->lru, &lruvec->lists[lru]);
|
|
}
|
|
|
|
static __always_inline void del_page_from_lru_list(struct page *page,
|
|
struct lruvec *lruvec)
|
|
{
|
|
if (lru_gen_del_page(lruvec, page, false))
|
|
return;
|
|
|
|
list_del(&page->lru);
|
|
update_lru_size(lruvec, page_lru(page), page_zonenum(page),
|
|
-hpage_nr_pages(page));
|
|
}
|
|
#endif
|