Revert "FROMLIST: mm: multi-gen LRU: minimal implementation"
This reverts commit a1537a68c5.
To be replaced with upstream version.
Bug: 249601646
Change-Id: I3dfbb3ec56cfdb5a2db7ec00c124dae471cce932
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
This commit is contained in:
@@ -111,19 +111,6 @@ static inline int lru_gen_from_seq(unsigned long seq)
|
||||
return seq % MAX_NR_GENS;
|
||||
}
|
||||
|
||||
static inline int lru_hist_from_seq(unsigned long seq)
|
||||
{
|
||||
return seq % NR_HIST_GENS;
|
||||
}
|
||||
|
||||
static inline int lru_tier_from_refs(int refs)
|
||||
{
|
||||
VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH));
|
||||
|
||||
/* see the comment on MAX_NR_TIERS */
|
||||
return order_base_2(refs + 1);
|
||||
}
|
||||
|
||||
static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
|
||||
{
|
||||
unsigned long max_seq = lruvec->lrugen.max_seq;
|
||||
@@ -169,15 +156,6 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page,
|
||||
__update_lru_size(lruvec, lru, zone, -delta);
|
||||
return;
|
||||
}
|
||||
|
||||
/* promotion */
|
||||
if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
|
||||
__update_lru_size(lruvec, lru, zone, -delta);
|
||||
__update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
|
||||
}
|
||||
|
||||
/* demotion requires isolation, e.g., lru_deactivate_fn() */
|
||||
VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
|
||||
}
|
||||
|
||||
static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
|
||||
@@ -242,8 +220,6 @@ static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bo
|
||||
gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
|
||||
new_flags &= ~LRU_GEN_MASK;
|
||||
if (!(new_flags & BIT(PG_referenced)))
|
||||
new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
/* for shrink_page_list() */
|
||||
if (reclaiming)
|
||||
new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
|
||||
|
||||
@@ -327,34 +327,12 @@ enum lruvec_flags {
|
||||
#define MIN_NR_GENS 2U
|
||||
#define MAX_NR_GENS 4U
|
||||
|
||||
/*
|
||||
* Each generation is divided into multiple tiers. Tiers represent different
|
||||
* ranges of numbers of accesses through file descriptors. A page accessed N
|
||||
* times through file descriptors is in tier order_base_2(N). A page in the
|
||||
* first tier (N=0,1) is marked by PG_referenced unless it was faulted in
|
||||
* through page tables or read ahead. A page in any other tier (N>1) is marked
|
||||
* by PG_referenced and PG_workingset.
|
||||
*
|
||||
* In contrast to moving across generations which requires the LRU lock, moving
|
||||
* across tiers only requires operations on page->flags and therefore has a
|
||||
* negligible cost in the buffered access path. In the eviction path,
|
||||
* comparisons of refaulted/(evicted+protected) from the first tier and the
|
||||
* rest infer whether pages accessed multiple times through file descriptors
|
||||
* are statistically hot and thus worth protecting.
|
||||
*
|
||||
* MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the number of
|
||||
* categories of the active/inactive LRU when keeping track of accesses through
|
||||
* file descriptors. It requires MAX_NR_TIERS-2 additional bits in page->flags.
|
||||
*/
|
||||
#define MAX_NR_TIERS 4U
|
||||
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
||||
struct lruvec;
|
||||
|
||||
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset))
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
@@ -363,16 +341,6 @@ enum {
|
||||
LRU_GEN_FILE,
|
||||
};
|
||||
|
||||
#define MIN_LRU_BATCH BITS_PER_LONG
|
||||
#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128)
|
||||
|
||||
/* whether to keep historical stats from evicted generations */
|
||||
#ifdef CONFIG_LRU_GEN_STATS
|
||||
#define NR_HIST_GENS MAX_NR_GENS
|
||||
#else
|
||||
#define NR_HIST_GENS 1U
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The youngest generation number is stored in max_seq for both anon and file
|
||||
* types as they are aged on an equal footing. The oldest generation numbers are
|
||||
@@ -392,15 +360,6 @@ struct lru_gen_struct {
|
||||
struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
/* the sizes of the above lists */
|
||||
unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
/* the exponential moving average of refaulted */
|
||||
unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
|
||||
/* the exponential moving average of evicted+protected */
|
||||
unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
|
||||
/* the first tier doesn't need protection, hence the minus one */
|
||||
unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
|
||||
/* can be modified without holding the LRU lock */
|
||||
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
};
|
||||
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
|
||||
@@ -24,7 +24,7 @@ int main(void)
|
||||
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
|
||||
DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2);
|
||||
DEFINE(LRU_REFS_WIDTH, 0);
|
||||
#else
|
||||
DEFINE(LRU_GEN_WIDTH, 0);
|
||||
DEFINE(LRU_REFS_WIDTH, 0);
|
||||
|
||||
11
mm/Kconfig
11
mm/Kconfig
@@ -916,7 +916,6 @@ config ANON_VMA_NAME
|
||||
area from being merged with adjacent virtual memory areas due to the
|
||||
difference in their name.
|
||||
|
||||
# multi-gen LRU {
|
||||
config LRU_GEN
|
||||
bool "Multi-Gen LRU"
|
||||
depends on MMU
|
||||
@@ -925,16 +924,6 @@ config LRU_GEN
|
||||
help
|
||||
A high performance LRU implementation to overcommit memory.
|
||||
|
||||
config LRU_GEN_STATS
|
||||
bool "Full stats for debugging"
|
||||
depends on LRU_GEN
|
||||
help
|
||||
Do not enable this option unless you plan to look at historical stats
|
||||
from evicted generations for debugging purposes.
|
||||
|
||||
This option has a per-memcg and per-node memory overhead.
|
||||
# }
|
||||
|
||||
source "mm/damon/Kconfig"
|
||||
|
||||
config ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
|
||||
|
||||
42
mm/swap.c
42
mm/swap.c
@@ -389,43 +389,6 @@ static void __lru_cache_activate_page(struct page *page)
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
static void page_inc_refs(struct page *page)
|
||||
{
|
||||
unsigned long refs;
|
||||
unsigned long old_flags, new_flags;
|
||||
|
||||
if (PageUnevictable(page))
|
||||
return;
|
||||
|
||||
/* see the comment on MAX_NR_TIERS */
|
||||
do {
|
||||
new_flags = old_flags = READ_ONCE(page->flags);
|
||||
|
||||
if (!(new_flags & BIT(PG_referenced))) {
|
||||
new_flags |= BIT(PG_referenced);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(new_flags & BIT(PG_workingset))) {
|
||||
new_flags |= BIT(PG_workingset);
|
||||
continue;
|
||||
}
|
||||
|
||||
refs = new_flags & LRU_REFS_MASK;
|
||||
refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK);
|
||||
|
||||
new_flags &= ~LRU_REFS_MASK;
|
||||
new_flags |= refs;
|
||||
} while (new_flags != old_flags &&
|
||||
cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
||||
}
|
||||
#else
|
||||
static void page_inc_refs(struct page *page)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
/*
|
||||
* Mark a page as having seen activity.
|
||||
*
|
||||
@@ -440,11 +403,6 @@ void mark_page_accessed(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
|
||||
if (lru_gen_enabled()) {
|
||||
page_inc_refs(page);
|
||||
return;
|
||||
}
|
||||
|
||||
trace_android_vh_mark_page_accessed(page);
|
||||
if (!PageReferenced(page)) {
|
||||
SetPageReferenced(page);
|
||||
|
||||
806
mm/vmscan.c
806
mm/vmscan.c
@@ -1153,11 +1153,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t swap = { .val = page_private(page) };
|
||||
|
||||
/* get a shadow entry before mem_cgroup_swapout() clears page_memcg() */
|
||||
mem_cgroup_swapout(page, swap);
|
||||
if (reclaimed && !mapping_exiting(mapping))
|
||||
shadow = workingset_eviction(page, target_memcg);
|
||||
mem_cgroup_swapout(page, swap);
|
||||
__delete_from_swap_cache(page, swap, shadow);
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
put_swap_page(page, swap);
|
||||
@@ -2617,9 +2615,6 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
|
||||
unsigned long file;
|
||||
struct lruvec *target_lruvec;
|
||||
|
||||
if (lru_gen_enabled())
|
||||
return;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
/*
|
||||
@@ -2953,38 +2948,11 @@ static bool can_age_anon_pages(struct pglist_data *pgdat,
|
||||
* shorthand helpers
|
||||
******************************************************************************/
|
||||
|
||||
#define DEFINE_MAX_SEQ(lruvec) \
|
||||
unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
|
||||
|
||||
#define DEFINE_MIN_SEQ(lruvec) \
|
||||
unsigned long min_seq[ANON_AND_FILE] = { \
|
||||
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \
|
||||
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \
|
||||
}
|
||||
|
||||
#define for_each_gen_type_zone(gen, type, zone) \
|
||||
for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
|
||||
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
|
||||
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
|
||||
|
||||
static int page_lru_gen(struct page *page)
|
||||
{
|
||||
unsigned long flags = READ_ONCE(page->flags);
|
||||
|
||||
return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
}
|
||||
|
||||
static int page_lru_tier(struct page *page)
|
||||
{
|
||||
int refs;
|
||||
unsigned long flags = READ_ONCE(page->flags);
|
||||
|
||||
refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ?
|
||||
((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0;
|
||||
|
||||
return lru_tier_from_refs(refs);
|
||||
}
|
||||
|
||||
static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
|
||||
{
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
@@ -3005,755 +2973,6 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
|
||||
return pgdat ? &pgdat->__lruvec : NULL;
|
||||
}
|
||||
|
||||
static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
if (!can_demote(pgdat->node_id, sc) &&
|
||||
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
|
||||
return 0;
|
||||
|
||||
return mem_cgroup_swappiness(memcg);
|
||||
}
|
||||
|
||||
static int get_nr_gens(struct lruvec *lruvec, int type)
|
||||
{
|
||||
return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1;
|
||||
}
|
||||
|
||||
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
||||
{
|
||||
/* see the comment on lru_gen_struct */
|
||||
return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
|
||||
get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
|
||||
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* refault feedback loop
|
||||
******************************************************************************/
|
||||
|
||||
/*
|
||||
* A feedback loop based on Proportional-Integral-Derivative (PID) controller.
|
||||
*
|
||||
* The P term is refaulted/(evicted+protected) from a tier in the generation
|
||||
* currently being evicted; the I term is the exponential moving average of the
|
||||
* P term over the generations previously evicted, using the smoothing factor
|
||||
* 1/2; the D term isn't supported.
|
||||
*
|
||||
* The setpoint (SP) is always the first tier of one type; the process variable
|
||||
* (PV) is either any tier of the other type or any other tier of the same
|
||||
* type.
|
||||
*
|
||||
* The error is the difference between the SP and the PV; the correction is
|
||||
* to turn off protection when SP>PV or to turn on protection when SP<PV.
|
||||
*
|
||||
* For future optimizations:
|
||||
* 1. The D term may discount the other two terms over time so that long-lived
|
||||
* generations can resist stale information.
|
||||
*/
|
||||
struct ctrl_pos {
|
||||
unsigned long refaulted;
|
||||
unsigned long total;
|
||||
int gain;
|
||||
};
|
||||
|
||||
static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
|
||||
struct ctrl_pos *pos)
|
||||
{
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
pos->refaulted = lrugen->avg_refaulted[type][tier] +
|
||||
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
pos->total = lrugen->avg_total[type][tier] +
|
||||
atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
if (tier)
|
||||
pos->total += lrugen->protected[hist][type][tier - 1];
|
||||
pos->gain = gain;
|
||||
}
|
||||
|
||||
static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
|
||||
{
|
||||
int hist, tier;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
|
||||
unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
|
||||
|
||||
lockdep_assert_held(&lruvec->lru_lock);
|
||||
|
||||
if (!carryover && !clear)
|
||||
return;
|
||||
|
||||
hist = lru_hist_from_seq(seq);
|
||||
|
||||
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
if (carryover) {
|
||||
unsigned long sum;
|
||||
|
||||
sum = lrugen->avg_refaulted[type][tier] +
|
||||
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
|
||||
|
||||
sum = lrugen->avg_total[type][tier] +
|
||||
atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
if (tier)
|
||||
sum += lrugen->protected[hist][type][tier - 1];
|
||||
WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
|
||||
}
|
||||
|
||||
if (clear) {
|
||||
atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
|
||||
atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
|
||||
if (tier)
|
||||
WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
|
||||
{
|
||||
/*
|
||||
* Return true if the PV has a limited number of refaults or a lower
|
||||
* refaulted/total than the SP.
|
||||
*/
|
||||
return pv->refaulted < MIN_LRU_BATCH ||
|
||||
pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
|
||||
(sp->refaulted + 1) * pv->total * pv->gain;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* the aging
|
||||
******************************************************************************/
|
||||
|
||||
static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
|
||||
{
|
||||
unsigned long old_flags, new_flags;
|
||||
int type = page_is_file_lru(page);
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
do {
|
||||
new_flags = old_flags = READ_ONCE(page->flags);
|
||||
VM_BUG_ON_PAGE(!(new_flags & LRU_GEN_MASK), page);
|
||||
|
||||
new_gen = (old_gen + 1) % MAX_NR_GENS;
|
||||
|
||||
new_flags &= ~LRU_GEN_MASK;
|
||||
new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
|
||||
new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
/* for end_page_writeback() */
|
||||
if (reclaiming)
|
||||
new_flags |= BIT(PG_reclaim);
|
||||
} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
||||
|
||||
lru_gen_update_size(lruvec, page, old_gen, new_gen);
|
||||
|
||||
return new_gen;
|
||||
}
|
||||
|
||||
static void inc_min_seq(struct lruvec *lruvec)
|
||||
{
|
||||
int type;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
VM_BUG_ON(!seq_is_valid(lruvec));
|
||||
|
||||
for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
|
||||
continue;
|
||||
|
||||
reset_ctrl_pos(lruvec, type, true);
|
||||
WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
{
|
||||
int gen, type, zone;
|
||||
bool success = false;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
DEFINE_MIN_SEQ(lruvec);
|
||||
|
||||
VM_BUG_ON(!seq_is_valid(lruvec));
|
||||
|
||||
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
||||
while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
|
||||
gen = lru_gen_from_seq(min_seq[type]);
|
||||
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
if (!list_empty(&lrugen->lists[gen][type][zone]))
|
||||
goto next;
|
||||
}
|
||||
|
||||
min_seq[type]++;
|
||||
}
|
||||
next:
|
||||
;
|
||||
}
|
||||
|
||||
/* see the comment on lru_gen_struct */
|
||||
if (can_swap) {
|
||||
min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
|
||||
min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
|
||||
}
|
||||
|
||||
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
||||
if (min_seq[type] == lrugen->min_seq[type])
|
||||
continue;
|
||||
|
||||
reset_ctrl_pos(lruvec, type, true);
|
||||
WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
|
||||
success = true;
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq)
|
||||
{
|
||||
int prev, next;
|
||||
int type, zone;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
spin_lock_irq(&lruvec->lru_lock);
|
||||
|
||||
VM_BUG_ON(!seq_is_valid(lruvec));
|
||||
|
||||
if (max_seq != lrugen->max_seq)
|
||||
goto unlock;
|
||||
|
||||
inc_min_seq(lruvec);
|
||||
|
||||
/*
|
||||
* Update the active/inactive LRU sizes for compatibility. Both sides of
|
||||
* the current max_seq need to be covered, since max_seq+1 can overlap
|
||||
* with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do
|
||||
* overlap, cold/hot inversion happens. This can be solved by moving
|
||||
* pages from min_seq to min_seq+1 but is omitted for simplicity.
|
||||
*/
|
||||
prev = lru_gen_from_seq(lrugen->max_seq - 1);
|
||||
next = lru_gen_from_seq(lrugen->max_seq + 1);
|
||||
|
||||
for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
enum lru_list lru = type * LRU_INACTIVE_FILE;
|
||||
long delta = lrugen->nr_pages[prev][type][zone] -
|
||||
lrugen->nr_pages[next][type][zone];
|
||||
|
||||
if (!delta)
|
||||
continue;
|
||||
|
||||
__update_lru_size(lruvec, lru, zone, delta);
|
||||
__update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta);
|
||||
}
|
||||
}
|
||||
|
||||
for (type = 0; type < ANON_AND_FILE; type++)
|
||||
reset_ctrl_pos(lruvec, type, false);
|
||||
|
||||
/* make sure preceding modifications appear */
|
||||
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
|
||||
unlock:
|
||||
spin_unlock_irq(&lruvec->lru_lock);
|
||||
}
|
||||
|
||||
static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq,
|
||||
unsigned long *min_seq, bool can_swap, bool *need_aging)
|
||||
{
|
||||
int gen, type, zone;
|
||||
long old = 0;
|
||||
long young = 0;
|
||||
long total = 0;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
||||
unsigned long seq;
|
||||
|
||||
for (seq = min_seq[type]; seq <= max_seq; seq++) {
|
||||
long size = 0;
|
||||
|
||||
gen = lru_gen_from_seq(seq);
|
||||
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
||||
size += READ_ONCE(lrugen->nr_pages[gen][type][zone]);
|
||||
|
||||
total += size;
|
||||
if (seq == max_seq)
|
||||
young += size;
|
||||
if (seq + MIN_NR_GENS == max_seq)
|
||||
old += size;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The aging and the eviction form a typical producer-consumer model. The
|
||||
* aging tries to be lazy to reduce the unnecessary overhead. On the
|
||||
* other hand, the eviction stalls when the number of generations
|
||||
* reaches MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1
|
||||
* generations, hence the first two if's.
|
||||
*
|
||||
* In addition, it's ideal to spread pages out evenly, meaning
|
||||
* 1/(MIN_NR_GENS+1) of the total number of pages for each generation. A
|
||||
* reasonable range for this average portion would be [1/MIN_NR_GENS,
|
||||
* 1/(MIN_NR_GENS+2)]. From the consumer's POV, the eviction only cares
|
||||
* about the lower bound of cold pages, i.e., 1/(MIN_NR_GENS+2), whereas
|
||||
* from the producer's POV, the aging only cares about the upper bound
|
||||
* of hot pages, i.e., 1/MIN_NR_GENS.
|
||||
*/
|
||||
if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq)
|
||||
*need_aging = true;
|
||||
else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq)
|
||||
*need_aging = false;
|
||||
else if (young * MIN_NR_GENS > total)
|
||||
*need_aging = true;
|
||||
else if (old * (MIN_NR_GENS + 2) < total)
|
||||
*need_aging = true;
|
||||
else
|
||||
*need_aging = false;
|
||||
|
||||
return total > 0 ? total : 0;
|
||||
}
|
||||
|
||||
static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
bool need_aging;
|
||||
long nr_to_scan;
|
||||
int swappiness = get_swappiness(lruvec, sc);
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MAX_SEQ(lruvec);
|
||||
DEFINE_MIN_SEQ(lruvec);
|
||||
|
||||
mem_cgroup_calculate_protection(NULL, memcg);
|
||||
|
||||
if (mem_cgroup_below_min(memcg))
|
||||
return;
|
||||
|
||||
nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging);
|
||||
if (!nr_to_scan)
|
||||
return;
|
||||
|
||||
nr_to_scan >>= sc->priority;
|
||||
|
||||
if (!mem_cgroup_online(memcg))
|
||||
nr_to_scan++;
|
||||
|
||||
if (nr_to_scan && need_aging && (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim))
|
||||
inc_max_seq(lruvec, max_seq);
|
||||
}
|
||||
|
||||
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
VM_BUG_ON(!current_is_kswapd());
|
||||
|
||||
memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
do {
|
||||
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
|
||||
age_lruvec(lruvec, sc);
|
||||
|
||||
cond_resched();
|
||||
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* the eviction
|
||||
******************************************************************************/
|
||||
|
||||
static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
|
||||
{
|
||||
bool success;
|
||||
int gen = page_lru_gen(page);
|
||||
int type = page_is_file_lru(page);
|
||||
int zone = page_zonenum(page);
|
||||
int tier = page_lru_tier(page);
|
||||
int delta = thp_nr_pages(page);
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
VM_BUG_ON_PAGE(gen >= MAX_NR_GENS, page);
|
||||
|
||||
if (!page_evictable(page)) {
|
||||
success = lru_gen_del_page(lruvec, page, true);
|
||||
VM_BUG_ON_PAGE(!success, page);
|
||||
SetPageUnevictable(page);
|
||||
add_page_to_lru_list(page, lruvec);
|
||||
__count_vm_events(UNEVICTABLE_PGCULLED, delta);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (type == LRU_GEN_FILE && PageAnon(page) && PageDirty(page)) {
|
||||
success = lru_gen_del_page(lruvec, page, true);
|
||||
VM_BUG_ON_PAGE(!success, page);
|
||||
SetPageSwapBacked(page);
|
||||
add_page_to_lru_list_tail(page, lruvec);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tier > tier_idx) {
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
gen = page_inc_gen(lruvec, page, false);
|
||||
list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]);
|
||||
|
||||
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
|
||||
lrugen->protected[hist][type][tier - 1] + delta);
|
||||
__mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (PageLocked(page) || PageWriteback(page) ||
|
||||
(type == LRU_GEN_FILE && PageDirty(page))) {
|
||||
gen = page_inc_gen(lruvec, page, true);
|
||||
list_move(&page->lru, &lrugen->lists[gen][type][zone]);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_control *sc)
|
||||
{
|
||||
bool success;
|
||||
|
||||
if (!sc->may_unmap && page_mapped(page))
|
||||
return false;
|
||||
|
||||
if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
|
||||
(PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
|
||||
return false;
|
||||
|
||||
if (!get_page_unless_zero(page))
|
||||
return false;
|
||||
|
||||
if (!TestClearPageLRU(page)) {
|
||||
put_page(page);
|
||||
return false;
|
||||
}
|
||||
|
||||
success = lru_gen_del_page(lruvec, page, true);
|
||||
VM_BUG_ON_PAGE(!success, page);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
|
||||
int type, int tier, struct list_head *list)
|
||||
{
|
||||
int gen, zone;
|
||||
enum vm_event_item item;
|
||||
int sorted = 0;
|
||||
int scanned = 0;
|
||||
int isolated = 0;
|
||||
int remaining = MAX_LRU_BATCH;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
|
||||
VM_BUG_ON(!list_empty(list));
|
||||
|
||||
if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
|
||||
return 0;
|
||||
|
||||
gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
for (zone = sc->reclaim_idx; zone >= 0; zone--) {
|
||||
LIST_HEAD(moved);
|
||||
int skipped = 0;
|
||||
struct list_head *head = &lrugen->lists[gen][type][zone];
|
||||
|
||||
while (!list_empty(head)) {
|
||||
struct page *page = lru_to_page(head);
|
||||
int delta = thp_nr_pages(page);
|
||||
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
VM_BUG_ON_PAGE(PageUnevictable(page), page);
|
||||
VM_BUG_ON_PAGE(PageActive(page), page);
|
||||
VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
|
||||
VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
|
||||
|
||||
prefetchw_prev_lru_page(page, head, flags);
|
||||
|
||||
scanned += delta;
|
||||
|
||||
if (sort_page(lruvec, page, tier))
|
||||
sorted += delta;
|
||||
else if (isolate_page(lruvec, page, sc)) {
|
||||
list_add(&page->lru, list);
|
||||
isolated += delta;
|
||||
} else {
|
||||
list_move(&page->lru, &moved);
|
||||
skipped += delta;
|
||||
}
|
||||
|
||||
if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
|
||||
break;
|
||||
}
|
||||
|
||||
if (skipped) {
|
||||
list_splice(&moved, head);
|
||||
__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
|
||||
}
|
||||
|
||||
if (!remaining || isolated >= MIN_LRU_BATCH)
|
||||
break;
|
||||
}
|
||||
|
||||
item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
|
||||
if (!cgroup_reclaim(sc)) {
|
||||
__count_vm_events(item, isolated);
|
||||
__count_vm_events(PGREFILL, sorted);
|
||||
}
|
||||
__count_memcg_events(memcg, item, isolated);
|
||||
__count_memcg_events(memcg, PGREFILL, sorted);
|
||||
__count_vm_events(PGSCAN_ANON + type, isolated);
|
||||
|
||||
/*
|
||||
* There might not be eligible pages due to reclaim_idx, may_unmap and
|
||||
* may_writepage. Check the remaining to prevent livelock if there is no
|
||||
* progress.
|
||||
*/
|
||||
return isolated || !remaining ? scanned : 0;
|
||||
}
|
||||
|
||||
static int get_tier_idx(struct lruvec *lruvec, int type)
|
||||
{
|
||||
int tier;
|
||||
struct ctrl_pos sp, pv;
|
||||
|
||||
/*
|
||||
* To leave a margin for fluctuations, use a larger gain factor (1:2).
|
||||
* This value is chosen because any other tier would have at least twice
|
||||
* as many refaults as the first tier.
|
||||
*/
|
||||
read_ctrl_pos(lruvec, type, 0, 1, &sp);
|
||||
for (tier = 1; tier < MAX_NR_TIERS; tier++) {
|
||||
read_ctrl_pos(lruvec, type, tier, 2, &pv);
|
||||
if (!positive_ctrl_err(&sp, &pv))
|
||||
break;
|
||||
}
|
||||
|
||||
return tier - 1;
|
||||
}
|
||||
|
||||
static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx)
|
||||
{
|
||||
int type, tier;
|
||||
struct ctrl_pos sp, pv;
|
||||
int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
|
||||
|
||||
/*
|
||||
* Compare the first tier of anon with that of file to determine which
|
||||
* type to scan. Also need to compare other tiers of the selected type
|
||||
* with the first tier of the other type to determine the last tier (of
|
||||
* the selected type) to evict.
|
||||
*/
|
||||
read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp);
|
||||
read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv);
|
||||
type = positive_ctrl_err(&sp, &pv);
|
||||
|
||||
read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp);
|
||||
for (tier = 1; tier < MAX_NR_TIERS; tier++) {
|
||||
read_ctrl_pos(lruvec, type, tier, gain[type], &pv);
|
||||
if (!positive_ctrl_err(&sp, &pv))
|
||||
break;
|
||||
}
|
||||
|
||||
*tier_idx = tier - 1;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
||||
int *type_scanned, struct list_head *list)
|
||||
{
|
||||
int i;
|
||||
int type;
|
||||
int scanned;
|
||||
int tier = -1;
|
||||
DEFINE_MIN_SEQ(lruvec);
|
||||
|
||||
VM_BUG_ON(!seq_is_valid(lruvec));
|
||||
|
||||
/*
|
||||
* Try to make the obvious choice first. When anon and file are both
|
||||
* available from the same generation, interpret swappiness 1 as file
|
||||
* first and 200 as anon first.
|
||||
*/
|
||||
if (!swappiness)
|
||||
type = LRU_GEN_FILE;
|
||||
else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE])
|
||||
type = LRU_GEN_ANON;
|
||||
else if (swappiness == 1)
|
||||
type = LRU_GEN_FILE;
|
||||
else if (swappiness == 200)
|
||||
type = LRU_GEN_ANON;
|
||||
else
|
||||
type = get_type_to_scan(lruvec, swappiness, &tier);
|
||||
|
||||
for (i = !swappiness; i < ANON_AND_FILE; i++) {
|
||||
if (tier < 0)
|
||||
tier = get_tier_idx(lruvec, type);
|
||||
|
||||
scanned = scan_pages(lruvec, sc, type, tier, list);
|
||||
if (scanned)
|
||||
break;
|
||||
|
||||
type = !type;
|
||||
tier = -1;
|
||||
}
|
||||
|
||||
*type_scanned = type;
|
||||
|
||||
return scanned;
|
||||
}
|
||||
|
||||
static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
|
||||
{
|
||||
int type;
|
||||
int scanned;
|
||||
int reclaimed;
|
||||
LIST_HEAD(list);
|
||||
struct page *page;
|
||||
enum vm_event_item item;
|
||||
struct reclaim_stat stat;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
spin_lock_irq(&lruvec->lru_lock);
|
||||
|
||||
scanned = isolate_pages(lruvec, sc, swappiness, &type, &list);
|
||||
|
||||
if (try_to_inc_min_seq(lruvec, swappiness))
|
||||
scanned++;
|
||||
|
||||
if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS)
|
||||
scanned = 0;
|
||||
|
||||
spin_unlock_irq(&lruvec->lru_lock);
|
||||
|
||||
if (list_empty(&list))
|
||||
return scanned;
|
||||
|
||||
reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
|
||||
|
||||
/*
|
||||
* To avoid livelock, don't add rejected pages back to the same lists
|
||||
* they were isolated from. See lru_gen_add_page().
|
||||
*/
|
||||
list_for_each_entry(page, &list, lru) {
|
||||
ClearPageReferenced(page);
|
||||
ClearPageWorkingset(page);
|
||||
|
||||
if (PageReclaim(page) && (PageDirty(page) || PageWriteback(page)))
|
||||
ClearPageActive(page);
|
||||
else if (page_is_file_lru(page) || PageSwapCache(page))
|
||||
SetPageActive(page);
|
||||
}
|
||||
|
||||
spin_lock_irq(&lruvec->lru_lock);
|
||||
|
||||
move_pages_to_lru(lruvec, &list);
|
||||
|
||||
item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
|
||||
if (!cgroup_reclaim(sc))
|
||||
__count_vm_events(item, reclaimed);
|
||||
__count_memcg_events(memcg, item, reclaimed);
|
||||
__count_vm_events(PGSTEAL_ANON + type, reclaimed);
|
||||
|
||||
spin_unlock_irq(&lruvec->lru_lock);
|
||||
|
||||
mem_cgroup_uncharge_list(&list);
|
||||
free_unref_page_list(&list);
|
||||
|
||||
sc->nr_reclaimed += reclaimed;
|
||||
|
||||
return scanned;
|
||||
}
|
||||
|
||||
/*
 * Work out how much of @lruvec to scan in this round.
 *
 * Returns 0 when the memcg is protected (below min, or below low without
 * sc->memcg_low_reclaim), when get_nr_evictable() reports nothing, when the
 * scaled-down target rounds to zero, or when aging is needed but is left to
 * someone else (kswapd, or a later pass with sc->force_deactivate set).
 * Otherwise returns the scaled target; if aging is needed and allowed here,
 * inc_max_seq() is called first.
 */
static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
{
	bool need_aging;
	long nr_to_scan;
	int shift;
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
	DEFINE_MAX_SEQ(lruvec);
	DEFINE_MIN_SEQ(lruvec);

	if (mem_cgroup_below_min(memcg))
		return 0;

	if (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)
		return 0;

	nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging);
	if (!nr_to_scan)
		return 0;

	/* reset the priority if the target has been met */
	if (sc->nr_reclaimed < sc->nr_to_reclaim)
		shift = sc->priority;
	else
		shift = DEF_PRIORITY;

	nr_to_scan >>= shift;

	/* an offline memcg always gets at least one unit of work */
	if (!mem_cgroup_online(memcg))
		nr_to_scan++;

	if (!nr_to_scan)
		return 0;

	if (!need_aging)
		return nr_to_scan;

	/* leave the work to lru_gen_age_node() */
	if (current_is_kswapd())
		return 0;

	/* try other memcgs before going to the aging path */
	if (!cgroup_reclaim(sc) && !sc->force_deactivate) {
		sc->skipped_deactivate = true;
		return 0;
	}

	inc_max_seq(lruvec, max_seq);

	return nr_to_scan;
}
|
||||
|
||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
long scanned = 0;
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
while (true) {
|
||||
int delta;
|
||||
int swappiness;
|
||||
long nr_to_scan;
|
||||
|
||||
if (sc->may_swap)
|
||||
swappiness = get_swappiness(lruvec, sc);
|
||||
else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
|
||||
swappiness = 1;
|
||||
else
|
||||
swappiness = 0;
|
||||
|
||||
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
||||
if (!nr_to_scan)
|
||||
break;
|
||||
|
||||
delta = evict_pages(lruvec, sc, swappiness);
|
||||
if (!delta)
|
||||
break;
|
||||
|
||||
scanned += delta;
|
||||
if (scanned >= nr_to_scan)
|
||||
break;
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
@@ -3796,16 +3015,6 @@ static int __init init_lru_gen(void)
|
||||
};
|
||||
late_initcall(init_lru_gen);
|
||||
|
||||
#else
|
||||
|
||||
/* No-op variant compiled in the #else branch of CONFIG_LRU_GEN. */
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
}
|
||||
|
||||
/* No-op variant compiled in the #else branch of CONFIG_LRU_GEN. */
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
}
|
||||
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
@@ -3819,11 +3028,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
struct blk_plug plug;
|
||||
bool scan_adjusted;
|
||||
|
||||
if (lru_gen_enabled()) {
|
||||
lru_gen_shrink_lruvec(lruvec, sc);
|
||||
return;
|
||||
}
|
||||
|
||||
get_scan_count(lruvec, sc, nr);
|
||||
|
||||
/* Record the original scan target for proportional adjustments later */
|
||||
@@ -4296,9 +3500,6 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
|
||||
struct lruvec *target_lruvec;
|
||||
unsigned long refaults;
|
||||
|
||||
if (lru_gen_enabled())
|
||||
return;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
|
||||
refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
|
||||
target_lruvec->refaults[0] = refaults;
|
||||
@@ -4670,11 +3871,6 @@ static void age_active_anon(struct pglist_data *pgdat,
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
if (lru_gen_enabled()) {
|
||||
lru_gen_age_node(pgdat, sc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!can_age_anon_pages(pgdat, sc))
|
||||
return;
|
||||
|
||||
|
||||
119
mm/workingset.c
119
mm/workingset.c
@@ -187,6 +187,7 @@ static unsigned int bucket_order __read_mostly;
|
||||
static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
|
||||
bool workingset)
|
||||
{
|
||||
eviction >>= bucket_order;
|
||||
eviction &= EVICTION_MASK;
|
||||
eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
|
||||
eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
|
||||
@@ -211,116 +212,10 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
|
||||
|
||||
*memcgidp = memcgid;
|
||||
*pgdat = NODE_DATA(nid);
|
||||
*evictionp = entry;
|
||||
*evictionp = entry << bucket_order;
|
||||
*workingsetp = workingset;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
static int page_lru_refs(struct page *page)
|
||||
{
|
||||
unsigned long flags = READ_ONCE(page->flags);
|
||||
|
||||
BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT);
|
||||
|
||||
/* see the comment on MAX_NR_TIERS */
|
||||
return flags & BIT(PG_workingset) ? (flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0;
|
||||
}
|
||||
|
||||
static void *lru_gen_eviction(struct page *page)
|
||||
{
|
||||
int hist, tier;
|
||||
unsigned long token;
|
||||
unsigned long min_seq;
|
||||
struct lruvec *lruvec;
|
||||
struct lru_gen_struct *lrugen;
|
||||
int type = page_is_file_lru(page);
|
||||
int refs = page_lru_refs(page);
|
||||
int delta = thp_nr_pages(page);
|
||||
bool workingset = PageWorkingset(page);
|
||||
struct mem_cgroup *memcg = page_memcg(page);
|
||||
struct pglist_data *pgdat = page_pgdat(page);
|
||||
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
lrugen = &lruvec->lrugen;
|
||||
min_seq = READ_ONCE(lrugen->min_seq[type]);
|
||||
token = (min_seq << LRU_REFS_WIDTH) | refs;
|
||||
|
||||
hist = lru_hist_from_seq(min_seq);
|
||||
tier = lru_tier_from_refs(refs + workingset);
|
||||
atomic_long_add(delta, &lrugen->evicted[hist][type][tier]);
|
||||
|
||||
return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset);
|
||||
}
|
||||
|
||||
static void lru_gen_refault(struct page *page, void *shadow)
|
||||
{
|
||||
int hist, tier, refs;
|
||||
int memcg_id;
|
||||
bool workingset;
|
||||
unsigned long token;
|
||||
unsigned long min_seq;
|
||||
struct lruvec *lruvec;
|
||||
struct lru_gen_struct *lrugen;
|
||||
struct mem_cgroup *memcg;
|
||||
struct pglist_data *pgdat;
|
||||
int type = page_is_file_lru(page);
|
||||
int delta = thp_nr_pages(page);
|
||||
|
||||
unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
|
||||
|
||||
refs = token & (BIT(LRU_REFS_WIDTH) - 1);
|
||||
if (refs && !workingset)
|
||||
return;
|
||||
|
||||
if (page_pgdat(page) != pgdat)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = page_memcg_rcu(page);
|
||||
if (mem_cgroup_id(memcg) != memcg_id)
|
||||
goto unlock;
|
||||
|
||||
token >>= LRU_REFS_WIDTH;
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
lrugen = &lruvec->lrugen;
|
||||
min_seq = READ_ONCE(lrugen->min_seq[type]);
|
||||
if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
|
||||
goto unlock;
|
||||
|
||||
hist = lru_hist_from_seq(min_seq);
|
||||
tier = lru_tier_from_refs(refs + workingset);
|
||||
atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
|
||||
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta);
|
||||
|
||||
/*
|
||||
* Count the following two cases as stalls:
|
||||
* 1. For pages accessed through page tables, hotter pages pushed out
|
||||
* hot pages which refaulted immediately.
|
||||
* 2. For pages accessed through file descriptors, numbers of accesses
|
||||
* might have been beyond the limit.
|
||||
*/
|
||||
if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) {
|
||||
SetPageWorkingset(page);
|
||||
mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
|
||||
}
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void *lru_gen_eviction(struct page *page)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* No-op variant compiled in the #else branch of CONFIG_LRU_GEN. */
static void lru_gen_refault(struct page *page, void *shadow)
{
}
|
||||
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
/**
|
||||
* workingset_age_nonresident - age non-resident entries as LRU ages
|
||||
* @lruvec: the lruvec that was aged
|
||||
@@ -369,14 +264,10 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
|
||||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
|
||||
if (lru_gen_enabled())
|
||||
return lru_gen_eviction(page);
|
||||
|
||||
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
|
||||
/* XXX: target_memcg can be NULL, go through lruvec */
|
||||
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
|
||||
eviction = atomic_long_read(&lruvec->nonresident_age);
|
||||
eviction >>= bucket_order;
|
||||
workingset_age_nonresident(lruvec, thp_nr_pages(page));
|
||||
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
|
||||
}
|
||||
@@ -405,13 +296,7 @@ void workingset_refault(struct page *page, void *shadow)
|
||||
bool workingset;
|
||||
int memcgid;
|
||||
|
||||
if (lru_gen_enabled()) {
|
||||
lru_gen_refault(page, shadow);
|
||||
return;
|
||||
}
|
||||
|
||||
unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
|
||||
eviction <<= bucket_order;
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user