sched/walt: Accounting for number of irqs pending on each core

Schedules tasks on a core whose irqload (time spent servicing irqs) is below a threshold.
Improves I/O performance of EAS.

Change-Id: I08ff7dd0d22502a0106fc636b1af2e6fe9e758b5
Author: Srinath Sridharan
Date: 2016-07-22 13:21:15 +01:00
Committer: John Stultz
Commit: 519c62750e (parent efb86bd08a)
8 changed files with 108 additions and 1 deletion
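In outline: walt_account_irqtime() charges each CPU's irq service time into a per-jiffy window (rq->cur_irqload) and folds completed windows into a decayed average (rq->avg_irqload); walt_cpu_high_irqload() compares that average against sysctl_sched_walt_cpu_high_irqload (10 ms by default); and find_best_target() skips candidate CPUs that are over the threshold.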

@@ -47,6 +47,7 @@ extern unsigned int sysctl_sched_cstate_aware;
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_task_util;
extern unsigned int sysctl_sched_walt_init_task_load_pct;
extern unsigned int sysctl_sched_walt_cpu_high_irqload;
#endif
enum sched_tunable_scaling {

@@ -7750,6 +7750,11 @@ void __init sched_init(void)
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
#ifdef CONFIG_SCHED_WALT
rq->cur_irqload = 0;
rq->avg_irqload = 0;
rq->irqload_ts = 0;
#endif
INIT_LIST_HEAD(&rq->cfs_tasks);

@@ -5,6 +5,7 @@
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#include "walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -49,6 +50,10 @@ void irqtime_account_irq(struct task_struct *curr)
unsigned long flags;
s64 delta;
int cpu;
#ifdef CONFIG_SCHED_WALT
u64 wallclock;
bool account = true;
#endif
if (!sched_clock_irqtime)
return;
@@ -56,6 +61,9 @@ void irqtime_account_irq(struct task_struct *curr)
local_irq_save(flags);
cpu = smp_processor_id();
#ifdef CONFIG_SCHED_WALT
wallclock = sched_clock_cpu(cpu);
#endif
delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
__this_cpu_add(irq_start_time, delta);
@@ -70,8 +78,16 @@ void irqtime_account_irq(struct task_struct *curr)
__this_cpu_add(cpu_hardirq_time, delta);
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
#ifdef CONFIG_SCHED_WALT
else
account = false;
#endif
irq_time_write_end();
#ifdef CONFIG_SCHED_WALT
if (account)
walt_account_irqtime(cpu, curr, delta, wallclock);
#endif
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

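Note the account flag: a delta that is charged neither to cpu_hardirq_time nor to cpu_softirq_time (for instance softirq work run by ksoftirqd, which WALT already sees as ordinary task demand) is likewise kept out of walt_account_irqtime(), presumably to avoid double accounting.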

@@ -61,6 +61,8 @@ unsigned int sysctl_sched_cstate_aware = 1;
#ifdef CONFIG_SCHED_WALT
unsigned int sysctl_sched_use_walt_cpu_util = 1;
unsigned int sysctl_sched_use_walt_task_util = 1;
__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
(10 * NSEC_PER_MSEC);
#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -4258,7 +4260,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
schedtune_enqueue_task(p, cpu_of(rq));
#endif /* CONFIG_SMP */
hrtick_update(rq);
}
@@ -5627,6 +5628,10 @@ static inline int find_best_target(struct task_struct *p, bool boosted)
if (new_util > capacity_orig_of(i))
continue;
#ifdef CONFIG_SCHED_WALT
if (walt_cpu_high_irqload(i))
continue;
#endif
/*
* For boosted tasks we favor idle cpus unconditionally to
* improve latency.

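The effect: a CPU whose decayed irqload has crossed the threshold is dropped from the candidate list just as if it lacked spare capacity, steering wakeups away from interrupt-heavy cores.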

@@ -685,6 +685,9 @@ struct rq {
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
u64 cur_irqload;
u64 avg_irqload;
u64 irqload_ts;
#endif /* CONFIG_SCHED_WALT */

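(cur_irqload and avg_irqload hold nanoseconds of irq service time, measured via sched_clock(); irqload_ts is a jiffies64 timestamp, so each decay window below is one jiffy.)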

@@ -221,6 +221,71 @@ static int cpu_is_waiting_on_io(struct rq *rq)
return atomic_read(&rq->nr_iowait);
}
void walt_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags, nr_windows;
u64 cur_jiffies_ts;
raw_spin_lock_irqsave(&rq->lock, flags);
/*
* cputime (wallclock) uses sched_clock so use the same here for
* consistency.
*/
delta += sched_clock() - wallclock;
cur_jiffies_ts = get_jiffies_64();
if (is_idle_task(curr))
walt_update_task_ravg(curr, rq, IRQ_UPDATE, walt_ktime_clock(),
delta);
nr_windows = cur_jiffies_ts - rq->irqload_ts;
if (nr_windows) {
if (nr_windows < 10) {
/* Decay CPU's irqload by 3/4 for each window. */
rq->avg_irqload *= (3 * nr_windows);
rq->avg_irqload = div64_u64(rq->avg_irqload,
4 * nr_windows);
} else {
rq->avg_irqload = 0;
}
rq->avg_irqload += rq->cur_irqload;
rq->cur_irqload = 0;
}
rq->cur_irqload += delta;
rq->irqload_ts = cur_jiffies_ts;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#define WALT_HIGH_IRQ_TIMEOUT 3
u64 walt_irqload(int cpu) {
struct rq *rq = cpu_rq(cpu);
s64 delta;
delta = get_jiffies_64() - rq->irqload_ts;
/*
 * The current context can be preempted by an irq, and irq context can
 * update rq->irqload_ts, so delta may be negative here. That is fine:
 * a negative delta simply means an irq occurred very recently, so
 * returning avg_irqload is still the right answer.
 */
if (delta < WALT_HIGH_IRQ_TIMEOUT)
return rq->avg_irqload;
else
return 0;
}
int walt_cpu_high_irqload(int cpu) {
return walt_irqload(cpu) >= sysctl_sched_walt_cpu_high_irqload;
}
static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
u64 irqtime, int event)
{

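The window arithmetic in walt_account_irqtime() is easy to replay outside the kernel. Below is a minimal userspace sketch of the same update rule (the account() helper and the main() driver are made-up names for illustration, not part of the patch). One observable property: for 1 <= nr_windows < 10 the scaling (3 * nr_windows) / (4 * nr_windows) reduces to a single factor of 3/4 however many windows have elapsed, while ten or more quiet windows zero the average outright.

#include <stdint.h>
#include <stdio.h>

/* Standalone sketch, not kernel code: replays the rq->avg_irqload
 * update rule from walt_account_irqtime() above. */
static uint64_t avg_irqload, cur_irqload;

static void account(uint64_t delta_ns, uint64_t nr_windows)
{
    if (nr_windows) {
        if (nr_windows < 10)
            avg_irqload = avg_irqload * 3 / 4; /* (3n)/(4n) == 3/4 */
        else
            avg_irqload = 0; /* ten or more quiet windows: reset */
        avg_irqload += cur_irqload;
        cur_irqload = 0;
    }
    cur_irqload += delta_ns;
}

int main(void)
{
    account(4000000, 0); /* 4 ms of irq time inside the current window */
    account(2000000, 1); /* window rolls over: avg = 0 * 3/4 + 4 ms */
    account(0, 1);       /* avg = 4 ms * 3/4 + 2 ms = 5 ms */
    printf("avg_irqload = %llu ns\n", (unsigned long long)avg_irqload);
    return 0;
}

With the default threshold of 10 * NSEC_PER_MSEC, the 5 ms average printed here would not yet mark the CPU as high-irqload.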

@@ -31,6 +31,11 @@ void walt_set_window_start(struct rq *rq);
void walt_migrate_sync_cpu(int cpu);
void walt_init_cpu_efficiency(void);
u64 walt_ktime_clock(void);
void walt_account_irqtime(int cpu, struct task_struct *curr, u64 delta,
u64 wallclock);
u64 walt_irqload(int cpu);
int walt_cpu_high_irqload(int cpu);
#else /* CONFIG_SCHED_WALT */

@@ -332,6 +332,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_walt_cpu_high_irqload",
.data = &sysctl_sched_walt_cpu_high_irqload,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
{
.procname = "sched_sync_hint_enable",
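Since the entry lands in kern_table, the threshold should be tunable at runtime through /proc/sys/kernel/sched_walt_cpu_high_irqload; the value is in nanoseconds, so e.g. writing 20000000 would raise the cutoff from the default 10 ms to 20 ms.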