sched: Add cpu capacity awareness to wakeup balancing
Wakeup balancing is completely unaware of cpu capacity, cpu utilization
and task utilization. The task is preferably placed on a cpu which is
idle in the instant the wakeup happens. New tasks
(SD_BALANCE_{FORK,EXEC} are placed on an idle cpu in the idlest group if
such can be found, otherwise it goes on the least loaded one. Existing
tasks (SD_BALANCE_WAKE) are placed on the previous cpu or an idle cpu
sharing the same last level cache unless the wakee_flips heuristic in
wake_wide() decides to fallback to considering cpus outside SD_LLC.
Hence existing tasks are not guaranteed to get a chance to migrate to a
different group at wakeup in case the current one has reduced cpu
capacity (due RT/IRQ pressure or different uarch e.g. ARM big.LITTLE).
They may eventually get pulled by other cpus doing
periodic/idle/nohz_idle balance, but it may take quite a while before it
happens.
This patch adds capacity awareness to find_idlest_{group,queue} (used by
SD_BALANCE_{FORK,EXEC} and SD_BALANCE_WAKE under certain circumstances)
such that groups/cpus that can accommodate the waking task based on task
utilization are preferred. In addition, wakeup of existing tasks
(SD_BALANCE_WAKE) is sent through find_idlest_{group,queue} also if the
task doesn't fit the capacity of the previous cpu to allow it to escape
(override wake_affine) when necessary instead of relying on
periodic/idle/nohz_idle balance to eventually sort it out.
cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
This commit is contained in:
committed by
Leo Yan
parent
1eb2b8aa45
commit
e8bcb272bc
@@ -4742,6 +4742,43 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline unsigned long task_util(struct task_struct *p)
|
||||
{
|
||||
return p->se.avg.util_avg;
|
||||
}
|
||||
|
||||
static unsigned int capacity_margin = 1280; /* ~20% margin */
|
||||
|
||||
static inline bool __task_fits(struct task_struct *p, int cpu, int util)
|
||||
{
|
||||
unsigned long capacity = capacity_of(cpu);
|
||||
|
||||
util += task_util(p);
|
||||
|
||||
return (capacity * 1024) > (util * capacity_margin);
|
||||
}
|
||||
|
||||
static inline bool task_fits_max(struct task_struct *p, int cpu)
|
||||
{
|
||||
unsigned long capacity = capacity_of(cpu);
|
||||
unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity;
|
||||
|
||||
if (capacity == max_capacity)
|
||||
return true;
|
||||
|
||||
if (capacity * capacity_margin > max_capacity * 1024)
|
||||
return true;
|
||||
|
||||
return __task_fits(p, cpu, 0);
|
||||
}
|
||||
|
||||
static int cpu_util(int cpu);
|
||||
|
||||
static inline bool task_fits_spare(struct task_struct *p, int cpu)
|
||||
{
|
||||
return __task_fits(p, cpu, cpu_util(cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* find_idlest_group finds and returns the least busy CPU group within the
|
||||
* domain.
|
||||
@@ -4751,7 +4788,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
||||
int this_cpu, int sd_flag)
|
||||
{
|
||||
struct sched_group *idlest = NULL, *group = sd->groups;
|
||||
struct sched_group *fit_group = NULL;
|
||||
unsigned long min_load = ULONG_MAX, this_load = 0;
|
||||
unsigned long fit_capacity = ULONG_MAX;
|
||||
int load_idx = sd->forkexec_idx;
|
||||
int imbalance = 100 + (sd->imbalance_pct-100)/2;
|
||||
|
||||
@@ -4782,6 +4821,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
||||
load = target_load(i, load_idx);
|
||||
|
||||
avg_load += load;
|
||||
|
||||
/*
|
||||
* Look for most energy-efficient group that can fit
|
||||
* that can fit the task.
|
||||
*/
|
||||
if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) {
|
||||
fit_capacity = capacity_of(i);
|
||||
fit_group = group;
|
||||
}
|
||||
}
|
||||
|
||||
/* Adjust by relative CPU capacity of the group */
|
||||
@@ -4795,6 +4843,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
||||
}
|
||||
} while (group = group->next, group != sd->groups);
|
||||
|
||||
if (fit_group)
|
||||
return fit_group;
|
||||
|
||||
if (!idlest || 100*this_load < imbalance*min_load)
|
||||
return NULL;
|
||||
return idlest;
|
||||
@@ -4815,7 +4866,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
|
||||
|
||||
/* Traverse only the allowed CPUs */
|
||||
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
|
||||
if (idle_cpu(i)) {
|
||||
if (task_fits_spare(p, i)) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct cpuidle_state *idle = idle_get_state(rq);
|
||||
if (idle && idle->exit_latency < min_exit_latency) {
|
||||
@@ -4827,7 +4878,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
|
||||
min_exit_latency = idle->exit_latency;
|
||||
latest_idle_timestamp = rq->idle_stamp;
|
||||
shallowest_idle_cpu = i;
|
||||
} else if ((!idle || idle->exit_latency == min_exit_latency) &&
|
||||
} else if (idle_cpu(i) &&
|
||||
(!idle || idle->exit_latency == min_exit_latency) &&
|
||||
rq->idle_stamp > latest_idle_timestamp) {
|
||||
/*
|
||||
* If equal or no active idle state, then
|
||||
@@ -4836,6 +4888,13 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
|
||||
*/
|
||||
latest_idle_timestamp = rq->idle_stamp;
|
||||
shallowest_idle_cpu = i;
|
||||
} else if (shallowest_idle_cpu == -1) {
|
||||
/*
|
||||
* If we haven't found an idle CPU yet
|
||||
* pick a non-idle one that can fit the task as
|
||||
* fallback.
|
||||
*/
|
||||
shallowest_idle_cpu = i;
|
||||
}
|
||||
} else if (shallowest_idle_cpu == -1) {
|
||||
load = weighted_cpuload(i);
|
||||
@@ -4950,7 +5009,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
||||
int sync = wake_flags & WF_SYNC;
|
||||
|
||||
if (sd_flag & SD_BALANCE_WAKE)
|
||||
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
|
||||
want_affine = !wake_wide(p) && task_fits_max(p, cpu) &&
|
||||
cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, tmp) {
|
||||
|
||||
Reference in New Issue
Block a user