Files
Rabin Vincent 914d085861 sched: fix softirq time accounting
softirq time accounting is broken on v4.9.x if ksoftirqd runs.

With
	CONFIG_IRQ_TIME_ACCOUNTING=y
	# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set

this test code:

	struct tasklet_struct tasklet;

	static void delay_tasklet(unsigned long data)
	{
		udelay(10);
		tasklet_schedule(&tasklet);
	}

	tasklet_init(&tasklet, delay_tasklet, 0);
	tasklet_schedule(&tasklet);

results in:

$ while :; do grep cpu0 /proc/stat; done
cpu0 5 0 80 25 16 107 1 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 1 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0

As can be seen, the softirq numbers are totally bogus.

When ksoftirq is running, irqtime_account_process_tick() increments
cpustat[CPUSTAT_SOFTIRQ].  This causes the "nsecs_to_cputime64(irqtime)
- cpustat[CPUSTAT_SOFTIRQ]" calculation in irqtime_account_update() to
underflow the next time a softirq is handled leading to the above
values.

The underflow bug was added by 5743021831 ("sched/cputime: Count
actually elapsed irq & softirq time").

But ksoftirqd accounting was wrong even in earlier kernels.  In earlier
kernels, after a ksoftirq run, the kernel would simply stop accounting
softirq time spent outside of ksoftirqd until that (accumulated) time
exceeded the time for which ksofirqd previously had run.

Fix both the underflow and the wrong accounting by using a counter
specifically for the non-ksoftirqd softirq case.

This code has been fixed in current mainline by a499a5a14db
("sched/cputime: Increment kcpustat directly on irqtime account") [note
also the followup 25e2d8c1b9e327e ("sched/cputime: Fix ksoftirqd cputime
accounting regression")], but that patch is a part of the many changes
for eliminating of cputime_t so it does not seem suitable for backport.

Change-Id: I231b4e95df1201119360dee44e6d0847b2795ac5
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Patch-mainline: linux-kernel @ 13 Dec 2017 11:11:16
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
2018-08-02 09:46:04 +05:30

98 lines
2.4 KiB
C

#ifndef _LINUX_KERNEL_STAT_H
#define _LINUX_KERNEL_STAT_H
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/vtime.h>
#include <asm/irq.h>
#include <linux/cputime.h>
/*
* 'kernel_stat.h' contains the definitions needed for doing
* some kernel statistics (CPU usage, context switches ...),
* used by rstatd/perfmeter
*/
enum cpu_usage_stat {
CPUTIME_USER,
CPUTIME_NICE,
CPUTIME_SYSTEM,
CPUTIME_SOFTIRQ,
CPUTIME_IRQ,
CPUTIME_IDLE,
CPUTIME_IOWAIT,
CPUTIME_STEAL,
CPUTIME_GUEST,
CPUTIME_GUEST_NICE,
NR_STATS,
};
struct kernel_cpustat {
u64 cpustat[NR_STATS];
};
struct kernel_stat {
unsigned long irqs_sum;
unsigned int softirqs[NR_SOFTIRQS];
};
DECLARE_PER_CPU(struct kernel_stat, kstat);
DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
/* Must have preemption disabled for this to be meaningful. */
#define kstat_this_cpu this_cpu_ptr(&kstat)
#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat)
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
extern unsigned long long nr_context_switches(void);
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
extern void kstat_incr_irq_this_cpu(unsigned int irq);
static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
{
__this_cpu_inc(kstat.softirqs[irq]);
}
static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
{
return kstat_cpu(cpu).softirqs[irq];
}
/*
* Number of interrupts per specific IRQ source, since bootup
*/
extern unsigned int kstat_irqs(unsigned int irq);
extern unsigned int kstat_irqs_usr(unsigned int irq);
/*
* Number of interrupts per cpu, since bootup
*/
static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
{
return kstat_cpu(cpu).irqs_sum;
}
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
}
#else
extern void account_process_tick(struct task_struct *, int user);
#endif
extern void account_idle_ticks(unsigned long ticks);
#endif /* _LINUX_KERNEL_STAT_H */