Files
jianzhou dc194471e3 Merge android-4.9.207(6f0b5b5) into msm-4.9
* refs/heads/tmp-6f0b5b5:
  Linux 4.9.207
  net: stmmac: don't stop NAPI processing when dropping a packet
  net: stmmac: use correct DMA buffer size in the RX descriptor
  xhci: fix USB3 device initiated resume race with roothub autosuspend
  drm/radeon: fix r1xx/r2xx register checker for POT textures
  scsi: iscsi: Fix a potential deadlock in the timeout handler
  dm btree: increase rebalance threshold in __rebalance2()
  dma-buf: Fix memory leak in sync_file_merge()
  vfio/pci: call irq_bypass_unregister_producer() before freeing irq
  ARM: tegra: Fix FLOW_CTLR_HALT register clobbering by tegra_resume()
  ARM: dts: s3c64xx: Fix init order of clock providers
  CIFS: Respect O_SYNC and O_DIRECT flags during reconnect
  xtensa: fix TLB sanity checker
  PCI/MSI: Fix incorrect MSI-X masking on resume
  PCI: Fix Intel ACS quirk UPDCR register address
  Revert "regulator: Defer init completion for a while after late_initcall"
  tcp: Protect accesses to .ts_recent_stamp with {READ,WRITE}_ONCE()
  tcp: tighten acceptance of ACKs not matching a child socket
  tcp: fix rejected syncookies due to stale timestamps
  inet: protect against too small mtu values.
  tipc: fix ordering of tipc module init and exit routine
  tcp: md5: fix potential overestimation of TCP option space
  openvswitch: support asymmetric conntrack
  net: ethernet: ti: cpsw: fix extra rx interrupt
  net: bridge: deny dev_set_mac_address() when unregistering
  nvme: host: core: fix precedence of ternary operator
  kernel/module.c: wakeup processes in module_wq on module unload
  net/mlx5e: Fix SFF 8472 eeprom length
  sunrpc: fix crash when cache_head become valid before update
  workqueue: Fix missing kfree(rescuer) in destroy_workqueue()
  blk-mq: make sure that line break can be printed
  ext4: fix a bug in ext4_wait_for_tail_page_commit
  mm/shmem.c: cast the type of unmap_start to u64
  firmware: qcom: scm: Ensure 'a0' status code is treated as signed
  reiserfs: fix extended attributes on the root directory
  powerpc: Fix vDSO clock_getres()
  scsi: qla2xxx: Always check the qla2x00_wait_for_hba_online() return value
  scsi: qla2xxx: Fix qla24xx_process_bidir_cmd()
  scsi: qla2xxx: Fix session lookup in qlt_abort_work()
  scsi: qla2xxx: Fix DMA unmap leak
  pinctrl: samsung: Fix device node refcount leaks in S3C64xx wakeup controller init
  ARM: dts: omap3-tao3530: Fix incorrect MMC card detection GPIO polarity
  ath10k: fix fw crash by moving chip reset after napi disabled
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  e100: Fix passing zero to 'PTR_ERR' warning in e100_load_ucode_wait
  scsi: lpfc: Cap NPIV vports to 256
  omap: pdata-quirks: remove openpandora quirks for mmc3 and wl1251
  Btrfs: fix negative subv_writers counter and data space leak after buffered write
  iio: adis16480: Add debugfs_reg_access entry
  xhci: make sure interrupts are restored to correct state
  xhci: Fix memory leak in xhci_add_in_port()
  usb: xhci: only set D3hot for pci device
  scsi: zfcp: trace channel log even for FCP command responses
  quota: fix livelock in dquot_writeback_dquots
  ext2: check err when partial != NULL
  quota: Check that quota is not dirty before release
  video/hdmi: Fix AVI bar unpack
  powerpc: Allow 64bit VDSO __kernel_sync_dicache to work across ranges >4GB
  ppdev: fix PPGETTIME/PPSETTIME ioctls
  mmc: host: omap_hsmmc: add code for special init of wl1251 to get rid of pandora_wl1251_init_card
  pinctrl: samsung: Fix device node refcount leaks in init code
  pinctrl: samsung: Fix device node refcount leaks in S3C24xx wakeup controller init
  ACPI: PM: Avoid attaching ACPI PM domain to certain devices
  ACPI: bus: Fix NULL pointer check in acpi_bus_get_private_data()
  ACPI: OSL: only free map once in osl.c
  PM / devfreq: Lock devfreq in trans_stat_show
  cpuidle: Do not unset the driver if it is there already
  media: radio: wl1273: fix interrupt masking on release
  media: bdisp: fix memleak on release
  ar5523: check NULL before memcpy() in ar5523_cmd()
  cgroup: pids: use atomic64_t for pids->limit
  blk-mq: avoid sysfs buffer overflow with too many CPU cores
  ASoC: Jack: Fix NULL pointer dereference in snd_soc_jack_report
  workqueue: Fix pwq ref leak in rescuer_thread()
  workqueue: Fix spurious sanity check failures in destroy_workqueue()
  ALSA: hda - Fix pending unsol events at shutdown
  lib: raid6: fix awk build warnings
  rtlwifi: rtl8192de: Fix missing enable interrupt flag
  rtlwifi: rtl8192de: Fix missing callback that tests for hw release of buffer
  rtlwifi: rtl8192de: Fix missing code to retrieve RX buffer address
  btrfs: record all roots for rename exchange on a subvol
  Btrfs: send, skip backreference walking for extents with many references
  btrfs: Remove btrfs_bio::flags member
  btrfs: check page->mapping when loading free space cache
  virtio-balloon: fix managed page counts when migrating pages between zones
  mtd: spear_smi: Fix Write Burst mode
  usb: mon: Fix a deadlock in usbmon between mmap and read
  usb: core: urb: fix URB structure initialization function
  USB: adutux: fix interface sanity check
  USB: serial: io_edgeport: fix epic endpoint lookup
  USB: idmouse: fix interface sanity checks
  USB: atm: ueagle-atm: add missing endpoint check
  iio: humidity: hdc100x: fix IIO_HUMIDITYRELATIVE channel reporting
  ARM: dts: pandora-common: define wl1251 as child node of mmc3
  xhci: Increase STS_HALT timeout in xhci_suspend()
  staging: gigaset: add endpoint-type sanity check
  staging: gigaset: fix illegal free on probe errors
  staging: gigaset: fix general protection fault on probe
  staging: rtl8712: fix interface sanity check
  staging: rtl8188eu: fix interface sanity check
  usb: Allow USB device to be warm reset in suspended state
  USB: uas: heed CAPACITY_HEURISTICS
  USB: uas: honor flag to avoid CAPACITY16
  usb: gadget: configfs: Fix missing spin_lock_init()
  appletalk: Set error code if register_snap_client failed
  appletalk: Fix potential NULL pointer dereference in unregister_snap_client
  KVM: x86: fix out-of-bounds write in KVM_GET_EMULATED_CPUID (CVE-2019-19332)
  thermal: Fix deadlock in thermal thermal_zone_device_check
  RDMA/qib: Validate ->show()/store() callbacks before calling them
  spi: atmel: Fix CS high support
  crypto: user - fix memory leak in crypto_report
  crypto: ecdh - fix big endian bug in ECC library
  crypto: ccp - fix uninitialized list head
  crypto: crypto4xx - fix double-free in crypto4xx_destroy_sdr
  KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES
  KVM: x86: do not modify masked bits of shared MSRs
  drm/i810: Prevent underflow in ioctl
  jbd2: Fix possible overflow in jbd2_log_space_left()
  can: slcan: Fix use-after-free Read in slcan_open
  tty: vt: keyboard: reject invalid keycodes
  CIFS: Fix SMB2 oplock break processing
  CIFS: Fix NULL-pointer dereference in smb2_push_mandatory_locks
  x86/PCI: Avoid AMD FCH XHCI USB PME# from D0 defect
  coresight: etm4x: Fix input validation for sysfs.
  Input: goodix - add upside-down quirk for Teclast X89 tablet
  ALSA: pcm: oss: Avoid potential buffer overflows
  fuse: verify attributes
  fuse: verify nlink
  sched/fair: Scale bandwidth quota and period without losing quota/period ratio precision
  ARM: dts: sunxi: Fix PMU compatible strings
  mlx4: Use snprintf instead of complicated strcpy
  media: stkwebcam: Bugfix for wrong return values
  tty: Don't block on IO when ldisc change is pending
  nfsd: Return EPERM, not EACCES, in some SETATTR cases
  MIPS: OCTEON: cvmx_pko_mem_debug8: use oldest forward compatible definition
  powerpc/math-emu: Update macros from GCC
  net/mlx4_core: Fix return codes of unsupported operations
  dlm: fix invalid cluster name warning
  ARM: dts: realview: Fix some more duplicate regulator nodes
  clk: sunxi-ng: h3/h5: Fix CSI_MCLK parent
  ARM: dts: pxa: clean up USB controller nodes
  mtd: fix mtd_oobavail() incoherent returned value
  kbuild: fix single target build for external module
  modpost: skip ELF local symbols during section mismatch check
  tcp: fix SNMP TCP timeout under-estimation
  tcp: fix off-by-one bug on aborting window-probing socket
  ARM: dts: realview-pbx: Fix duplicate regulator nodes
  ARM: dts: mmp2: fix the gpio interrupt cell number
  net/x25: fix null_x25_address handling
  net/x25: fix called/calling length calculation in x25_parse_address_block
  ARM: OMAP1/2: fix SoC name printing
  nfsd: fix a warning in __cld_pipe_upcall()
  ARM: debug: enable UART1 for socfpga Cyclone5
  dlm: NULL check before kmem_cache_destroy is not needed
  i2c: imx: don't print error message on probe defer
  serial: imx: fix error handling in console_setup
  altera-stapl: check for a null key before strcasecmp'ing it
  dma-mapping: fix return type of dma_set_max_seg_size()
  ACPI: fix acpi_find_child_device() invocation in acpi_preset_companion()
  usb: dwc3: don't log probe deferrals; but do log other error codes
  dmaengine: coh901318: Remove unused variable
  dmaengine: coh901318: Fix a double-lock bug
  media: pulse8-cec: return 0 when invalidating the logical address
  ARM: dts: exynos: Use Samsung SoC specific compatible for DWC2 module
  rtc: dt-binding: abx80x: fix resistance scale
  rtc: max8997: Fix the returned value in case of error in 'max8997_rtc_read_alarm()'
  math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
  MIPS: OCTEON: octeon-platform: fix typing
  regulator: Fix return value of _set_load() stub
  Staging: iio: adt7316: Fix i2c data reading, set the data field
  pinctrl: qcom: ssbi-gpio: fix gpio-hog related boot issues
  scsi: zfcp: drop default switch case which might paper over missing case
  MIPS: SiByte: Enable ZONE_DMA32 for LittleSur
  dlm: fix missing idr_destroy for recover_idr
  ARM: dts: rockchip: Fix rk3288-rock2 vcc_flash name
  clk: rockchip: fix rk3188 sclk_mac_lbtest parameter ordering
  clk: rockchip: fix rk3188 sclk_smc gate data
  extcon: max8997: Fix lack of path setting in USB device mode
  net/mlx5: Release resource on error flow
  ARM: 8813/1: Make aligned 2-byte getuser()/putuser() atomic on ARMv6+
  iwlwifi: mvm: Send non offchannel traffic via AP sta
  cxgb4vf: fix memleak in mac_hlist initialization
  serial: core: Allow processing sysrq at port unlock time
  net: ep93xx_eth: fix mismatch of request_mem_region in remove
  rsxx: add missed destroy_workqueue calls in remove
  ALSA: pcm: Fix stream lock usage in snd_pcm_period_elapsed()
  Input: cyttsp4_core - fix use after free bug
  NFC: nxp-nci: Fix NULL pointer dereference after I2C communication error
  audit_get_nd(): don't unlock parent too early
  exportfs_decode_fh(): negative pinned may become positive without the parent locked
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  autofs: fix a leak in autofs_expire_indirect()
  serial: ifx6x60: add missed pm_runtime_disable
  serial: serial_core: Perform NULL checks for break_ctl ops
  serial: pl011: Fix DMA ->flush_buffer()
  tty: serial: msm_serial: Fix flow control
  tty: serial: fsl_lpuart: use the sg count from dma_map_sg
  usb: gadget: u_serial: add missing port entry locking
  arm64: tegra: Fix 'active-low' warning for Jetson TX1 regulator
  UPSTREAM: binder: fix incorrect calculation for num_valid
  ANDROID: sched/core: Fix arm32 allmodconfig build-break
  BACKPORT: bpf: permit multiple bpf attachments for a single perf event
  UPSTREAM: bpf: use the same condition in perf event set/free bpf handler
  BACKPORT: bpf: multi program support for cgroup+bpf

 Conflicts:
	drivers/usb/dwc3/core.c
	drivers/usb/host/xhci.c
	include/trace/events/sched.h

Change-Id: Ida9ef3977cd175731c312341385819f5812f707c
Signed-off-by: jianzhou <jianzhou@codeaurora.org>
2020-03-12 15:39:56 +08:00

751 lines
20 KiB
C

/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
* @ctx: opaque context pointer
*
* kprobe handlers execute BPF programs via this helper.
* Can be used from static tracepoints in the future.
*
* Return: BPF programs always return an integer which is interpreted by
* kprobe handler as:
* 0 - return from kprobe (event is filtered out)
* 1 - store kprobe event into ring buffer
* Other values are reserved and currently alias to 1
*/
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
unsigned int ret;
if (in_nmi()) /* not supported yet */
return 1;
preempt_disable();
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
/*
* since some bpf program is already running on this cpu,
* don't call into another bpf program (same or different)
* and don't send kprobe event into ring-buffer,
* so return zero here
*/
ret = 0;
goto out;
}
/*
* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
* to all call sites, we did a bpf_prog_array_valid() there to check
* whether call->prog_array is empty or not, which is
* a heurisitc to speed up execution.
*
* If bpf_prog_array_valid() fetched prog_array was
* non-NULL, we go into trace_call_bpf() and do the actual
* proper rcu_dereference() under RCU lock.
* If it turns out that prog_array is NULL then, we bail out.
* For the opposite, if the bpf_prog_array_valid() fetched pointer
* was NULL, you'll skip the prog_array with the risk of missing
* out of events when it was updated in between this and the
* rcu_dereference() which is accepted risk.
*/
ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
out:
__this_cpu_dec(bpf_prog_active);
preempt_enable();
return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_proto = {
.func = bpf_probe_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_RAW_STACK,
.arg2_type = ARG_CONST_STACK_SIZE,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
/*
* The strncpy_from_unsafe() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_str_proto = {
.func = bpf_probe_read_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_RAW_STACK,
.arg2_type = ARG_CONST_STACK_SIZE,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
u32, size)
{
/*
* Ensure we're in user context which is safe for the helper to
* run. This helper has no business in a kthread.
*
* access_ok() should prevent writing to non-user memory, but in
* some situations (nommu, temporary switch, etc) access_ok() does
* not provide enough validation, hence the check on KERNEL_DS.
*/
if (unlikely(in_interrupt() ||
current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
return -EPERM;
if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size);
}
static const struct bpf_func_proto bpf_probe_write_user_proto = {
.func = bpf_probe_write_user,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
};
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
current->comm, task_pid_nr(current));
return &bpf_probe_write_user_proto;
}
/*
* limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
*/
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
bool str_seen = false;
int mod[3] = {};
int fmt_cnt = 0;
u64 unsafe_addr;
char buf[64];
int i;
/*
* bpf_check()->check_func_arg()->check_stack_boundary()
* guarantees that fmt points to bpf program stack,
* fmt_size bytes of it were initialized and fmt_size > 0
*/
if (fmt[--fmt_size] != 0)
return -EINVAL;
/* check format string for allowed specifiers */
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
return -EINVAL;
if (fmt[i] != '%')
continue;
if (fmt_cnt >= 3)
return -EINVAL;
/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
i++;
if (fmt[i] == 'l') {
mod[fmt_cnt]++;
i++;
} else if (fmt[i] == 'p' || fmt[i] == 's') {
mod[fmt_cnt]++;
/* disallow any further format extensions */
if (fmt[i + 1] != 0 &&
!isspace(fmt[i + 1]) &&
!ispunct(fmt[i + 1]))
return -EINVAL;
fmt_cnt++;
if (fmt[i] == 's') {
if (str_seen)
/* allow only one '%s' per fmt string */
return -EINVAL;
str_seen = true;
switch (fmt_cnt) {
case 1:
unsafe_addr = arg1;
arg1 = (long) buf;
break;
case 2:
unsafe_addr = arg2;
arg2 = (long) buf;
break;
case 3:
unsafe_addr = arg3;
arg3 = (long) buf;
break;
}
buf[0] = 0;
strncpy_from_unsafe(buf,
(void *) (long) unsafe_addr,
sizeof(buf));
}
continue;
}
if (fmt[i] == 'l') {
mod[fmt_cnt]++;
i++;
}
if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
return -EINVAL;
fmt_cnt++;
}
/* Horrid workaround for getting va_list handling working with different
* argument type combinations generically for 32 and 64 bit archs.
*/
#define __BPF_TP_EMIT() __BPF_ARG3_TP()
#define __BPF_TP(...) \
__trace_printk(1 /* Fake ip will not be printed. */, \
fmt, ##__VA_ARGS__)
#define __BPF_ARG1_TP(...) \
((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_TP(arg1, ##__VA_ARGS__) \
: ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_TP((long)arg1, ##__VA_ARGS__) \
: __BPF_TP((u32)arg1, ##__VA_ARGS__)))
#define __BPF_ARG2_TP(...) \
((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
: ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
: __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
#define __BPF_ARG3_TP(...) \
((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
: ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
: __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
return __BPF_TP_EMIT();
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
.func = bpf_trace_printk,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_STACK,
.arg2_type = ARG_CONST_STACK_SIZE,
};
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
/*
* this program might be calling bpf_trace_printk,
* so allocate per-cpu printk buffers
*/
trace_printk_init_buffers();
return &bpf_trace_printk_proto;
}
BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct bpf_event_entry *ee;
struct perf_event *event;
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
if (index == BPF_F_CURRENT_CPU)
index = cpu;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
ee = READ_ONCE(array->ptrs[index]);
if (!ee)
return -ENOENT;
event = ee->event;
if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
event->attr.type != PERF_TYPE_RAW))
return -EINVAL;
/* make sure event is local and doesn't have pmu::count */
if (unlikely(event->oncpu != cpu || event->pmu->count))
return -EINVAL;
/*
* we don't know if the function is run successfully by the
* return value. It can be judged in other places, such as
* eBPF programs.
*/
return perf_event_read_local(event);
}
static const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_raw_record *raw)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct perf_sample_data sample_data;
struct bpf_event_entry *ee;
struct perf_event *event;
if (index == BPF_F_CURRENT_CPU)
index = cpu;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
ee = READ_ONCE(array->ptrs[index]);
if (!ee)
return -ENOENT;
event = ee->event;
if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
return -EINVAL;
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
perf_sample_data_init(&sample_data, 0, 0);
sample_data.raw = raw;
perf_event_output(event, &sample_data, regs);
return 0;
}
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
return __bpf_perf_event_output(regs, map, flags, &raw);
}
static const struct bpf_func_proto bpf_perf_event_output_proto = {
.func = bpf_perf_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_STACK,
.arg5_type = ARG_CONST_STACK_SIZE,
};
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
.data = ctx,
};
struct perf_raw_record raw = {
.frag = {
{
.next = ctx_size ? &frag : NULL,
},
.size = meta_size,
.data = meta,
},
};
perf_fetch_caller_regs(regs);
return __bpf_perf_event_output(regs, map, flags, &raw);
}
BPF_CALL_0(bpf_get_current_task)
{
return (long) current;
}
static const struct bpf_func_proto bpf_get_current_task_proto = {
.func = bpf_get_current_task,
.gpl_only = true,
.ret_type = RET_INTEGER,
};
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
if (unlikely(in_interrupt()))
return -EINVAL;
if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
cgrp = READ_ONCE(array->ptrs[idx]);
if (unlikely(!cgrp))
return -EAGAIN;
return task_under_cgroup_hierarchy(current, cgrp);
}
static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.func = bpf_current_task_under_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
return &bpf_map_lookup_elem_proto;
case BPF_FUNC_map_update_elem:
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_get_current_pid_tgid:
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_task:
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
case BPF_FUNC_trace_printk:
return bpf_get_trace_printk_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
default:
return NULL;
}
}
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
default:
return tracing_func_proto(func_id);
}
}
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
enum bpf_reg_type *reg_type)
{
if (off < 0 || off >= sizeof(struct pt_regs))
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
return true;
}
static const struct bpf_verifier_ops kprobe_prog_ops = {
.get_func_proto = kprobe_prog_func_proto,
.is_valid_access = kprobe_prog_is_valid_access,
};
static struct bpf_prog_type_list kprobe_tl = {
.ops = &kprobe_prog_ops,
.type = BPF_PROG_TYPE_KPROBE,
};
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
/*
* r1 points to perf tracepoint buffer where first 8 bytes are hidden
* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
* from there and call the same bpf_perf_event_output() helper inline.
*/
return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.func = bpf_perf_event_output_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_STACK,
.arg5_type = ARG_CONST_STACK_SIZE,
};
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
u64, flags)
{
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
/*
* Same comment as in bpf_perf_event_output_tp(), only that this time
* the other helper's function body cannot be inlined due to being
* external, thus we need to call raw helper function.
*/
return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.func = bpf_get_stackid_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
default:
return tracing_func_proto(func_id);
}
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
enum bpf_reg_type *reg_type)
{
if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
return true;
}
static const struct bpf_verifier_ops tracepoint_prog_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = tp_prog_is_valid_access,
};
static struct bpf_prog_type_list tracepoint_tl = {
.ops = &tracepoint_prog_ops,
.type = BPF_PROG_TYPE_TRACEPOINT,
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
enum bpf_reg_type *reg_type)
{
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
if (size != sizeof(u64))
return false;
} else {
if (size != sizeof(long))
return false;
}
return true;
}
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
switch (ctx_off) {
case offsetof(struct bpf_perf_event_data, sample_period):
BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
data), dst_reg, src_reg,
offsetof(struct bpf_perf_event_data_kern, data));
*insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
offsetof(struct perf_sample_data, period));
break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), dst_reg, src_reg,
offsetof(struct bpf_perf_event_data_kern, regs));
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
break;
}
return insn - insn_buf;
}
static const struct bpf_verifier_ops perf_event_prog_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = pe_prog_is_valid_access,
.convert_ctx_access = pe_prog_convert_ctx_access,
};
static DEFINE_MUTEX(bpf_event_mutex);
int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog *prog)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *new_array;
int ret = -EEXIST;
mutex_lock(&bpf_event_mutex);
if (event->prog)
goto out;
old_array = rcu_dereference_protected(event->tp_event->prog_array,
lockdep_is_held(&bpf_event_mutex));
ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
if (ret < 0)
goto out;
/* set the new array to event->tp_event and set event->prog */
event->prog = prog;
rcu_assign_pointer(event->tp_event->prog_array, new_array);
bpf_prog_array_free(old_array);
out:
mutex_unlock(&bpf_event_mutex);
return ret;
}
void perf_event_detach_bpf_prog(struct perf_event *event)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *new_array;
int ret;
mutex_lock(&bpf_event_mutex);
if (!event->prog)
goto out;
old_array = rcu_dereference_protected(event->tp_event->prog_array,
lockdep_is_held(&bpf_event_mutex));
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
} else {
rcu_assign_pointer(event->tp_event->prog_array, new_array);
bpf_prog_array_free(old_array);
}
bpf_prog_put(event->prog);
event->prog = NULL;
out:
mutex_unlock(&bpf_event_mutex);
}
static struct bpf_prog_type_list perf_event_tl = {
.ops = &perf_event_prog_ops,
.type = BPF_PROG_TYPE_PERF_EVENT,
};
static int __init register_kprobe_prog_ops(void)
{
bpf_register_prog_type(&kprobe_tl);
bpf_register_prog_type(&tracepoint_tl);
bpf_register_prog_type(&perf_event_tl);
return 0;
}
late_initcall(register_kprobe_prog_ops);