Files
kernel_xiaomi_raphael/kernel/bpf/arraymap.c
Greg Kroah-Hartman 6c95b90db5 Merge 4.14.79 into android-4.14
Changes in 4.14.79
	xfrm: Validate address prefix lengths in the xfrm selector.
	xfrm6: call kfree_skb when skb is toobig
	xfrm: reset transport header back to network header after all input transforms ahave been applied
	xfrm: reset crypto_done when iterating over multiple input xfrms
	mac80211: Always report TX status
	cfg80211: reg: Init wiphy_idx in regulatory_hint_core()
	mac80211: fix pending queue hang due to TX_DROP
	cfg80211: Address some corner cases in scan result channel updating
	mac80211: TDLS: fix skb queue/priority assignment
	mac80211: fix TX status reporting for ieee80211s
	xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry.
	ARM: 8799/1: mm: fix pci_ioremap_io() offset check
	xfrm: validate template mode
	netfilter: bridge: Don't sabotage nf_hook calls from an l3mdev
	arm64: hugetlb: Fix handling of young ptes
	ARM: dts: BCM63xx: Fix incorrect interrupt specifiers
	net: macb: Clean 64b dma addresses if they are not detected
	soc: fsl: qbman: qman: avoid allocating from non existing gen_pool
	soc: fsl: qe: Fix copy/paste bug in ucc_get_tdm_sync_shift()
	nl80211: Fix possible Spectre-v1 for NL80211_TXRATE_HT
	mac80211_hwsim: do not omit multicast announce of first added radio
	Bluetooth: SMP: fix crash in unpairing
	pxa168fb: prepare the clock
	qed: Avoid implicit enum conversion in qed_set_tunn_cls_info
	qed: Fix mask parameter in qed_vf_prep_tunn_req_tlv
	qed: Avoid implicit enum conversion in qed_roce_mode_to_flavor
	qed: Avoid constant logical operation warning in qed_vf_pf_acquire
	qed: Avoid implicit enum conversion in qed_iwarp_parse_rx_pkt
	nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds
	asix: Check for supported Wake-on-LAN modes
	ax88179_178a: Check for supported Wake-on-LAN modes
	lan78xx: Check for supported Wake-on-LAN modes
	sr9800: Check for supported Wake-on-LAN modes
	r8152: Check for supported Wake-on-LAN Modes
	smsc75xx: Check for Wake-on-LAN modes
	smsc95xx: Check for Wake-on-LAN modes
	cfg80211: fix use-after-free in reg_process_hint()
	perf/core: Fix perf_pmu_unregister() locking
	perf/ring_buffer: Prevent concurent ring buffer access
	perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX
	perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events
	net: fec: fix rare tx timeout
	declance: Fix continuation with the adapter identification message
	net: qualcomm: rmnet: Skip processing loopback packets
	locking/ww_mutex: Fix runtime warning in the WW mutex selftest
	be2net: don't flip hw_features when VXLANs are added/deleted
	net: cxgb3_main: fix a missing-check bug
	yam: fix a missing-check bug
	ocfs2: fix crash in ocfs2_duplicate_clusters_by_page()
	iwlwifi: mvm: check for short GI only for OFDM
	iwlwifi: dbg: allow wrt collection before ALIVE
	iwlwifi: fix the ALIVE notification layout
	tools/testing/nvdimm: unit test clear-error commands
	usbip: vhci_hcd: update 'status' file header and format
	scsi: aacraid: address UBSAN warning regression
	IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush
	IB/rxe: put the pool on allocation failure
	s390/qeth: fix error handling in adapter command callbacks
	net/mlx5: Fix mlx5_get_vector_affinity function
	powerpc/pseries: Add empty update_numa_cpu_lookup_table() for NUMA=n
	dm integrity: fail early if required HMAC key is not available
	net: phy: realtek: Use the dummy stubs for MMD register access for rtl8211b
	net: phy: Add general dummy stubs for MMD register access
	net/mlx5e: Refine ets validation function
	scsi: qla2xxx: Avoid double completion of abort command
	kbuild: set no-integrated-as before incl. arch Makefile
	IB/mlx5: Avoid passing an invalid QP type to firmware
	ARM: tegra: Fix ULPI regression on Tegra20
	l2tp: remove configurable payload offset
	cifs: Use ULL suffix for 64-bit constant
	test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches
	KVM: x86: Update the exit_qualification access bits while walking an address
	sparc64: Fix regression in pmdp_invalidate().
	tpm: move the delay_msec increment after sleep in tpm_transmit()
	bpf: sockmap, map_release does not hold refcnt for pinned maps
	tpm: tpm_crb: relinquish locality on error path.
	xen-netfront: Update features after registering netdev
	xen-netfront: Fix mismatched rtnl_unlock
	IB/usnic: Update with bug fixes from core code
	mmc: dw_mmc-rockchip: correct property names in debug
	MIPS: Workaround GCC __builtin_unreachable reordering bug
	lan78xx: Don't reset the interface on open
	enic: do not overwrite error code
	iio: buffer: fix the function signature to match implementation
	selftests/powerpc: Add ptrace hw breakpoint test
	scsi: ibmvfc: Avoid unnecessary port relogin
	scsi: sd: Remember that READ CAPACITY(16) succeeded
	btrfs: quota: Set rescan progress to (u64)-1 if we hit last leaf
	net: phy: phylink: Don't release NULL GPIO
	x86/paravirt: Fix some warning messages
	net: stmmac: mark PM functions as __maybe_unused
	kconfig: fix the rule of mainmenu_stmt symbol
	libertas: call into generic suspend code before turning off power
	perf tests: Fix indexing when invoking subtests
	compiler.h: Allow arch-specific asm/compiler.h
	ARM: dts: imx53-qsb: disable 1.2GHz OPP
	perf python: Use -Wno-redundant-decls to build with PYTHON=python3
	rxrpc: Don't check RXRPC_CALL_TX_LAST after calling rxrpc_rotate_tx_window()
	rxrpc: Only take the rwind and mtu values from latest ACK
	rxrpc: Fix connection-level abort handling
	net: ena: fix warning in rmmod caused by double iounmap
	net: ena: fix NULL dereference due to untimely napi initialization
	selftests: rtnetlink.sh explicitly requires bash.
	fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters()
	sch_netem: restore skb->dev after dequeuing from the rbtree
	mtd: spi-nor: Add support for is25wp series chips
	kvm: x86: fix WARN due to uninitialized guest FPU state
	ARM: dts: r8a7790: Correct critical CPU temperature
	media: uvcvideo: Fix driver reference counting
	ALSA: usx2y: Fix invalid stream URBs
	Revert "netfilter: ipv6: nf_defrag: drop skb dst before queueing"
	perf tools: Disable parallelism for 'make clean'
	drm/i915/gvt: fix memory leak of a cmd_entry struct on error exit path
	bridge: do not add port to router list when receives query with source 0.0.0.0
	net: bridge: remove ipv6 zero address check in mcast queries
	ipv6: mcast: fix a use-after-free in inet6_mc_check
	ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called
	llc: set SOCK_RCU_FREE in llc_sap_add_socket()
	net: fec: don't dump RX FIFO register when not available
	net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs
	net: sched: gred: pass the right attribute to gred_change_table_def()
	net: socket: fix a missing-check bug
	net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules
	net: udp: fix handling of CHECKSUM_COMPLETE packets
	r8169: fix NAPI handling under high load
	sctp: fix race on sctp_id2asoc
	udp6: fix encap return code for resubmitting
	vhost: Fix Spectre V1 vulnerability
	virtio_net: avoid using netif_tx_disable() for serializing tx routine
	ethtool: fix a privilege escalation bug
	bonding: fix length of actor system
	ip6_tunnel: Fix encapsulation layout
	openvswitch: Fix push/pop ethernet validation
	net/mlx5: Take only bit 24-26 of wqe.pftype_wq for page fault type
	net: sched: Fix for duplicate class dump
	net: drop skb on failure in ip_check_defrag()
	net: fix pskb_trim_rcsum_slow() with odd trim offset
	net/mlx5e: fix csum adjustments caused by RXFCS
	rtnetlink: Disallow FDB configuration for non-Ethernet device
	net: ipmr: fix unresolved entry dumps
	net: bcmgenet: Poll internal PHY for GENETv5
	net/sched: cls_api: add missing validation of netlink attributes
	net/mlx5: Fix build break when CONFIG_SMP=n
	Linux 4.14.79

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2018-11-08 07:43:01 -08:00

694 lines
18 KiB
C

/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016,2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
static void bpf_array_free_percpu(struct bpf_array *array)
{
int i;
for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
cond_resched();
}
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
{
void __percpu *ptr;
int i;
for (i = 0; i < array->map.max_entries; i++) {
ptr = __alloc_percpu_gfp(array->elem_size, 8,
GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
}
array->pptrs[i] = ptr;
cond_resched();
}
return 0;
}
/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
struct bpf_array *array;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
(percpu && numa_node != NUMA_NO_NODE))
return ERR_PTR(-EINVAL);
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
return ERR_PTR(-E2BIG);
elem_size = round_up(attr->value_size, 8);
max_entries = attr->max_entries;
/* On 32 bit archs roundup_pow_of_two() with max_entries that has
* upper most bit set in u32 space is undefined behavior due to
* resulting 1U << 32, so do it manually here in u64 space.
*/
mask64 = fls_long(max_entries - 1);
mask64 = 1ULL << mask64;
mask64 -= 1;
index_mask = mask64;
if (unpriv) {
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;
/* Check for overflows. */
if (max_entries < attr->max_entries)
return ERR_PTR(-E2BIG);
}
array_size = sizeof(*array);
if (percpu)
array_size += (u64) max_entries * sizeof(void *);
else
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
if (percpu) {
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
}
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
ret = bpf_map_precharge_memlock(cost);
if (ret < 0)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
array->map.key_size = attr->key_size;
array->map.value_size = attr->value_size;
array->map.max_entries = attr->max_entries;
array->map.map_flags = attr->map_flags;
array->map.numa_node = numa_node;
array->map.pages = cost;
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
return &array->map;
}
/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(index >= array->map.max_entries))
return NULL;
return array->value + array->elem_size * (index & array->index_mask);
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}
if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
} else {
*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
}
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*insn++ = BPF_MOV64_IMM(ret, 0);
return insn - insn_buf;
}
/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(index >= array->map.max_entries))
return NULL;
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
int cpu, off = 0;
u32 size;
if (unlikely(index >= array->map.max_entries))
return -ENOENT;
/* per_cpu areas are zero-filled and bpf programs can only
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
}
rcu_read_unlock();
return 0;
}
/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = (u32 *)next_key;
if (index >= array->map.max_entries) {
*next = 0;
return 0;
}
if (index == array->map.max_entries - 1)
return -ENOENT;
*next = index + 1;
return 0;
}
/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(map_flags > BPF_EXIST))
/* unknown flags */
return -EINVAL;
if (unlikely(index >= array->map.max_entries))
/* all elements were pre-allocated, cannot insert a new one */
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
/* all elements already exist */
return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
int cpu, off = 0;
u32 size;
if (unlikely(map_flags > BPF_EXIST))
/* unknown flags */
return -EINVAL;
if (unlikely(index >= array->map.max_entries))
/* all elements were pre-allocated, cannot insert a new one */
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
/* all elements already exist */
return -EEXIST;
/* the user space will provide round_up(value_size, 8) bytes that
* will be copied into per-cpu area. bpf programs can only access
* value_size of it. During lookup the same extra bytes will be
* returned or zeros which were zero-filled by percpu_alloc,
* so no kernel data leaks possible
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
}
rcu_read_unlock();
return 0;
}
/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
* disconnected from events. Wait for outstanding programs to complete
* and free the array
*/
synchronize_rcu();
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
bpf_map_area_free(array);
}
const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
};
const struct bpf_map_ops percpu_array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
};
static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return ERR_PTR(-EINVAL);
return array_map_alloc(attr);
}
static void fd_array_map_free(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
synchronize_rcu();
/* make sure it's empty */
for (i = 0; i < array->map.max_entries; i++)
BUG_ON(array->ptrs[i] != NULL);
bpf_map_area_free(array);
}
static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
return NULL;
}
/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
void **elem, *ptr;
int ret = 0;
if (!map->ops->map_fd_sys_lookup_elem)
return -ENOTSUPP;
rcu_read_lock();
elem = array_map_lookup_elem(map, key);
if (elem && (ptr = READ_ONCE(*elem)))
*value = map->ops->map_fd_sys_lookup_elem(ptr);
else
ret = -ENOENT;
rcu_read_unlock();
return ret;
}
/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *new_ptr, *old_ptr;
u32 index = *(u32 *)key, ufd;
if (map_flags != BPF_ANY)
return -EINVAL;
if (index >= array->map.max_entries)
return -E2BIG;
ufd = *(u32 *)value;
new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
if (IS_ERR(new_ptr))
return PTR_ERR(new_ptr);
old_ptr = xchg(array->ptrs + index, new_ptr);
if (old_ptr)
map->ops->map_fd_put_ptr(old_ptr);
return 0;
}
static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
u32 index = *(u32 *)key;
if (index >= array->map.max_entries)
return -E2BIG;
old_ptr = xchg(array->ptrs + index, NULL);
if (old_ptr) {
map->ops->map_fd_put_ptr(old_ptr);
return 0;
} else {
return -ENOENT;
}
}
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
if (IS_ERR(prog))
return prog;
if (!bpf_prog_array_compatible(array, prog)) {
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
return prog;
}
static void prog_fd_array_put_ptr(void *ptr)
{
bpf_prog_put(ptr);
}
static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
return ((struct bpf_prog *)ptr)->aux->id;
}
/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
fd_array_map_delete_elem(map, &i);
}
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = bpf_fd_array_map_clear,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
struct file *map_file)
{
struct bpf_event_entry *ee;
ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
ee->map_file = map_file;
}
return ee;
}
static void __bpf_event_entry_free(struct rcu_head *rcu)
{
struct bpf_event_entry *ee;
ee = container_of(rcu, struct bpf_event_entry, rcu);
fput(ee->perf_file);
kfree(ee);
}
static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
call_rcu(&ee->rcu, __bpf_event_entry_free);
}
static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
struct bpf_event_entry *ee;
struct perf_event *event;
struct file *perf_file;
u64 value;
perf_file = perf_event_get(fd);
if (IS_ERR(perf_file))
return perf_file;
ee = ERR_PTR(-EOPNOTSUPP);
event = perf_file->private_data;
if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
goto err_out;
ee = bpf_event_entry_gen(perf_file, map_file);
if (ee)
return ee;
ee = ERR_PTR(-ENOMEM);
err_out:
fput(perf_file);
return ee;
}
static void perf_event_fd_array_put_ptr(void *ptr)
{
bpf_event_entry_free_rcu(ptr);
}
static void perf_event_fd_array_release(struct bpf_map *map,
struct file *map_file)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_event_entry *ee;
int i;
rcu_read_lock();
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
fd_array_map_delete_elem(map, &i);
}
rcu_read_unlock();
}
const struct bpf_map_ops perf_event_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
.map_release = perf_event_fd_array_release,
};
#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
{
return cgroup_get_from_fd(fd);
}
static void cgroup_fd_array_put_ptr(void *ptr)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
}
static void cgroup_fd_array_free(struct bpf_map *map)
{
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}
const struct bpf_map_ops cgroup_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
};
#endif
static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
struct bpf_map *map, *inner_map_meta;
inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
if (IS_ERR(inner_map_meta))
return inner_map_meta;
map = fd_array_map_alloc(attr);
if (IS_ERR(map)) {
bpf_map_meta_free(inner_map_meta);
return map;
}
map->inner_map_meta = inner_map_meta;
return map;
}
static void array_of_map_free(struct bpf_map *map)
{
/* map->inner_map_meta is only accessed by syscall which
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}
static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_map **inner_map = array_map_lookup_elem(map, key);
if (!inner_map)
return NULL;
return READ_ONCE(*inner_map);
}
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*insn++ = BPF_MOV64_IMM(ret, 0);
return insn - insn_buf;
}
const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = array_of_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
.map_gen_lookup = array_of_map_gen_lookup,
};